Diffstat (limited to 'weed')
160 files changed, 16402 insertions, 0 deletions
diff --git a/weed/command/backup.go b/weed/command/backup.go
new file mode 100644
index 000000000..0b3994027
--- /dev/null
+++ b/weed/command/backup.go
@@ -0,0 +1,90 @@
+package command
+
+import (
+	"fmt"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+var (
+	s BackupOptions
+)
+
+type BackupOptions struct {
+	master     *string
+	collection *string
+	dir        *string
+	volumeId   *int
+}
+
+func init() {
+	cmdBackup.Run = runBackup // break init cycle
+	s.master = cmdBackup.Flag.String("server", "localhost:9333", "SeaweedFS master location")
+	s.collection = cmdBackup.Flag.String("collection", "", "collection name")
+	s.dir = cmdBackup.Flag.String("dir", ".", "directory to store volume data files")
+	s.volumeId = cmdBackup.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
+}
+
+var cmdBackup = &Command{
+	UsageLine: "backup -dir=. -volumeId=234 -server=localhost:9333",
+	Short:     "incrementally backup a volume to local folder",
+	Long: `Incrementally backup volume data.
+
+	It is expected that you use this inside a script, to loop through
+	all possible volume ids that need to be backed up to a local folder.
+
+	The volume id does not need to exist locally or even remotely.
+	This will help to backup future new volumes.
+
+	Usually backing up is just copying the .dat (and .idx) files.
+	But it's tricky to incrementally copy the differences.
+
+	The complexity comes when there are multiple additions, deletions and compactions.
+	This tool will handle them correctly and efficiently, avoiding unnecessary data transportation.
+  `,
+}
+
+func runBackup(cmd *Command, args []string) bool {
+	if *s.volumeId == -1 {
+		return false
+	}
+	vid := storage.VolumeId(*s.volumeId)
+
+	// find volume location, replication, ttl info
+	lookup, err := operation.Lookup(*s.master, vid.String())
+	if err != nil {
+		fmt.Printf("Error looking up volume %d: %v\n", vid, err)
+		return true
+	}
+	volumeServer := lookup.Locations[0].Url
+
+	stats, err := operation.GetVolumeSyncStatus(volumeServer, vid.String())
+	if err != nil {
+		fmt.Printf("Error getting volume %d status: %v\n", vid, err)
+		return true
+	}
+	ttl, err := storage.ReadTTL(stats.Ttl)
+	if err != nil {
+		fmt.Printf("Error getting volume %d ttl %s: %v\n", vid, stats.Ttl, err)
+		return true
+	}
+	replication, err := storage.NewReplicaPlacementFromString(stats.Replication)
+	if err != nil {
+		fmt.Printf("Error getting volume %d replication %s: %v\n", vid, stats.Replication, err)
+		return true
+	}
+
+	v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl)
+	if err != nil {
+		fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err)
+		return true
+	}
+
+	if err := v.Synchronize(volumeServer); err != nil {
+		fmt.Printf("Error synchronizing volume %d: %v\n", vid, err)
+		return true
+	}
+
+	return true
+}
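Since the help text above recommends driving this command from a script over every possible volume id, here is a minimal sketch of such a driver. The id range and the shelling-out to the weed binary are assumptions for illustration, not part of this change:

    // backup_all.go -- loop over a fixed range of volume ids and invoke
    // "weed backup" for each; ids that don't exist locally or remotely
    // are handled gracefully by the backup command itself.
    package main

    import (
    	"fmt"
    	"os/exec"
    )

    func main() {
    	const maxVolumeId = 100 // assumption: adjust to your cluster's highest volume id
    	for vid := 1; vid <= maxVolumeId; vid++ {
    		cmd := exec.Command("weed", "backup",
    			"-dir=.", fmt.Sprintf("-volumeId=%d", vid), "-server=localhost:9333")
    		if out, err := cmd.CombinedOutput(); err != nil {
    			fmt.Printf("volume %d: %v: %s\n", vid, err, out)
    		}
    	}
    }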
diff --git a/weed/command/benchmark.go b/weed/command/benchmark.go
new file mode 100644
index 000000000..7e0802e30
--- /dev/null
+++ b/weed/command/benchmark.go
@@ -0,0 +1,532 @@
+package command
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"math"
+	"math/rand"
+	"os"
+	"runtime"
+	"runtime/pprof"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/security"
+	"github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type BenchmarkOptions struct {
+	server           *string
+	concurrency      *int
+	numberOfFiles    *int
+	fileSize         *int
+	idListFile       *string
+	write            *bool
+	deletePercentage *int
+	read             *bool
+	sequentialRead   *bool
+	collection       *string
+	cpuprofile       *string
+	maxCpu           *int
+	secretKey        *string
+}
+
+var (
+	b           BenchmarkOptions
+	sharedBytes []byte
+)
+
+func init() {
+	cmdBenchmark.Run = runbenchmark // break init cycle
+	cmdBenchmark.IsDebug = cmdBenchmark.Flag.Bool("debug", false, "verbose debug information")
+	b.server = cmdBenchmark.Flag.String("server", "localhost:9333", "SeaweedFS master location")
+	b.concurrency = cmdBenchmark.Flag.Int("c", 16, "number of concurrent write or read processes")
+	b.fileSize = cmdBenchmark.Flag.Int("size", 1024, "simulated file size in bytes, with random(0~63) bytes padding")
+	b.numberOfFiles = cmdBenchmark.Flag.Int("n", 1024*1024, "number of files to write for each thread")
+	b.idListFile = cmdBenchmark.Flag.String("list", os.TempDir()+"/benchmark_list.txt", "list of uploaded file ids")
+	b.write = cmdBenchmark.Flag.Bool("write", true, "enable write")
+	b.deletePercentage = cmdBenchmark.Flag.Int("deletePercent", 0, "the percent of writes that are deletes")
+	b.read = cmdBenchmark.Flag.Bool("read", true, "enable read")
+	b.sequentialRead = cmdBenchmark.Flag.Bool("readSequentially", false, "sequentially read by ids from \"-list\" specified file")
+	b.collection = cmdBenchmark.Flag.String("collection", "benchmark", "write data to this collection")
+	b.cpuprofile = cmdBenchmark.Flag.String("cpuprofile", "", "cpu profile output file")
+	b.maxCpu = cmdBenchmark.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs")
+	b.secretKey = cmdBenchmark.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)")
+	sharedBytes = make([]byte, 1024)
+}
+
+var cmdBenchmark = &Command{
+	UsageLine: "benchmark -server=localhost:9333 -c=10 -n=100000",
+	Short:     "benchmark by writing millions of files and reading them out",
+	Long: `benchmark on an empty SeaweedFS file system.
+
+	Two tests during benchmark:
+	1) write lots of small files to the system
+	2) read the files out
+
+	The file content is mostly zero, but no compression is done.
+
+	You can choose to only benchmark read or write.
+	During write, the list of uploaded file ids is stored in "-list" specified file.
+	You can also use your own list of file ids to run read test.
+
+	Write speed and read speed will be collected.
+	The numbers are used to get a sense of the system.
+	Usually your network or the hard drive is the real bottleneck.
+
+	Another thing to watch is whether the volumes are evenly distributed
+	to each volume server. Because the benchmark volumes are randomly distributed
+	to servers with free slots, it's quite possible that some servers have an uneven
+	number of benchmark volumes. To remedy this, you can grow the benchmark volumes
+	before starting the benchmark command:
+	  http://localhost:9333/vol/grow?collection=benchmark&count=5
+
+	After benchmarking, you can clean up the written data by deleting the benchmark collection
+	  http://localhost:9333/col/delete?collection=benchmark
+
+  `,
+}
+
+var (
+	wait       sync.WaitGroup
+	writeStats *stats
+	readStats  *stats
+)
+
+func runbenchmark(cmd *Command, args []string) bool {
+	fmt.Printf("This is SeaweedFS version %s %s %s\n", util.VERSION, runtime.GOOS, runtime.GOARCH)
+	if *b.maxCpu < 1 {
+		*b.maxCpu = runtime.NumCPU()
+	}
+	runtime.GOMAXPROCS(*b.maxCpu)
+	if *b.cpuprofile != "" {
+		f, err := os.Create(*b.cpuprofile)
+		if err != nil {
+			glog.Fatal(err)
+		}
+		pprof.StartCPUProfile(f)
+		defer pprof.StopCPUProfile()
+	}
+
+	if *b.write {
+		bench_write()
+	}
+
+	if *b.read {
+		bench_read()
+	}
+
+	return true
+}
+
+func bench_write() {
+	fileIdLineChan := make(chan string)
+	finishChan := make(chan bool)
+	writeStats = newStats(*b.concurrency)
+	idChan := make(chan int)
+	go writeFileIds(*b.idListFile, fileIdLineChan, finishChan)
+	for i := 0; i < *b.concurrency; i++ {
+		wait.Add(1)
+		go writeFiles(idChan, fileIdLineChan, &writeStats.localStats[i])
+	}
+	writeStats.start = time.Now()
+	writeStats.total = *b.numberOfFiles
+	go writeStats.checkProgress("Writing Benchmark", finishChan)
+	for i := 0; i < *b.numberOfFiles; i++ {
+		idChan <- i
+	}
+	close(idChan)
+	wait.Wait()
+	writeStats.end = time.Now()
+	wait.Add(2)
+	finishChan <- true
+	finishChan <- true
+	wait.Wait()
+	close(finishChan)
+	writeStats.printStats()
+}
+
+func bench_read() {
+	fileIdLineChan := make(chan string)
+	finishChan := make(chan bool)
+	readStats = newStats(*b.concurrency)
+	go readFileIds(*b.idListFile, fileIdLineChan)
+	readStats.start = time.Now()
+	readStats.total = *b.numberOfFiles
+	go readStats.checkProgress("Randomly Reading Benchmark", finishChan)
+	for i := 0; i < *b.concurrency; i++ {
+		wait.Add(1)
+		go readFiles(fileIdLineChan, &readStats.localStats[i])
+	}
+	wait.Wait()
+	wait.Add(1)
+	finishChan <- true
+	wait.Wait()
+	close(finishChan)
+	readStats.end = time.Now()
+	readStats.printStats()
+}
+
+type delayedFile struct {
+	enterTime time.Time
+	fp        *operation.FilePart
+}
+
+func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) {
+	defer wait.Done()
+	delayedDeleteChan := make(chan *delayedFile, 100)
+	var waitForDeletions sync.WaitGroup
+	secret := security.Secret(*b.secretKey)
+
+	for i := 0; i < 7; i++ {
+		waitForDeletions.Add(1)
+		go func() {
+			defer waitForDeletions.Done()
+			for df := range delayedDeleteChan {
+				if df.enterTime.After(time.Now()) {
+					time.Sleep(df.enterTime.Sub(time.Now()))
+				}
+				if e := util.Delete("http://"+df.fp.Server+"/"+df.fp.Fid,
+					security.GenJwt(secret, df.fp.Fid)); e == nil {
+					s.completed++
+				} else {
+					s.failed++
+				}
+			}
+		}()
+	}
+
+	for id := range idChan {
+		start := time.Now()
+		fileSize := int64(*b.fileSize + rand.Intn(64))
+		fp := &operation.FilePart{Reader: &FakeReader{id: uint64(id), size: fileSize}, FileSize: fileSize}
+		if assignResult, err := operation.Assign(*b.server, 1, "", *b.collection, ""); err == nil {
+			fp.Server, fp.Fid, fp.Collection = assignResult.Url, assignResult.Fid, *b.collection
+			if _, err := fp.Upload(0, *b.server, secret); err == nil {
+				if rand.Intn(100) < *b.deletePercentage {
+					s.total++
+					delayedDeleteChan <- &delayedFile{time.Now().Add(time.Second), fp}
+				} else {
+					fileIdLineChan <- fp.Fid
+				}
+				s.completed++
+				s.transferred += fileSize
+			} else {
+				s.failed++
+				fmt.Printf("Failed to write with error:%v\n", err)
+			}
+			writeStats.addSample(time.Now().Sub(start))
+			if *cmdBenchmark.IsDebug {
+				fmt.Printf("writing %d file %s\n", id, fp.Fid)
+			}
+		} else {
+			s.failed++
+			println("writing file error:", err.Error())
+		}
+	}
+	close(delayedDeleteChan)
+	waitForDeletions.Wait()
+}
+
+func readFiles(fileIdLineChan chan string, s *stat) {
+	defer wait.Done()
+	for fid := range fileIdLineChan {
+		if len(fid) == 0 {
+			continue
+		}
+		if fid[0] == '#' {
+			continue
+		}
+		if *cmdBenchmark.IsDebug {
+			fmt.Printf("reading file %s\n", fid)
+		}
+		parts := strings.SplitN(fid, ",", 2)
+		vid := parts[0]
+		start := time.Now()
+		ret, err := operation.Lookup(*b.server, vid)
+		if err != nil || len(ret.Locations) == 0 {
+			s.failed++
+			println("!!!! volume id ", vid, " location not found!!!!!")
+			continue
+		}
+		server := ret.Locations[rand.Intn(len(ret.Locations))].Url
+		url := "http://" + server + "/" + fid
+		if bytesRead, err := util.Get(url); err == nil {
+			s.completed++
+			s.transferred += int64(len(bytesRead))
+			readStats.addSample(time.Now().Sub(start))
+		} else {
+			s.failed++
+			fmt.Printf("Failed to read %s error:%v\n", url, err)
+		}
+	}
+}
+
+func writeFileIds(fileName string, fileIdLineChan chan string, finishChan chan bool) {
+	file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+	if err != nil {
+		glog.Fatalf("Failed to create file %s: %s\n", fileName, err)
+	}
+	defer file.Close()
+
+	for {
+		select {
+		case <-finishChan:
+			wait.Done()
+			return
+		case line := <-fileIdLineChan:
+			file.Write([]byte(line))
+			file.Write([]byte("\n"))
+		}
+	}
+}
+
+func readFileIds(fileName string, fileIdLineChan chan string) {
+	file, err := os.Open(fileName) // For read access.
+	if err != nil {
+		glog.Fatalf("Failed to read file %s: %s\n", fileName, err)
+	}
+	defer file.Close()
+
+	r := bufio.NewReader(file)
+	if *b.sequentialRead {
+		for {
+			if line, err := Readln(r); err == nil {
+				fileIdLineChan <- string(line)
+			} else {
+				break
+			}
+		}
+	} else {
+		lines := make([]string, 0, readStats.total)
+		for {
+			if line, err := Readln(r); err == nil {
+				lines = append(lines, string(line))
+			} else {
+				break
+			}
+		}
+		if len(lines) > 0 {
+			for i := 0; i < readStats.total; i++ {
+				fileIdLineChan <- lines[rand.Intn(len(lines))]
+			}
+		}
+	}
+
+	close(fileIdLineChan)
+}
+
+const (
+	benchResolution = 10000 // each bucket is 0.1 millisecond
+	benchBucket     = 1000000000 / benchResolution
+)
+
+// An efficient statistics collecting and rendering
+type stats struct {
+	data       []int
+	overflow   []int
+	localStats []stat
+	start      time.Time
+	end        time.Time
+	total      int
+}
+type stat struct {
+	completed   int
+	failed      int
+	total       int
+	transferred int64
+}
+
+var percentages = []int{50, 66, 75, 80, 90, 95, 98, 99, 100}
+
+func newStats(n int) *stats {
+	return &stats{
+		data:       make([]int, benchResolution),
+		overflow:   make([]int, 0),
+		localStats: make([]stat, n),
+	}
+}
+
+func (s *stats) addSample(d time.Duration) {
+	index := int(d / benchBucket)
+	if index < 0 {
+		fmt.Printf("This request takes %3.1f seconds, skipping!\n", float64(index)/10000)
+	} else if index < len(s.data) {
+		s.data[index]++
+	} else {
+		s.overflow = append(s.overflow, index)
+	}
+}
+
+func (s *stats) checkProgress(testName string, finishChan chan bool) {
+	fmt.Printf("\n------------ %s ----------\n", testName)
+	ticker := time.Tick(time.Second)
+	lastCompleted, lastTransferred, lastTime := 0, int64(0), time.Now()
+	for {
+		select {
+		case <-finishChan:
+			wait.Done()
+			return
+		case t := <-ticker:
+			completed, transferred, taken, total := 0, int64(0), t.Sub(lastTime), s.total
+			for _, localStat := range s.localStats {
+				completed += localStat.completed
+				transferred += localStat.transferred
+				total += localStat.total
+			}
+			fmt.Printf("Completed %d of %d requests, %3.1f%% %3.1f/s %3.1fMB/s\n",
+				completed, total, float64(completed)*100/float64(total),
+				float64(completed-lastCompleted)*float64(int64(time.Second))/float64(int64(taken)),
+				float64(transferred-lastTransferred)*float64(int64(time.Second))/float64(int64(taken))/float64(1024*1024),
+			)
+			lastCompleted, lastTransferred, lastTime = completed, transferred, t
+		}
+	}
+}
+
+func (s *stats) printStats() {
+	completed, failed, transferred, total := 0, 0, int64(0), s.total
+	for _, localStat := range s.localStats {
+		completed += localStat.completed
+		failed += localStat.failed
+		transferred += localStat.transferred
+		total += localStat.total
+	}
+	timeTaken := float64(int64(s.end.Sub(s.start))) / 1000000000
+	fmt.Printf("\nConcurrency Level:      %d\n", *b.concurrency)
+	fmt.Printf("Time taken for tests:   %.3f seconds\n", timeTaken)
+	fmt.Printf("Complete requests:      %d\n", completed)
+	fmt.Printf("Failed requests:        %d\n", failed)
+	fmt.Printf("Total transferred:      %d bytes\n", transferred)
+	fmt.Printf("Requests per second:    %.2f [#/sec]\n", float64(completed)/timeTaken)
+	fmt.Printf("Transfer rate:          %.2f [Kbytes/sec]\n", float64(transferred)/1024/timeTaken)
+	n, sum := 0, 0
+	min, max := 10000000, 0
+	for i := 0; i < len(s.data); i++ {
+		n += s.data[i]
+		sum += s.data[i] * i
+		if s.data[i] > 0 {
+			if min > i {
+				min = i
+			}
+			if max < i {
+				max = i
+			}
+		}
+	}
+	n += len(s.overflow)
+	for i := 0; i < len(s.overflow); i++ {
+		sum += s.overflow[i]
+		if min > s.overflow[i] {
+			min = s.overflow[i]
+		}
+		if max < s.overflow[i] {
+			max = s.overflow[i]
+		}
+	}
+	avg := float64(sum) / float64(n)
+	varianceSum := 0.0
+	for i := 0; i < len(s.data); i++ {
+		if s.data[i] > 0 {
+			d := float64(i) - avg
+			varianceSum += d * d * float64(s.data[i])
+		}
+	}
+	for i := 0; i < len(s.overflow); i++ {
+		d := float64(s.overflow[i]) - avg
+		varianceSum += d * d
+	}
+	std := math.Sqrt(varianceSum / float64(n))
+	fmt.Printf("\nConnection Times (ms)\n")
+	fmt.Printf("              min      avg        max      std\n")
+	fmt.Printf("Total:        %2.1f      %3.1f       %3.1f      %3.1f\n", float32(min)/10, float32(avg)/10, float32(max)/10, std/10)
+	// printing percentiles
+	fmt.Printf("\nPercentage of the requests served within a certain time (ms)\n")
+	percentiles := make([]int, len(percentages))
+	for i := 0; i < len(percentages); i++ {
+		percentiles[i] = n * percentages[i] / 100
+	}
+	percentiles[len(percentiles)-1] = n
+	percentileIndex := 0
+	currentSum := 0
+	for i := 0; i < len(s.data); i++ {
+		currentSum += s.data[i]
+		if s.data[i] > 0 && percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] {
+			fmt.Printf("  %3d%%    %5.1f ms\n", percentages[percentileIndex], float32(i)/10.0)
+			percentileIndex++
+			for percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] {
+				percentileIndex++
+			}
+		}
+	}
+	sort.Ints(s.overflow)
+	for i := 0; i < len(s.overflow); i++ {
+		currentSum++
+		if percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] {
+			fmt.Printf("  %3d%%    %5.1f ms\n", percentages[percentileIndex], float32(s.overflow[i])/10.0)
+			percentileIndex++
+			for percentileIndex < len(percentiles) && currentSum >= percentiles[percentileIndex] {
+				percentileIndex++
+			}
+		}
+	}
+}
+
+// a fake reader to generate content to upload
+type FakeReader struct {
+	id   uint64 // an id number
+	size int64  // max bytes
+}
+
+func (l *FakeReader) Read(p []byte) (n int, err error) {
+	if l.size <= 0 {
+		return 0, io.EOF
+	}
+	if int64(len(p)) > l.size {
+		n = int(l.size)
+	} else {
+		n = len(p)
+	}
+	if n >= 8 {
+		for i := 0; i < 8; i++ {
+			p[i] = byte(l.id >> uint(i*8))
+		}
+	}
+	l.size -= int64(n)
+	return
+}
+
+func (l *FakeReader) WriteTo(w io.Writer) (n int64, err error) {
+	size := int(l.size)
+	bufferSize := len(sharedBytes)
+	for size > 0 {
+		tempBuffer := sharedBytes
+		if size < bufferSize {
+			tempBuffer = sharedBytes[0:size]
+		}
+		count, e := w.Write(tempBuffer)
+		if e != nil {
+			return int64(size), e
+		}
+		size -= count
+	}
+	return l.size, nil
+}
+
+func Readln(r *bufio.Reader) ([]byte, error) {
+	var (
+		isPrefix = true
+		err      error
+		line, ln []byte
+	)
+	for isPrefix && err == nil {
+		line, isPrefix, err = r.ReadLine()
+		ln = append(ln, line...)
+	}
+	return ln, err
+}
diff --git a/weed/command/command.go b/weed/command/command.go
new file mode 100644
index 000000000..d654f57cd
--- /dev/null
+++ b/weed/command/command.go
@@ -0,0 +1,71 @@
+package command
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"strings"
+)
+
+var Commands = []*Command{
+	cmdBenchmark,
+	cmdBackup,
+	cmdCompact,
+	cmdFix,
+	cmdServer,
+	cmdMaster,
+	cmdFiler,
+	cmdUpload,
+	cmdDownload,
+	cmdShell,
+	cmdVersion,
+	cmdVolume,
+	cmdExport,
+	cmdMount,
+}
+
+type Command struct {
+	// Run runs the command.
+	// The args are the arguments after the command name.
+	Run func(cmd *Command, args []string) bool
+
+	// UsageLine is the one-line usage message.
+	// The first word in the line is taken to be the command name.
+	UsageLine string
+
+	// Short is the short description shown in the 'weed help' output.
+	Short string
+
+	// Long is the long message shown in the 'weed help <this-command>' output.
+	Long string
+
+	// Flag is a set of flags specific to this command.
+	Flag flag.FlagSet
+
+	IsDebug *bool
+}
+
+// Name returns the command's name: the first word in the usage line.
+func (c *Command) Name() string {
+	name := c.UsageLine
+	i := strings.Index(name, " ")
+	if i >= 0 {
+		name = name[:i]
+	}
+	return name
+}
+
+func (c *Command) Usage() {
+	fmt.Fprintf(os.Stderr, "Example: weed %s\n", c.UsageLine)
+	fmt.Fprintf(os.Stderr, "Default Usage:\n")
+	c.Flag.PrintDefaults()
+	fmt.Fprintf(os.Stderr, "Description:\n")
+	fmt.Fprintf(os.Stderr, "  %s\n", strings.TrimSpace(c.Long))
+	os.Exit(2)
+}
+
+// Runnable reports whether the command can be run; otherwise
+// it is a documentation pseudo-command such as importpath.
+func (c *Command) Runnable() bool {
+	return c.Run != nil
+}
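The Command struct above is the extension point for every subcommand in this change. As a minimal sketch of how a hypothetical new subcommand would be wired in under this pattern (the "hello" command below is invented for illustration and would also need to be appended to the Commands slice):

    // hello.go -- hypothetical subcommand, not part of this change
    package command

    import "fmt"

    var cmdHello = &Command{
    	UsageLine: "hello -name=world",
    	Short:     "print a greeting",
    	Long:      `print a greeting, demonstrating the Command wiring.`,
    }

    var helloName = cmdHello.Flag.String("name", "world", "who to greet")

    func init() {
    	cmdHello.Run = runHello // break init cycle, same as the real commands
    }

    func runHello(cmd *Command, args []string) bool {
    	fmt.Printf("hello, %s\n", *helloName)
    	return true
    }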
diff --git a/weed/command/compact.go b/weed/command/compact.go
new file mode 100644
index 000000000..ba2fbf867
--- /dev/null
+++ b/weed/command/compact.go
@@ -0,0 +1,45 @@
+package command
+
+import (
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+func init() {
+	cmdCompact.Run = runCompact // break init cycle
+}
+
+var cmdCompact = &Command{
+	UsageLine: "compact -dir=/tmp -volumeId=234",
+	Short:     "run weed tool compact on volume file",
+	Long: `Force a compaction to remove deleted files from volume files.
+	The compacted .dat file is stored as a .cpd file.
+	The compacted .idx file is stored as a .cpx file.
+
+  `,
+}
+
+var (
+	compactVolumePath       = cmdCompact.Flag.String("dir", ".", "data directory to store files")
+	compactVolumeCollection = cmdCompact.Flag.String("collection", "", "volume collection name")
+	compactVolumeId         = cmdCompact.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir.")
+)
+
+func runCompact(cmd *Command, args []string) bool {
+
+	if *compactVolumeId == -1 {
+		return false
+	}
+
+	vid := storage.VolumeId(*compactVolumeId)
+	v, err := storage.NewVolume(*compactVolumePath, *compactVolumeCollection, vid,
+		storage.NeedleMapInMemory, nil, nil)
+	if err != nil {
+		glog.Fatalf("Load Volume [ERROR] %s\n", err)
+	}
+	if err = v.Compact(); err != nil {
+		glog.Fatalf("Compact Volume [ERROR] %s\n", err)
+	}
+
+	return true
+}
diff --git a/weed/command/download.go b/weed/command/download.go
new file mode 100644
index 000000000..39ed2b38e
--- /dev/null
+++ b/weed/command/download.go
@@ -0,0 +1,130 @@
+package command
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path"
+	"strings"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var (
+	d DownloadOptions
+)
+
+type DownloadOptions struct {
+	server *string
+	dir    *string
+}
+
+func init() {
+	cmdDownload.Run = runDownload // break init cycle
+	d.server = cmdDownload.Flag.String("server", "localhost:9333", "SeaweedFS master location")
+	d.dir = cmdDownload.Flag.String("dir", ".", "Download the whole folder recursively if specified.")
+}
+
+var cmdDownload = &Command{
+	UsageLine: "download -server=localhost:9333 -dir=one_directory fid1 [fid2 fid3 ...]",
+	Short:     "download files by file id",
+	Long: `download files by file id.
+
+	Usually you just need to use curl to lookup the file's volume server, and then download it directly.
+	This download tool combines the two steps into one.
+
+	What's more, if you use the "weed upload -maxMB=..." option to upload a big file divided into chunks, you can
+	use this tool to download the chunks and merge them automatically.
+
+  `,
+}
+
+func runDownload(cmd *Command, args []string) bool {
+	for _, fid := range args {
+		if e := downloadToFile(*d.server, fid, *d.dir); e != nil {
+			fmt.Println("Download Error: ", fid, e)
+		}
+	}
+	return true
+}
+
+func downloadToFile(server, fileId, saveDir string) error {
+	fileUrl, lookupError := operation.LookupFileId(server, fileId)
+	if lookupError != nil {
+		return lookupError
+	}
+	filename, rc, err := util.DownloadUrl(fileUrl)
+	if err != nil {
+		return err
+	}
+	defer rc.Close()
+	if filename == "" {
+		filename = fileId
+	}
+	isFileList := false
+	if strings.HasSuffix(filename, "-list") {
+		// old command compatible
+		isFileList = true
+		filename = filename[0 : len(filename)-len("-list")]
+	}
+	f, err := os.OpenFile(path.Join(saveDir, filename), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	if isFileList {
+		content, err := ioutil.ReadAll(rc)
+		if err != nil {
+			return err
+		}
+		fids := strings.Split(string(content), "\n")
+		for _, partId := range fids {
+			var n int
+			_, part, err := fetchContent(*d.server, partId)
+			if err == nil {
+				n, err = f.Write(part)
+			}
+			if err == nil && n < len(part) {
+				err = io.ErrShortWrite
+			}
+			if err != nil {
+				return err
+			}
+		}
+	} else {
+		if _, err = io.Copy(f, rc); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func fetchContent(server string, fileId string) (filename string, content []byte, e error) {
+	fileUrl, lookupError := operation.LookupFileId(server, fileId)
+	if lookupError != nil {
+		return "", nil, lookupError
+	}
+	var rc io.ReadCloser
+	if filename, rc, e = util.DownloadUrl(fileUrl); e != nil {
+		return "", nil, e
+	}
+	content, e = ioutil.ReadAll(rc)
+	rc.Close()
+	return
+}
+
+func WriteFile(filename string, data []byte, perm os.FileMode) error {
+	f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
+	if err != nil {
+		return err
+	}
+	n, err := f.Write(data)
+	f.Close()
+	if err == nil && n < len(data) {
+		err = io.ErrShortWrite
+	}
+	return err
+}
diff --git a/weed/command/export.go b/weed/command/export.go
new file mode 100644
index 000000000..481aa111b
--- /dev/null
+++ b/weed/command/export.go
@@ -0,0 +1,213 @@
+package command
+
+import (
+	"archive/tar"
+	"bytes"
+	"fmt"
+	"os"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"text/template"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+const (
+	defaultFnFormat = `{{.Mime}}/{{.Id}}:{{.Name}}`
+	timeFormat      = "2006-01-02T15:04:05"
+)
+
+var (
+	export ExportOptions
+)
+
+type ExportOptions struct {
+	dir        *string
+	collection *string
+	volumeId   *int
+}
+
+var cmdExport = &Command{
+	UsageLine: "export -dir=/tmp -volumeId=234 -o=/dir/name.tar -fileNameFormat={{.Name}} -newer='" + timeFormat + "'",
+	Short:     "list or export files from one volume data file",
+	Long: `List all files in a volume, or export all files in a volume to a tar file if the output is specified.
+
+	The format of the file name in the tar file can be customized. Default is {{.Mime}}/{{.Id}}:{{.Name}}. Also available is {{.Key}}.
+
+  `,
+}
+
+func init() {
+	cmdExport.Run = runExport // break init cycle
+	export.dir = cmdExport.Flag.String("dir", ".", "input data directory to store volume data files")
+	export.collection = cmdExport.Flag.String("collection", "", "the volume collection name")
+	export.volumeId = cmdExport.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.")
+}
+
+var (
+	output = cmdExport.Flag.String("o", "", "output tar file name, must end with .tar, or just a \"-\" for stdout")
+	format = cmdExport.Flag.String("fileNameFormat", defaultFnFormat, "filename formatted with {{.Mime}} {{.Id}} {{.Name}} {{.Ext}}")
+	newer  = cmdExport.Flag.String("newer", "", "export only files newer than this time, default is all files. Must be specified in RFC3339 without timezone")
+
+	tarOutputFile          *tar.Writer
+	tarHeader              tar.Header
+	fileNameTemplate       *template.Template
+	fileNameTemplateBuffer = bytes.NewBuffer(nil)
+	newerThan              time.Time
+	newerThanUnix          int64 = -1
+	localLocation, _             = time.LoadLocation("Local")
+)
+
+func runExport(cmd *Command, args []string) bool {
+
+	var err error
+
+	if *newer != "" {
+		if newerThan, err = time.ParseInLocation(timeFormat, *newer, localLocation); err != nil {
+			fmt.Println("cannot parse 'newer' argument: " + err.Error())
+			return false
+		}
+		newerThanUnix = newerThan.Unix()
+	}
+
+	if *export.volumeId == -1 {
+		return false
+	}
+
+	if *output != "" {
+		if *output != "-" && !strings.HasSuffix(*output, ".tar") {
+			fmt.Println("the output file", *output, "should be '-' or end with .tar")
+			return false
+		}
+
+		if fileNameTemplate, err = template.New("name").Parse(*format); err != nil {
+			fmt.Println("cannot parse format " + *format + ": " + err.Error())
+			return false
+		}
+
+		var outputFile *os.File
+		if *output == "-" {
+			outputFile = os.Stdout
+		} else {
+			if outputFile, err = os.Create(*output); err != nil {
+				glog.Fatalf("cannot open output tar %s: %s", *output, err)
+			}
+		}
+		defer outputFile.Close()
+		tarOutputFile = tar.NewWriter(outputFile)
+		defer tarOutputFile.Close()
+		t := time.Now()
+		tarHeader = tar.Header{Mode: 0644,
+			ModTime: t, Uid: os.Getuid(), Gid: os.Getgid(),
+			Typeflag:   tar.TypeReg,
+			AccessTime: t, ChangeTime: t}
+	}
+
+	fileName := strconv.Itoa(*export.volumeId)
+	if *export.collection != "" {
+		fileName = *export.collection + "_" + fileName
+	}
+	vid := storage.VolumeId(*export.volumeId)
+	indexFile, err := os.OpenFile(path.Join(*export.dir, fileName+".idx"), os.O_RDONLY, 0644)
+	if err != nil {
+		glog.Fatalf("Create Volume Index [ERROR] %s\n", err)
+	}
+	defer indexFile.Close()
+
+	needleMap, err := storage.LoadNeedleMap(indexFile)
+	if err != nil {
+		glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err)
+	}
+
+	var version storage.Version
+
+	err = storage.ScanVolumeFile(*export.dir, *export.collection, vid,
+		storage.NeedleMapInMemory,
+		func(superBlock storage.SuperBlock) error {
+			version = superBlock.Version()
+			return nil
+		}, true, func(n *storage.Needle, offset int64) error {
+			nv, ok := needleMap.Get(n.Id)
+			glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v",
+				n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped(), ok, nv)
+			if ok && nv.Size > 0 && int64(nv.Offset)*8 == offset {
+				if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) {
+					glog.V(3).Infof("Skipping this file, as it's older than the -newer threshold: LastModified %d vs %d",
+						n.LastModified, newerThanUnix)
+					return nil
+				}
+				return walker(vid, n, version)
+			}
+			if !ok {
+				glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size)
+			} else {
+				glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size)
+			}
+			return nil
+		})
+	if err != nil {
+		glog.Fatalf("Export Volume File [ERROR] %s\n", err)
+	}
+	return true
+}
+
+type nameParams struct {
+	Name string
+	Id   uint64
+	Mime string
+	Key  string
+	Ext  string
+}
+
+func walker(vid storage.VolumeId, n *storage.Needle, version storage.Version) (err error) {
+	key := storage.NewFileIdFromNeedle(vid, n).String()
+	if tarOutputFile != nil {
+		fileNameTemplateBuffer.Reset()
+		if err = fileNameTemplate.Execute(fileNameTemplateBuffer,
+			nameParams{
+				Name: string(n.Name),
+				Id:   n.Id,
+				Mime: string(n.Mime),
+				Key:  key,
+				Ext:  filepath.Ext(string(n.Name)),
+			},
+		); err != nil {
+			return err
+		}
+
+		fileName := fileNameTemplateBuffer.String()
+
+		if n.IsGzipped() && path.Ext(fileName) != ".gz" {
+			fileName = fileName + ".gz"
+		}
+
+		tarHeader.Name, tarHeader.Size = fileName, int64(len(n.Data))
+		if n.HasLastModifiedDate() {
+			tarHeader.ModTime = time.Unix(int64(n.LastModified), 0)
+		} else {
+			tarHeader.ModTime = time.Unix(0, 0)
+		}
+		tarHeader.ChangeTime = tarHeader.ModTime
+		if err = tarOutputFile.WriteHeader(&tarHeader); err != nil {
+			return err
+		}
+		_, err = tarOutputFile.Write(n.Data)
+	} else {
+		size := n.DataSize
+		if version == storage.Version1 {
+			size = n.Size
+		}
+		fmt.Printf("key=%s Name=%s Size=%d gzip=%t mime=%s\n",
+			key,
+			n.Name,
+			size,
+			n.IsGzipped(),
+			n.Mime,
+		)
+	}
+	return
+}
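The -fileNameFormat flag above is parsed with Go's standard text/template package against the nameParams struct. A self-contained sketch of how the default format renders; the sample field values are invented for illustration:

    package main

    import (
    	"bytes"
    	"fmt"
    	"text/template"
    )

    type nameParams struct {
    	Name string
    	Id   uint64
    	Mime string
    	Key  string
    	Ext  string
    }

    func main() {
    	// default format used by "weed export"
    	tmpl := template.Must(template.New("name").Parse(`{{.Mime}}/{{.Id}}:{{.Name}}`))
    	var buf bytes.Buffer
    	// sample values, made up for this example
    	p := nameParams{Name: "photo.jpg", Id: 7, Mime: "image/jpeg", Key: "3,07f1a2", Ext: ".jpg"}
    	if err := tmpl.Execute(&buf, p); err != nil {
    		panic(err)
    	}
    	fmt.Println(buf.String()) // image/jpeg/7:photo.jpg
    }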
server", + Long: `start a file server which accepts REST operation for any files. + + //create or overwrite the file, the directories /path/to will be automatically created + POST /path/to/file + //get the file content + GET /path/to/file + //create or overwrite the file, the filename in the multipart request will be used + POST /path/to/ + //return a json format subdirectory and files listing + GET /path/to/ + + Current <fullpath~fileid> mapping metadata store is local embedded leveldb. + It should be highly scalable to hundreds of millions of files on a modest machine. + + Future we will ensure it can avoid of being SPOF. + + `, +} + +func runFiler(cmd *Command, args []string) bool { + + if err := util.TestFolderWritable(*f.dir); err != nil { + glog.Fatalf("Check Meta Folder (-dir) Writable %s : %s", *f.dir, err) + } + + r := http.NewServeMux() + _, nfs_err := weed_server.NewFilerServer(r, *f.ip, *f.port, *f.master, *f.dir, *f.collection, + *f.defaultReplicaPlacement, *f.redirectOnRead, *f.disableDirListing, + *f.secretKey, + *f.cassandra_server, *f.cassandra_keyspace, + *f.redis_server, *f.redis_password, *f.redis_database, + ) + if nfs_err != nil { + glog.Fatalf("Filer startup error: %v", nfs_err) + } + glog.V(0).Infoln("Start Seaweed Filer", util.VERSION, "at port", strconv.Itoa(*f.port)) + filerListener, e := util.NewListener( + ":"+strconv.Itoa(*f.port), + time.Duration(10)*time.Second, + ) + if e != nil { + glog.Fatalf("Filer listener error: %v", e) + } + if e := http.Serve(filerListener, r); e != nil { + glog.Fatalf("Filer Fail to serve: %v", e) + } + + return true +} diff --git a/weed/command/fix.go b/weed/command/fix.go new file mode 100644 index 000000000..2ec74d026 --- /dev/null +++ b/weed/command/fix.go @@ -0,0 +1,70 @@ +package command + +import ( + "os" + "path" + "strconv" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +func init() { + cmdFix.Run = runFix // break init cycle +} + +var cmdFix = &Command{ + UsageLine: "fix -dir=/tmp -volumeId=234", + Short: "run weed tool fix on index file if corrupted", + Long: `Fix runs the SeaweedFS fix command to re-create the index .idx file. + + `, +} + +var ( + fixVolumePath = cmdFix.Flag.String("dir", ".", "data directory to store files") + fixVolumeCollection = cmdFix.Flag.String("collection", "", "the volume collection name") + fixVolumeId = cmdFix.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. 
diff --git a/weed/command/fix.go b/weed/command/fix.go
new file mode 100644
index 000000000..2ec74d026
--- /dev/null
+++ b/weed/command/fix.go
@@ -0,0 +1,70 @@
+package command
+
+import (
+	"os"
+	"path"
+	"strconv"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+func init() {
+	cmdFix.Run = runFix // break init cycle
+}
+
+var cmdFix = &Command{
+	UsageLine: "fix -dir=/tmp -volumeId=234",
+	Short:     "run weed tool fix on index file if corrupted",
+	Long: `Fix runs the SeaweedFS fix command to re-create the index .idx file.
+
+  `,
+}
+
+var (
+	fixVolumePath       = cmdFix.Flag.String("dir", ".", "data directory to store files")
+	fixVolumeCollection = cmdFix.Flag.String("collection", "", "the volume collection name")
+	fixVolumeId         = cmdFix.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
+)
+
+func runFix(cmd *Command, args []string) bool {
+
+	if *fixVolumeId == -1 {
+		return false
+	}
+
+	fileName := strconv.Itoa(*fixVolumeId)
+	if *fixVolumeCollection != "" {
+		fileName = *fixVolumeCollection + "_" + fileName
+	}
+	indexFile, err := os.OpenFile(path.Join(*fixVolumePath, fileName+".idx"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+	if err != nil {
+		glog.Fatalf("Create Volume Index [ERROR] %s\n", err)
+	}
+	defer indexFile.Close()
+
+	nm := storage.NewNeedleMap(indexFile)
+	defer nm.Close()
+
+	vid := storage.VolumeId(*fixVolumeId)
+	err = storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid,
+		storage.NeedleMapInMemory,
+		func(superBlock storage.SuperBlock) error {
+			return nil
+		}, false, func(n *storage.Needle, offset int64) error {
+			glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(), n.IsGzipped())
+			if n.Size > 0 {
+				pe := nm.Put(n.Id, uint32(offset/storage.NeedlePaddingSize), n.Size)
+				glog.V(2).Infof("saved %d with error %v", n.Size, pe)
+			} else {
+				glog.V(2).Infof("skipping deleted file ...")
+				return nm.Delete(n.Id)
+			}
+			return nil
+		})
+	if err != nil {
+		glog.Fatalf("Scan Volume File [ERROR] %s\n", err)
+	}
+
+	return true
+}
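Note how fix.go stores each needle's byte offset divided by storage.NeedlePaddingSize so it fits in a uint32, while export.go above multiplies the stored offset by 8 when comparing. The sketch below assumes the padding size is therefore 8 bytes; that value is inferred from the *8 in export.go, not stated in this change:

    package main

    import "fmt"

    const needlePaddingSize = 8 // assumption, inferred from the *8 in export.go

    // packOffset mirrors how fix.go stores a needle's byte offset in the
    // .idx file: divided by the padding size so it fits in a uint32.
    func packOffset(byteOffset int64) uint32 { return uint32(byteOffset / needlePaddingSize) }

    // unpackOffset is the inverse used when scanning, matching export.go's
    // int64(nv.Offset)*8 == offset comparison.
    func unpackOffset(stored uint32) int64 { return int64(stored) * needlePaddingSize }

    func main() {
    	off := int64(1 << 20)
    	fmt.Println(unpackOffset(packOffset(off)) == off) // true for 8-byte-aligned offsets
    }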
diff --git a/weed/command/master.go b/weed/command/master.go
new file mode 100644
index 000000000..aed8fc793
--- /dev/null
+++ b/weed/command/master.go
@@ -0,0 +1,91 @@
+package command
+
+import (
+	"net/http"
+	"os"
+	"runtime"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/server"
+	"github.com/chrislusf/seaweedfs/weed/util"
+	"github.com/gorilla/mux"
+)
+
+func init() {
+	cmdMaster.Run = runMaster // break init cycle
+}
+
+var cmdMaster = &Command{
+	UsageLine: "master -port=9333",
+	Short:     "start a master server",
+	Long: `start a master server to provide volume=>location mapping service
+	and sequence number of file ids
+
+  `,
+}
+
+var (
+	mport                   = cmdMaster.Flag.Int("port", 9333, "http listen port")
+	masterIp                = cmdMaster.Flag.String("ip", "localhost", "master <ip>|<server> address")
+	masterBindIp            = cmdMaster.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to")
+	metaFolder              = cmdMaster.Flag.String("mdir", os.TempDir(), "data directory to store meta data")
+	masterPeers             = cmdMaster.Flag.String("peers", "", "other master nodes in comma separated ip:port list, example: 127.0.0.1:9093,127.0.0.1:9094")
+	volumeSizeLimitMB       = cmdMaster.Flag.Uint("volumeSizeLimitMB", 30*1000, "Master stops directing writes to oversized volumes.")
+	mpulse                  = cmdMaster.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats")
+	confFile                = cmdMaster.Flag.String("conf", "/etc/weedfs/weedfs.conf", "Deprecating! xml configuration file")
+	defaultReplicaPlacement = cmdMaster.Flag.String("defaultReplication", "000", "Default replication type if not specified.")
+	mTimeout                = cmdMaster.Flag.Int("idleTimeout", 10, "connection idle seconds")
+	mMaxCpu                 = cmdMaster.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs")
+	garbageThreshold        = cmdMaster.Flag.String("garbageThreshold", "0.3", "threshold to vacuum and reclaim spaces")
+	masterWhiteListOption   = cmdMaster.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. No limit if empty.")
+	masterSecureKey         = cmdMaster.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)")
+
+	masterWhiteList []string
+)
+
+func runMaster(cmd *Command, args []string) bool {
+	if *mMaxCpu < 1 {
+		*mMaxCpu = runtime.NumCPU()
+	}
+	runtime.GOMAXPROCS(*mMaxCpu)
+	if err := util.TestFolderWritable(*metaFolder); err != nil {
+		glog.Fatalf("Check Meta Folder (-mdir) Writable %s : %s", *metaFolder, err)
+	}
+	if *masterWhiteListOption != "" {
+		masterWhiteList = strings.Split(*masterWhiteListOption, ",")
+	}
+
+	r := mux.NewRouter()
+	ms := weed_server.NewMasterServer(r, *mport, *metaFolder,
+		*volumeSizeLimitMB, *mpulse, *confFile, *defaultReplicaPlacement, *garbageThreshold,
+		masterWhiteList, *masterSecureKey,
+	)
+
+	listeningAddress := *masterBindIp + ":" + strconv.Itoa(*mport)
+
+	glog.V(0).Infoln("Start Seaweed Master", util.VERSION, "at", listeningAddress)
+
+	listener, e := util.NewListener(listeningAddress, time.Duration(*mTimeout)*time.Second)
+	if e != nil {
+		glog.Fatalf("Master startup error: %v", e)
+	}
+
+	go func() {
+		time.Sleep(100 * time.Millisecond)
+		myMasterAddress := *masterIp + ":" + strconv.Itoa(*mport)
+		var peers []string
+		if *masterPeers != "" {
+			peers = strings.Split(*masterPeers, ",")
+		}
+		raftServer := weed_server.NewRaftServer(r, peers, myMasterAddress, *metaFolder, ms.Topo, *mpulse)
+		ms.SetRaftServer(raftServer)
+	}()
+
+	if e := http.Serve(listener, r); e != nil {
+		glog.Fatalf("Fail to serve: %v", e)
+	}
+	return true
+}
diff --git a/weed/command/mount.go b/weed/command/mount.go
new file mode 100644
index 000000000..d6e87d76c
--- /dev/null
+++ b/weed/command/mount.go
@@ -0,0 +1,35 @@
+package command
+
+type MountOptions struct {
+	filer *string
+	dir   *string
+}
+
+var (
+	mountOptions MountOptions
+)
+
+func init() {
+	cmdMount.Run = runMount // break init cycle
+	cmdMount.IsDebug = cmdMount.Flag.Bool("debug", false, "verbose debug information")
+	mountOptions.filer = cmdMount.Flag.String("filer", "localhost:8888", "weed filer location")
+	mountOptions.dir = cmdMount.Flag.String("dir", ".", "mount weed filer to this directory")
+}
+
+var cmdMount = &Command{
+	UsageLine: "mount -filer=localhost:8888 -dir=/some/dir",
+	Short:     "mount weed filer to a directory as a file system in userspace (FUSE)",
+	Long: `mount weed filer to userspace.
+
+	Pre-requisites:
+	1) have SeaweedFS master and volume servers running
+	2) have a "weed filer" running
+	These 2 requirements can be achieved with one command "weed server -filer=true"
+
+	This uses bazil.org/fuse, which enables writing FUSE file systems on
+	Linux and OS X.
+
+	On OS X, it requires OSXFUSE (http://osxfuse.github.com/).
+
+  `,
+}
diff --git a/weed/command/mount_notsupported.go b/weed/command/mount_notsupported.go
new file mode 100644
index 000000000..3bf22ddc4
--- /dev/null
+++ b/weed/command/mount_notsupported.go
@@ -0,0 +1,15 @@
+// +build !linux
+// +build !darwin
+
+package command
+
+import (
+	"fmt"
+	"runtime"
+)
+
+func runMount(cmd *Command, args []string) bool {
+	fmt.Printf("Mount is not supported on %s %s\n", runtime.GOOS, runtime.GOARCH)
+
+	return true
+}
diff --git a/weed/command/mount_std.go b/weed/command/mount_std.go
new file mode 100644
index 000000000..b086d8cbf
--- /dev/null
+++ b/weed/command/mount_std.go
@@ -0,0 +1,106 @@
+// +build linux darwin
+
+package command
+
+import (
+	"fmt"
+	"runtime"
+
+	"bazil.org/fuse"
+	"bazil.org/fuse/fs"
+	"github.com/chrislusf/seaweedfs/weed/filer"
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/storage"
+	"github.com/chrislusf/seaweedfs/weed/util"
+	"golang.org/x/net/context"
+)
+
+func runMount(cmd *Command, args []string) bool {
+	fmt.Printf("This is SeaweedFS version %s %s %s\n", util.VERSION, runtime.GOOS, runtime.GOARCH)
+	if *mountOptions.dir == "" {
+		fmt.Printf("Please specify the mount directory via \"-dir\"\n")
+		return false
+	}
+
+	c, err := fuse.Mount(*mountOptions.dir)
+	if err != nil {
+		glog.Fatal(err)
+		return false
+	}
+
+	OnInterrupt(func() {
+		fuse.Unmount(*mountOptions.dir)
+		c.Close()
+	})
+
+	err = fs.Serve(c, WFS{})
+	if err != nil {
+		fuse.Unmount(*mountOptions.dir)
+	}
+
+	// check if the mount process has an error to report
+	<-c.Ready
+	if err := c.MountError; err != nil {
+		glog.Fatal(err)
+	}
+
+	return true
+}
+
+type File struct {
+	FileId filer.FileId
+	Name   string
+}
+
+func (File) Attr(context context.Context, attr *fuse.Attr) error {
+	return nil
+}
+
+func (File) ReadAll(ctx context.Context) ([]byte, error) {
+	return []byte("hello, world\n"), nil
+}
+
+type Dir struct {
+	Path string
+	Id   uint64
+}
+
+func (dir Dir) Attr(context context.Context, attr *fuse.Attr) error {
+	return nil
+}
+
+func (dir Dir) Lookup(ctx context.Context, name string) (fs.Node, error) {
+	files_result, e := filer.ListFiles(*mountOptions.filer, dir.Path, name)
+	if e != nil {
+		return nil, fuse.ENOENT
+	}
+	if len(files_result.Files) > 0 {
+		return File{files_result.Files[0].Id, files_result.Files[0].Name}, nil
+	}
+	return nil, fmt.Errorf("File Not Found for %s", name)
+}
+
+type WFS struct{}
+
+func (WFS) Root() (fs.Node, error) {
+	return Dir{}, nil
+}
+
+func (dir *Dir) ReadDir(ctx context.Context) ([]fuse.Dirent, error) {
+	var ret []fuse.Dirent
+	if dirs, e := filer.ListDirectories(*mountOptions.filer, dir.Path); e == nil {
+		for _, d := range dirs.Directories {
+			dirId := uint64(d.Id)
+			ret = append(ret, fuse.Dirent{Inode: dirId, Name: d.Name, Type: fuse.DT_Dir})
+		}
+	}
+	if files, e := filer.ListFiles(*mountOptions.filer, dir.Path, ""); e == nil {
+		for _, f := range files.Files {
+			if fileId, e := storage.ParseFileId(string(f.Id)); e == nil {
+				fileInode := uint64(fileId.VolumeId)<<48 + fileId.Key
+				ret = append(ret, fuse.Dirent{Inode: fileInode, Name: f.Name, Type: fuse.DT_File})
+			}
+		}
+	}
+	return ret, nil
+}
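mount_std.go above derives a FUSE inode by packing the volume id above the needle key with a 48-bit shift. A small sketch of that round trip; the 16/48 bit split is inferred from the <<48 shift, not stated anywhere in this change:

    package main

    import "fmt"

    // toInode mirrors mount_std.go: volume id in the top 16 bits,
    // needle key in the low 48.
    func toInode(volumeId uint32, key uint64) uint64 {
    	return uint64(volumeId)<<48 + key
    }

    // fromInode is the illustrative inverse, assuming keys stay below 2^48.
    func fromInode(inode uint64) (volumeId uint32, key uint64) {
    	return uint32(inode >> 48), inode & ((1 << 48) - 1)
    }

    func main() {
    	v, k := fromInode(toInode(3, 0x07f1a2))
    	fmt.Println(v, k) // 3 520610
    }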
"github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/gorilla/mux" +) + +type ServerOptions struct { + cpuprofile *string +} + +var ( + serverOptions ServerOptions + filerOptions FilerOptions +) + +func init() { + cmdServer.Run = runServer // break init cycle +} + +var cmdServer = &Command{ + UsageLine: "server -port=8080 -dir=/tmp -volume.max=5 -ip=server_name", + Short: "start a server, including volume server, and automatically elect a master server", + Long: `start both a volume server to provide storage spaces + and a master server to provide volume=>location mapping service and sequence number of file ids + + This is provided as a convenient way to start both volume server and master server. + The servers are exactly the same as starting them separately. + + So other volume servers can use this embedded master server also. + + Optionally, one filer server can be started. Logically, filer servers should not be in a cluster. + They run with meta data on disk, not shared. So each filer server is different. + + `, +} + +var ( + serverIp = cmdServer.Flag.String("ip", "localhost", "ip or server name") + serverBindIp = cmdServer.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to") + serverMaxCpu = cmdServer.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs") + serverTimeout = cmdServer.Flag.Int("idleTimeout", 10, "connection idle seconds") + serverDataCenter = cmdServer.Flag.String("dataCenter", "", "current volume server's data center name") + serverRack = cmdServer.Flag.String("rack", "", "current volume server's rack name") + serverWhiteListOption = cmdServer.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. No limit if empty.") + serverPeers = cmdServer.Flag.String("master.peers", "", "other master nodes in comma separated ip:masterPort list") + serverSecureKey = cmdServer.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)") + serverGarbageThreshold = cmdServer.Flag.String("garbageThreshold", "0.3", "threshold to vacuum and reclaim spaces") + masterPort = cmdServer.Flag.Int("master.port", 9333, "master server http listen port") + masterMetaFolder = cmdServer.Flag.String("master.dir", "", "data directory to store meta data, default to same as -dir specified") + masterVolumeSizeLimitMB = cmdServer.Flag.Uint("master.volumeSizeLimitMB", 30*1000, "Master stops directing writes to oversized volumes.") + masterConfFile = cmdServer.Flag.String("master.conf", "/etc/weedfs/weedfs.conf", "xml configuration file") + masterDefaultReplicaPlacement = cmdServer.Flag.String("master.defaultReplicaPlacement", "000", "Default replication type if not specified.") + volumePort = cmdServer.Flag.Int("volume.port", 8080, "volume server http listen port") + volumePublicPort = cmdServer.Flag.Int("volume.port.public", 0, "volume server public port") + volumeDataFolders = cmdServer.Flag.String("dir", os.TempDir(), "directories to store data files. 
dir[,dir]...") + volumeMaxDataVolumeCounts = cmdServer.Flag.String("volume.max", "7", "maximum numbers of volumes, count[,count]...") + volumePulse = cmdServer.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats") + volumeIndexType = cmdServer.Flag.String("volume.index", "memory", "Choose [memory|leveldb|boltdb] mode for memory~performance balance.") + volumeFixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", true, "Adjust jpg orientation when uploading.") + volumeReadRedirect = cmdServer.Flag.Bool("volume.read.redirect", true, "Redirect moved or non-local volumes.") + volumeServerPublicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address") + isStartingFiler = cmdServer.Flag.Bool("filer", false, "whether to start filer") + + serverWhiteList []string +) + +func init() { + serverOptions.cpuprofile = cmdServer.Flag.String("cpuprofile", "", "cpu profile output file") + filerOptions.master = cmdServer.Flag.String("filer.master", "", "default to current master server") + filerOptions.collection = cmdServer.Flag.String("filer.collection", "", "all data will be stored in this collection") + filerOptions.port = cmdServer.Flag.Int("filer.port", 8888, "filer server http listen port") + filerOptions.dir = cmdServer.Flag.String("filer.dir", "", "directory to store meta data, default to a 'filer' sub directory of what -mdir is specified") + filerOptions.defaultReplicaPlacement = cmdServer.Flag.String("filer.defaultReplicaPlacement", "", "Default replication type if not specified during runtime.") + filerOptions.redirectOnRead = cmdServer.Flag.Bool("filer.redirectOnRead", false, "whether proxy or redirect to volume server during file GET request") + filerOptions.disableDirListing = cmdServer.Flag.Bool("filer.disableDirListing", false, "turn off directory listing") + filerOptions.cassandra_server = cmdServer.Flag.String("filer.cassandra.server", "", "host[:port] of the cassandra server") + filerOptions.cassandra_keyspace = cmdServer.Flag.String("filer.cassandra.keyspace", "seaweed", "keyspace of the cassandra server") + filerOptions.redis_server = cmdServer.Flag.String("filer.redis.server", "", "host:port of the redis server, e.g., 127.0.0.1:6379") + filerOptions.redis_password = cmdServer.Flag.String("filer.redis.password", "", "redis password in clear text") + filerOptions.redis_database = cmdServer.Flag.Int("filer.redis.database", 0, "the database on the redis server") +} + +func runServer(cmd *Command, args []string) bool { + filerOptions.secretKey = serverSecureKey + if *serverOptions.cpuprofile != "" { + f, err := os.Create(*serverOptions.cpuprofile) + if err != nil { + glog.Fatal(err) + } + pprof.StartCPUProfile(f) + defer pprof.StopCPUProfile() + } + + if *filerOptions.redirectOnRead { + *isStartingFiler = true + } + + *filerOptions.master = *serverIp + ":" + strconv.Itoa(*masterPort) + + if *filerOptions.defaultReplicaPlacement == "" { + *filerOptions.defaultReplicaPlacement = *masterDefaultReplicaPlacement + } + + if *volumePublicPort == 0 { + *volumePublicPort = *volumePort + } + + if *serverMaxCpu < 1 { + *serverMaxCpu = runtime.NumCPU() + } + runtime.GOMAXPROCS(*serverMaxCpu) + + folders := strings.Split(*volumeDataFolders, ",") + maxCountStrings := strings.Split(*volumeMaxDataVolumeCounts, ",") + var maxCounts []int + for _, maxString := range maxCountStrings { + if max, e := strconv.Atoi(maxString); e == nil { + maxCounts = append(maxCounts, max) + } else { + glog.Fatalf("The max specified in -max not a valid number %s", 
maxString) + } + } + if len(folders) != len(maxCounts) { + glog.Fatalf("%d directories by -dir, but only %d max is set by -max", len(folders), len(maxCounts)) + } + for _, folder := range folders { + if err := util.TestFolderWritable(folder); err != nil { + glog.Fatalf("Check Data Folder(-dir) Writable %s : %s", folder, err) + } + } + + if *masterMetaFolder == "" { + *masterMetaFolder = folders[0] + } + if *isStartingFiler { + if *filerOptions.dir == "" { + *filerOptions.dir = *masterMetaFolder + "/filer" + os.MkdirAll(*filerOptions.dir, 0700) + } + if err := util.TestFolderWritable(*filerOptions.dir); err != nil { + glog.Fatalf("Check Mapping Meta Folder (-filer.dir=\"%s\") Writable: %s", *filerOptions.dir, err) + } + } + if err := util.TestFolderWritable(*masterMetaFolder); err != nil { + glog.Fatalf("Check Meta Folder (-mdir=\"%s\") Writable: %s", *masterMetaFolder, err) + } + + if *serverWhiteListOption != "" { + serverWhiteList = strings.Split(*serverWhiteListOption, ",") + } + + if *isStartingFiler { + go func() { + r := http.NewServeMux() + _, nfs_err := weed_server.NewFilerServer(r, *serverBindIp, *filerOptions.port, *filerOptions.master, *filerOptions.dir, *filerOptions.collection, + *filerOptions.defaultReplicaPlacement, + *filerOptions.redirectOnRead, *filerOptions.disableDirListing, + *filerOptions.secretKey, + *filerOptions.cassandra_server, *filerOptions.cassandra_keyspace, + *filerOptions.redis_server, *filerOptions.redis_password, *filerOptions.redis_database, + ) + if nfs_err != nil { + glog.Fatalf("Filer startup error: %v", nfs_err) + } + glog.V(0).Infoln("Start Seaweed Filer", util.VERSION, "at port", strconv.Itoa(*filerOptions.port)) + filerListener, e := util.NewListener( + ":"+strconv.Itoa(*filerOptions.port), + time.Duration(10)*time.Second, + ) + if e != nil { + glog.Fatalf("Filer listener error: %v", e) + } + if e := http.Serve(filerListener, r); e != nil { + glog.Fatalf("Filer Fail to serve: %v", e) + } + }() + } + + var raftWaitForMaster sync.WaitGroup + var volumeWait sync.WaitGroup + + raftWaitForMaster.Add(1) + volumeWait.Add(1) + + go func() { + r := mux.NewRouter() + ms := weed_server.NewMasterServer(r, *masterPort, *masterMetaFolder, + *masterVolumeSizeLimitMB, *volumePulse, *masterConfFile, *masterDefaultReplicaPlacement, *serverGarbageThreshold, + serverWhiteList, *serverSecureKey, + ) + + glog.V(0).Infoln("Start Seaweed Master", util.VERSION, "at", *serverIp+":"+strconv.Itoa(*masterPort)) + masterListener, e := util.NewListener(*serverBindIp+":"+strconv.Itoa(*masterPort), time.Duration(*serverTimeout)*time.Second) + if e != nil { + glog.Fatalf("Master startup error: %v", e) + } + + go func() { + raftWaitForMaster.Wait() + time.Sleep(100 * time.Millisecond) + myAddress := *serverIp + ":" + strconv.Itoa(*masterPort) + var peers []string + if *serverPeers != "" { + peers = strings.Split(*serverPeers, ",") + } + raftServer := weed_server.NewRaftServer(r, peers, myAddress, *masterMetaFolder, ms.Topo, *volumePulse) + ms.SetRaftServer(raftServer) + volumeWait.Done() + }() + + raftWaitForMaster.Done() + if e := http.Serve(masterListener, r); e != nil { + glog.Fatalf("Master Fail to serve:%s", e.Error()) + } + }() + + volumeWait.Wait() + time.Sleep(100 * time.Millisecond) + if *volumePublicPort == 0 { + *volumePublicPort = *volumePort + } + if *volumeServerPublicUrl == "" { + *volumeServerPublicUrl = *serverIp + ":" + strconv.Itoa(*volumePublicPort) + } + isSeperatedPublicPort := *volumePublicPort != *volumePort + volumeMux := http.NewServeMux() + publicVolumeMux 
:= volumeMux + if isSeperatedPublicPort { + publicVolumeMux = http.NewServeMux() + } + volumeNeedleMapKind := storage.NeedleMapInMemory + switch *volumeIndexType { + case "leveldb": + volumeNeedleMapKind = storage.NeedleMapLevelDb + case "boltdb": + volumeNeedleMapKind = storage.NeedleMapBoltDb + } + volumeServer := weed_server.NewVolumeServer(volumeMux, publicVolumeMux, + *serverIp, *volumePort, *volumeServerPublicUrl, + folders, maxCounts, + volumeNeedleMapKind, + *serverIp+":"+strconv.Itoa(*masterPort), *volumePulse, *serverDataCenter, *serverRack, + serverWhiteList, *volumeFixJpgOrientation, *volumeReadRedirect, + ) + + glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "at", *serverIp+":"+strconv.Itoa(*volumePort)) + volumeListener, eListen := util.NewListener( + *serverBindIp+":"+strconv.Itoa(*volumePort), + time.Duration(*serverTimeout)*time.Second, + ) + if eListen != nil { + glog.Fatalf("Volume server listener error: %v", eListen) + } + if isSeperatedPublicPort { + publicListeningAddress := *serverIp + ":" + strconv.Itoa(*volumePublicPort) + glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "public at", publicListeningAddress) + publicListener, e := util.NewListener(publicListeningAddress, time.Duration(*serverTimeout)*time.Second) + if e != nil { + glog.Fatalf("Volume server listener error:%v", e) + } + go func() { + if e := http.Serve(publicListener, publicVolumeMux); e != nil { + glog.Fatalf("Volume server fail to serve public: %v", e) + } + }() + } + + OnInterrupt(func() { + volumeServer.Shutdown() + pprof.StopCPUProfile() + }) + + if e := http.Serve(volumeListener, volumeMux); e != nil { + glog.Fatalf("Volume server fail to serve:%v", e) + } + + return true +} diff --git a/weed/command/shell.go b/weed/command/shell.go new file mode 100644 index 000000000..19c5049c5 --- /dev/null +++ b/weed/command/shell.go @@ -0,0 +1,61 @@ +package command + +import ( + "bufio" + "fmt" + "os" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +func init() { + cmdShell.Run = runShell // break init cycle +} + +var cmdShell = &Command{ + UsageLine: "shell", + Short: "run interactive commands, now just echo", + Long: `run interactive commands. 
+ + `, +} + +func runShell(command *Command, args []string) bool { + r := bufio.NewReader(os.Stdin) + o := bufio.NewWriter(os.Stdout) + e := bufio.NewWriter(os.Stderr) + prompt := func() { + var err error + if _, err = o.WriteString("> "); err != nil { + glog.V(0).Infoln("error writing to stdout:", err) + } + if err = o.Flush(); err != nil { + glog.V(0).Infoln("error flushing stdout:", err) + } + } + readLine := func() string { + ret, err := r.ReadString('\n') + if err != nil { + fmt.Fprint(e, err) + e.Flush() + os.Exit(1) + } + return ret + } + execCmd := func(cmd string) int { + if cmd != "" { + if _, err := o.WriteString(cmd); err != nil { + glog.V(0).Infoln("error writing to stdout:", err) + } + } + return 0 + } + + cmd := "" + for { + prompt() + cmd = readLine() + execCmd(cmd) + } +} diff --git a/weed/command/signal_handling.go b/weed/command/signal_handling.go new file mode 100644 index 000000000..182e2754d --- /dev/null +++ b/weed/command/signal_handling.go @@ -0,0 +1,31 @@ +// +build !plan9 + +package command + +import ( + "os" + "os/signal" + "syscall" +) + +func OnInterrupt(fn func()) { + // deal with control+c, etc. + signalChan := make(chan os.Signal, 1) + // ignore SIGHUP so that the daemon does not exit when its controlling terminal closes + signal.Ignore(syscall.SIGHUP) + signal.Notify(signalChan, + os.Interrupt, + os.Kill, + syscall.SIGALRM, + // syscall.SIGHUP, + syscall.SIGINT, + syscall.SIGTERM, + // syscall.SIGQUIT, + ) + go func() { + for range signalChan { + fn() + os.Exit(0) + } + }() +} diff --git a/weed/command/signal_handling_notsupported.go b/weed/command/signal_handling_notsupported.go new file mode 100644 index 000000000..dfcc24a3e --- /dev/null +++ b/weed/command/signal_handling_notsupported.go @@ -0,0 +1,6 @@ +// +build plan9 + +package command + +func OnInterrupt(fn func()) { +} diff --git a/weed/command/upload.go b/weed/command/upload.go new file mode 100644 index 000000000..0dfa115bb --- /dev/null +++ b/weed/command/upload.go @@ -0,0 +1,108 @@ +package command + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/security" +) + +var ( + upload UploadOptions +) + +type UploadOptions struct { + server *string + dir *string + include *string + replication *string + collection *string + ttl *string + maxMB *int + secretKey *string +} + +func init() { + cmdUpload.Run = runUpload // break init cycle + cmdUpload.IsDebug = cmdUpload.Flag.Bool("debug", false, "verbose debug information") + upload.server = cmdUpload.Flag.String("server", "localhost:9333", "SeaweedFS master location") + upload.dir = cmdUpload.Flag.String("dir", "", "Upload the whole folder recursively if specified.") + upload.include = cmdUpload.Flag.String("include", "", "patterns of files to upload, e.g., *.pdf, *.html, ab?d.txt, works together with -dir") + upload.replication = cmdUpload.Flag.String("replication", "", "replication type") + upload.collection = cmdUpload.Flag.String("collection", "", "optional collection name") + upload.ttl = cmdUpload.Flag.String("ttl", "", "time to live, e.g.: 1m, 1h, 1d, 1M, 1y") + upload.maxMB = cmdUpload.Flag.Int("maxMB", 0, "split files larger than the limit") + upload.secretKey = cmdUpload.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)") +} + +var cmdUpload = &Command{ + UsageLine: "upload -server=localhost:9333 file1 [file2 file3]\n weed upload -server=localhost:9333 -dir=one_directory -include=*.pdf", + Short: "upload one or a list of files", + Long: `upload one or a list of 
files, or batch upload one whole folder recursively. + + If uploading a list of files: + It uses consecutive file keys for the list of files. + e.g. if file1 uses key k, file2 can be read via k_1 + + If uploading a whole folder recursively: + All files under the folder and subfolders will be uploaded, each with its own file key. + Optional parameter "-include" allows you to specify the file name patterns. + + If any file has a ".gz" extension, the content is considered gzipped already and will be stored as is. + This can save the volume server's gzip processing and allows a customizable gzip compression level. + The stored file name will have ".gz" stripped. For example, "jquery.js.gz" will be stored as "jquery.js". + + If "maxMB" is set to a positive number, files larger than the limit will be split into chunks and uploaded separately. + The list of file ids of those chunks will be stored in an additional chunk, and this additional chunk's file id will be returned. + + `, +} + +func runUpload(cmd *Command, args []string) bool { + secret := security.Secret(*upload.secretKey) + if len(cmdUpload.Flag.Args()) == 0 { + if *upload.dir == "" { + return false + } + filepath.Walk(*upload.dir, func(path string, info os.FileInfo, err error) error { + if err == nil { + if !info.IsDir() { + if *upload.include != "" { + if ok, _ := filepath.Match(*upload.include, filepath.Base(path)); !ok { + return nil + } + } + parts, e := operation.NewFileParts([]string{path}) + if e != nil { + return e + } + results, e := operation.SubmitFiles(*upload.server, parts, + *upload.replication, *upload.collection, + *upload.ttl, *upload.maxMB, secret) + bytes, _ := json.Marshal(results) + fmt.Println(string(bytes)) + if e != nil { + return e + } + } + } else { + fmt.Println(err) + } + return err + }) + } else { + parts, e := operation.NewFileParts(args) + if e != nil { + fmt.Println(e.Error()) + return false + } + results, _ := operation.SubmitFiles(*upload.server, parts, + *upload.replication, *upload.collection, + *upload.ttl, *upload.maxMB, secret) + bytes, _ := json.Marshal(results) + fmt.Println(string(bytes)) + } + return true +} diff --git a/weed/command/version.go b/weed/command/version.go new file mode 100644 index 000000000..8fdd68ec8 --- /dev/null +++ b/weed/command/version.go @@ -0,0 +1,24 @@ +package command + +import ( + "fmt" + "runtime" + + "github.com/chrislusf/seaweedfs/weed/util" +) + +var cmdVersion = &Command{ + Run: runVersion, + UsageLine: "version", + Short: "print SeaweedFS version", + Long: `Version prints the SeaweedFS version`, +} + +func runVersion(cmd *Command, args []string) bool { + if len(args) != 0 { + cmd.Usage() + } + + fmt.Printf("version %s %s %s\n", util.VERSION, runtime.GOOS, runtime.GOARCH) + return true +} diff --git a/weed/command/volume.go b/weed/command/volume.go new file mode 100644 index 000000000..21369cbe9 --- /dev/null +++ b/weed/command/volume.go @@ -0,0 +1,165 @@ +package command + +import ( + "net/http" + "os" + "runtime" + "strconv" + "strings" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/server" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + v VolumeServerOptions +) + +type VolumeServerOptions struct { + port *int + publicPort *int + folders []string + folderMaxLimits []int + ip *string + publicUrl *string + bindIp *string + master *string + pulseSeconds *int + idleConnectionTimeout *int + maxCpu *int + dataCenter *string + rack *string + whiteList []string + indexType
*string + fixJpgOrientation *bool + readRedirect *bool +} + +func init() { + cmdVolume.Run = runVolume // break init cycle + v.port = cmdVolume.Flag.Int("port", 8080, "http listen port") + v.publicPort = cmdVolume.Flag.Int("port.public", 0, "port opened to public") + v.ip = cmdVolume.Flag.String("ip", "", "ip or server name") + v.publicUrl = cmdVolume.Flag.String("publicUrl", "", "Publicly accessible address") + v.bindIp = cmdVolume.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to") + v.master = cmdVolume.Flag.String("mserver", "localhost:9333", "master server location") + v.pulseSeconds = cmdVolume.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats, must be smaller than or equal to the master's setting") + v.idleConnectionTimeout = cmdVolume.Flag.Int("idleTimeout", 10, "connection idle seconds") + v.maxCpu = cmdVolume.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs") + v.dataCenter = cmdVolume.Flag.String("dataCenter", "", "current volume server's data center name") + v.rack = cmdVolume.Flag.String("rack", "", "current volume server's rack name") + v.indexType = cmdVolume.Flag.String("index", "memory", "Choose [memory|leveldb|boltdb] mode for memory~performance balance.") + v.fixJpgOrientation = cmdVolume.Flag.Bool("images.fix.orientation", true, "Adjust jpg orientation when uploading.") + v.readRedirect = cmdVolume.Flag.Bool("read.redirect", true, "Redirect moved or non-local volumes.") +} + +var cmdVolume = &Command{ + UsageLine: "volume -port=8080 -dir=/tmp -max=5 -ip=server_name -mserver=localhost:9333", + Short: "start a volume server", + Long: `start a volume server to provide storage spaces + + `, +} + +var ( + volumeFolders = cmdVolume.Flag.String("dir", os.TempDir(), "directories to store data files. dir[,dir]...") + maxVolumeCounts = cmdVolume.Flag.String("max", "7", "maximum numbers of volumes, count[,count]...") + volumeWhiteListOption = cmdVolume.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. 
No limit if empty.") +) + +func runVolume(cmd *Command, args []string) bool { + if *v.maxCpu < 1 { + *v.maxCpu = runtime.NumCPU() + } + runtime.GOMAXPROCS(*v.maxCpu) + + //Set multiple folders and each folder's max volume count limit' + v.folders = strings.Split(*volumeFolders, ",") + maxCountStrings := strings.Split(*maxVolumeCounts, ",") + for _, maxString := range maxCountStrings { + if max, e := strconv.Atoi(maxString); e == nil { + v.folderMaxLimits = append(v.folderMaxLimits, max) + } else { + glog.Fatalf("The max specified in -max not a valid number %s", maxString) + } + } + if len(v.folders) != len(v.folderMaxLimits) { + glog.Fatalf("%d directories by -dir, but only %d max is set by -max", len(v.folders), len(v.folderMaxLimits)) + } + for _, folder := range v.folders { + if err := util.TestFolderWritable(folder); err != nil { + glog.Fatalf("Check Data Folder(-dir) Writable %s : %s", folder, err) + } + } + + //security related white list configuration + if *volumeWhiteListOption != "" { + v.whiteList = strings.Split(*volumeWhiteListOption, ",") + } + + if *v.ip == "" { + *v.ip = "127.0.0.1" + } + + if *v.publicPort == 0 { + *v.publicPort = *v.port + } + if *v.publicUrl == "" { + *v.publicUrl = *v.ip + ":" + strconv.Itoa(*v.publicPort) + } + isSeperatedPublicPort := *v.publicPort != *v.port + + volumeMux := http.NewServeMux() + publicVolumeMux := volumeMux + if isSeperatedPublicPort { + publicVolumeMux = http.NewServeMux() + } + + volumeNeedleMapKind := storage.NeedleMapInMemory + switch *v.indexType { + case "leveldb": + volumeNeedleMapKind = storage.NeedleMapLevelDb + case "boltdb": + volumeNeedleMapKind = storage.NeedleMapBoltDb + } + volumeServer := weed_server.NewVolumeServer(volumeMux, publicVolumeMux, + *v.ip, *v.port, *v.publicUrl, + v.folders, v.folderMaxLimits, + volumeNeedleMapKind, + *v.master, *v.pulseSeconds, *v.dataCenter, *v.rack, + v.whiteList, + *v.fixJpgOrientation, *v.readRedirect, + ) + + listeningAddress := *v.bindIp + ":" + strconv.Itoa(*v.port) + glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "at", listeningAddress) + listener, e := util.NewListener(listeningAddress, time.Duration(*v.idleConnectionTimeout)*time.Second) + if e != nil { + glog.Fatalf("Volume server listener error:%v", e) + } + if isSeperatedPublicPort { + publicListeningAddress := *v.bindIp + ":" + strconv.Itoa(*v.publicPort) + glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "public at", publicListeningAddress) + publicListener, e := util.NewListener(publicListeningAddress, time.Duration(*v.idleConnectionTimeout)*time.Second) + if e != nil { + glog.Fatalf("Volume server listener error:%v", e) + } + go func() { + if e := http.Serve(publicListener, publicVolumeMux); e != nil { + glog.Fatalf("Volume server fail to serve public: %v", e) + } + }() + } + + OnInterrupt(func() { + volumeServer.Shutdown() + }) + + if e := http.Serve(listener, volumeMux); e != nil { + glog.Fatalf("Volume server fail to serve: %v", e) + } + return true +} diff --git a/weed/command/volume_test.go b/weed/command/volume_test.go new file mode 100644 index 000000000..7399f1248 --- /dev/null +++ b/weed/command/volume_test.go @@ -0,0 +1,13 @@ +package command + +import ( + "net/http" + "testing" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +func TestXYZ(t *testing.T) { + glog.V(0).Infoln("Last-Modified", time.Unix(int64(1373273596), 0).UTC().Format(http.TimeFormat)) +} diff --git a/weed/compress/compression_test.go b/weed/compress/compression_test.go new file mode 100644 index 
000000000..83b7c0055 --- /dev/null +++ b/weed/compress/compression_test.go @@ -0,0 +1,45 @@ +package compress + +import ( + "math/rand" + "testing" +) + +func TestSortedData(t *testing.T) { + data := make([]int32, 102400) + for i := 1; i < len(data); i++ { + data[i] = data[i-1] + rand.Int31n(15) + } + testCompressAndUncompress(t, data, "Sorted data") +} + +func TestUnsortedData(t *testing.T) { + data := make([]int32, 102400) + for i := 0; i < len(data); i++ { + data[i] = rand.Int31n(255) + } + testCompressAndUncompress(t, data, "Unsorted data") +} + +func testCompressAndUncompress(t *testing.T, data []int32, desc string) { + + compressed_data, err := Compress32(data) + if err != nil { + t.Fatal("Compress error", err.Error()) + } + uncompressed_data, err := Uncompress32(compressed_data, make([]int32, len(data)*2)) + if err != nil { + t.Fatal("Uncompress error", err.Error()) + } + if len(uncompressed_data) != len(data) { + t.Fatal("Len differs", len(data), len(uncompressed_data)) + } + for i := 0; i < len(data); i++ { + if data[i] != uncompressed_data[i] { + t.Fatal("Data differs:", i, data[i], uncompressed_data[i]) + } + } + + println(desc, " Data length:", len(data), " => Compressed length:", len(compressed_data)) + +} diff --git a/weed/compress/delta_binary_pack32.go b/weed/compress/delta_binary_pack32.go new file mode 100644 index 000000000..42ae8d42d --- /dev/null +++ b/weed/compress/delta_binary_pack32.go @@ -0,0 +1,32 @@ +package compress + +import ( + "github.com/reducedb/encoding/cursor" + "github.com/reducedb/encoding/delta/bp32" +) + +// Compress32 compresses in []int32 to a new []int32 +func Compress32(in []int32) (out []int32, err error) { + out = make([]int32, len(in)*2) + inpos := cursor.New() + outpos := cursor.New() + + if err = bp32.New().Compress(in, inpos, len(in), out, outpos); err != nil { + return nil, err + } + + return out[:outpos.Get()], nil +} + +// Uncompress32 uncompresses in []int32 into buffer, returning the used prefix +func Uncompress32(in []int32, buffer []int32) (out []int32, err error) { + out = buffer + inpos := cursor.New() + outpos := cursor.New() + + if err = bp32.New().Uncompress(in, inpos, len(in), out, outpos); err != nil { + return nil, err + } + + return out[:outpos.Get()], nil +} diff --git a/weed/filer/cassandra_store/cassandra_store.go b/weed/filer/cassandra_store/cassandra_store.go new file mode 100644 index 000000000..4ee2f65be --- /dev/null +++ b/weed/filer/cassandra_store/cassandra_store.go @@ -0,0 +1,87 @@ +package cassandra_store + +import ( + "fmt" + + "github.com/chrislusf/seaweedfs/weed/glog" + + "github.com/gocql/gocql" +) + +/* + +Basically you need a table just like this: + +CREATE TABLE seaweed_files ( + path varchar, + fids list<varchar>, + PRIMARY KEY (path) +); + +Need to match flat_namespace.FlatNamespaceStore interface + Put(fullFileName string, fid string) (err error) + Get(fullFileName string) (fid string, err error) + Delete(fullFileName string) (fid string, err error) + +*/ +type CassandraStore struct { + cluster *gocql.ClusterConfig + session *gocql.Session +} + +func NewCassandraStore(keyspace string, hosts ...string) (c *CassandraStore, err error) { + c = &CassandraStore{} + c.cluster = gocql.NewCluster(hosts...) 
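+ // the hosts given here are only initial contact points; the gocql driver discovers the remaining nodes of the cluster on its own. The keyspace and quorum consistency configured next take effect when the session is created.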
+ c.cluster.Keyspace = keyspace + c.cluster.Consistency = gocql.Quorum + c.session, err = c.cluster.CreateSession() + if err != nil { + glog.V(0).Infof("Failed to open cassandra store, hosts %v, keyspace %s", hosts, keyspace) + } + return +} + +func (c *CassandraStore) Put(fullFileName string, fid string) (err error) { + var input []string + input = append(input, fid) + if err := c.session.Query( + `INSERT INTO seaweed_files (path, fids) VALUES (?, ?)`, + fullFileName, input).Exec(); err != nil { + glog.V(0).Infof("Failed to save file %s with id %s: %v", fullFileName, fid, err) + return err + } + return nil +} +func (c *CassandraStore) Get(fullFileName string) (fid string, err error) { + var output []string + if err := c.session.Query( + `select fids FROM seaweed_files WHERE path = ? LIMIT 1`, + fullFileName).Consistency(gocql.One).Scan(&output); err != nil { + if err != gocql.ErrNotFound { + glog.V(0).Infof("Failed to find file %s: %v", fullFileName, err) + } + } + if len(output) == 0 { + return "", fmt.Errorf("No file id found for %s", fullFileName) + } + return output[0], nil +} + +// Currently the fid is not returned +func (c *CassandraStore) Delete(fullFileName string) (fid string, err error) { + if err := c.session.Query( + `DELETE FROM seaweed_files WHERE path = ?`, + fullFileName).Exec(); err != nil { + if err != gocql.ErrNotFound { + glog.V(0).Infof("Failed to delete file %s: %v", fullFileName, err) + } + return "", err + } + return "", nil +} + +func (c *CassandraStore) Close() { + if c.session != nil { + c.session.Close() + } +} diff --git a/weed/filer/cassandra_store/schema.cql b/weed/filer/cassandra_store/schema.cql new file mode 100644 index 000000000..d6f2bb093 --- /dev/null +++ b/weed/filer/cassandra_store/schema.cql @@ -0,0 +1,22 @@ +/* + +Here is the CQL to create the table for CassandraStore. + +Optionally you can adjust the keyspace name and replication settings. 
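+You can load this file with cqlsh, e.g.: cqlsh some_cassandra_host -f schema.cql (replace some_cassandra_host with one of your nodes).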
+ +For a production server, you will very likely want to set replication_factor to 3 + +*/ + +create keyspace seaweed WITH replication = { + 'class':'SimpleStrategy', + 'replication_factor':1 +}; + +use seaweed; + +CREATE TABLE seaweed_files ( + path varchar, + fids list<varchar>, + PRIMARY KEY (path) +); diff --git a/weed/filer/client_operations.go b/weed/filer/client_operations.go new file mode 100644 index 000000000..13e4854a4 --- /dev/null +++ b/weed/filer/client_operations.go @@ -0,0 +1,70 @@ +package filer + +import ( + "encoding/json" + "errors" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/util" + + "net/url" +) + +type ApiRequest struct { + Command string //"listFiles", "listDirectories" + Directory string + FileName string +} + +type ListFilesResult struct { + Files []FileEntry + Error string `json:"error,omitempty"` +} + +func ListFiles(server string, directory string, fileName string) (*ListFilesResult, error) { + var ret ListFilesResult + if err := call(server, ApiRequest{Command: "listFiles", Directory: directory, FileName: fileName}, &ret); err == nil { + if ret.Error != "" { + return nil, errors.New(ret.Error) + } + return &ret, nil + } else { + return nil, err + } +} + +type ListDirectoriesResult struct { + Directories []DirectoryEntry + Error string `json:"error,omitempty"` +} + +func ListDirectories(server string, directory string) (*ListDirectoriesResult, error) { + var ret ListDirectoriesResult + if err := call(server, ApiRequest{Command: "listDirectories", Directory: directory}, &ret); err == nil { + if ret.Error != "" { + return nil, errors.New(ret.Error) + } + return &ret, nil + } else { + return nil, err + } +} + +func call(server string, request ApiRequest, ret interface{}) error { + b, err := json.Marshal(request) + if err != nil { + fmt.Println("error:", err) + return err + } + values := make(url.Values) + values.Add("request", string(b)) + jsonBlob, err := util.Post("http://"+server+"/__api__", values) + if err != nil { + return err + } + err = json.Unmarshal(jsonBlob, ret) + if err != nil { + return err + } + return nil +} diff --git a/weed/filer/embedded_filer/design.txt b/weed/filer/embedded_filer/design.txt new file mode 100644 index 000000000..45fec8fbe --- /dev/null +++ b/weed/filer/embedded_filer/design.txt @@ -0,0 +1,26 @@ +Design Assumptions: +1. the number of directories is orders of magnitude smaller than the number of files +2. unlimited number of files under any directory +Philosophy: + metadata for directories and files should be separated +Design: + Store directories in a normal map + hopefully all directories fit in memory + efficient to move/rename/list_directories + Log directory changes to an append-only log file + Store files in a sorted string table in <dir_id/filename> format + efficient to list_files, just a simple iterator + efficient to locate files, binary search + +Testing: +1. starting server, "weed server -filer=true" +2. 
posting files to different folders +curl -F "filename=@design.txt" "http://localhost:8888/sources/" +curl -F "filename=@design.txt" "http://localhost:8888/design/" +curl -F "filename=@directory.go" "http://localhost:8888/sources/weed/go/" +curl -F "filename=@directory.go" "http://localhost:8888/sources/testing/go/" +curl -F "filename=@filer.go" "http://localhost:8888/sources/weed/go/" +curl -F "filename=@filer_in_leveldb.go" "http://localhost:8888/sources/weed/go/" +curl "http://localhost:8888/?pretty=y" +curl "http://localhost:8888/sources/weed/go/?pretty=y" +curl "http://localhost:8888/sources/weed/go/?pretty=y" diff --git a/weed/filer/embedded_filer/directory.go b/weed/filer/embedded_filer/directory.go new file mode 100644 index 000000000..4d4bd1c59 --- /dev/null +++ b/weed/filer/embedded_filer/directory.go @@ -0,0 +1,15 @@ +package embedded_filer + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" +) + +type DirectoryManager interface { + FindDirectory(dirPath string) (filer.DirectoryId, error) + ListDirectories(dirPath string) (dirs []filer.DirectoryEntry, err error) + MakeDirectory(currentDirPath string, dirName string) (filer.DirectoryId, error) + MoveUnderDirectory(oldDirPath string, newParentDirPath string) error + DeleteDirectory(dirPath string) error + //functions used by FUSE + FindDirectoryById(filer.DirectoryId, error) +} diff --git a/weed/filer/embedded_filer/directory_in_map.go b/weed/filer/embedded_filer/directory_in_map.go new file mode 100644 index 000000000..5100f3531 --- /dev/null +++ b/weed/filer/embedded_filer/directory_in_map.go @@ -0,0 +1,310 @@ +package embedded_filer + +import ( + "bufio" + "fmt" + "io" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var writeLock sync.Mutex //serialize changes to dir.log + +type DirectoryEntryInMap struct { + sync.Mutex + Name string + Parent *DirectoryEntryInMap + subDirectories map[string]*DirectoryEntryInMap + Id filer.DirectoryId +} + +func (de *DirectoryEntryInMap) getChild(dirName string) (*DirectoryEntryInMap, bool) { + de.Lock() + defer de.Unlock() + child, ok := de.subDirectories[dirName] + return child, ok +} +func (de *DirectoryEntryInMap) addChild(dirName string, child *DirectoryEntryInMap) { + de.Lock() + defer de.Unlock() + de.subDirectories[dirName] = child +} +func (de *DirectoryEntryInMap) removeChild(dirName string) { + de.Lock() + defer de.Unlock() + delete(de.subDirectories, dirName) +} +func (de *DirectoryEntryInMap) hasChildren() bool { + de.Lock() + defer de.Unlock() + return len(de.subDirectories) > 0 +} +func (de *DirectoryEntryInMap) children() (dirNames []filer.DirectoryEntry) { + de.Lock() + defer de.Unlock() + for k, v := range de.subDirectories { + dirNames = append(dirNames, filer.DirectoryEntry{Name: k, Id: v.Id}) + } + return dirNames +} + +type DirectoryManagerInMap struct { + Root *DirectoryEntryInMap + max filer.DirectoryId + logFile *os.File + isLoading bool +} + +func (dm *DirectoryManagerInMap) newDirectoryEntryInMap(parent *DirectoryEntryInMap, name string) (d *DirectoryEntryInMap, err error) { + d = &DirectoryEntryInMap{Name: name, Parent: parent, subDirectories: make(map[string]*DirectoryEntryInMap)} + var parts []string + for p := d; p != nil && p.Name != ""; p = p.Parent { + parts = append(parts, p.Name) + } + n := len(parts) + if n <= 0 { + return nil, fmt.Errorf("Failed to create folder %s/%s", parent.Name, name) + } + for i := 0; i < n/2; i++ { + parts[i], parts[n-1-i] 
= parts[n-1-i], parts[i] + } + dm.max++ + d.Id = dm.max + dm.log("add", "/"+strings.Join(parts, "/"), strconv.Itoa(int(d.Id))) + return d, nil +} + +func (dm *DirectoryManagerInMap) log(words ...string) { + if !dm.isLoading { + dm.logFile.WriteString(strings.Join(words, "\t") + "\n") + } +} + +func NewDirectoryManagerInMap(dirLogFile string) (dm *DirectoryManagerInMap, err error) { + dm = &DirectoryManagerInMap{} + //dm.Root does not use newDirectoryEntryInMap, since dm.max would be changed + dm.Root = &DirectoryEntryInMap{subDirectories: make(map[string]*DirectoryEntryInMap)} + if dm.logFile, err = os.OpenFile(dirLogFile, os.O_RDWR|os.O_CREATE, 0644); err != nil { + return nil, fmt.Errorf("cannot write directory log file %s: %v", dirLogFile, err) + } + return dm, dm.load() +} + +func (dm *DirectoryManagerInMap) processEachLine(line string) error { + if strings.HasPrefix(line, "#") { + return nil + } + if line == "" { + return nil + } + parts := strings.Split(line, "\t") + if len(parts) == 0 { + return nil + } + switch parts[0] { + case "add": + v, pe := strconv.Atoi(parts[2]) + if pe != nil { + return pe + } + if e := dm.loadDirectory(parts[1], filer.DirectoryId(v)); e != nil { + return e + } + case "mov": + newName := "" + if len(parts) >= 4 { + newName = parts[3] + } + if e := dm.MoveUnderDirectory(parts[1], parts[2], newName); e != nil { + return e + } + case "del": + if e := dm.DeleteDirectory(parts[1]); e != nil { + return e + } + default: + fmt.Printf("line %s has unknown command %s!\n", line, parts[0]) + return nil + } + return nil +} +func (dm *DirectoryManagerInMap) load() error { + dm.max = 0 + lines := bufio.NewReader(dm.logFile) + dm.isLoading = true + defer func() { dm.isLoading = false }() + for { + line, err := util.Readln(lines) + if err != nil && err != io.EOF { + return err + } + if pe := dm.processEachLine(string(line)); pe != nil { + return pe + } + if err == io.EOF { + return nil + } + } +} + +func (dm *DirectoryManagerInMap) findDirectory(dirPath string) (*DirectoryEntryInMap, error) { + if dirPath == "" { + return dm.Root, nil + } + dirPath = CleanFilePath(dirPath) + if dirPath == "/" { + return dm.Root, nil + } + parts := strings.Split(dirPath, "/") + dir := dm.Root + for i := 1; i < len(parts); i++ { + if sub, ok := dir.getChild(parts[i]); ok { + dir = sub + } else { + return dm.Root, fmt.Errorf("Directory %s Not Found", dirPath) + } + } + return dir, nil +} +func (dm *DirectoryManagerInMap) FindDirectory(dirPath string) (filer.DirectoryId, error) { + d, e := dm.findDirectory(dirPath) + if e == nil { + return d.Id, nil + } + return dm.Root.Id, e +} + +func (dm *DirectoryManagerInMap) loadDirectory(dirPath string, dirId filer.DirectoryId) error { + dirPath = CleanFilePath(dirPath) + if dirPath == "/" { + return nil + } + parts := strings.Split(dirPath, "/") + dir := dm.Root + for i := 1; i < len(parts); i++ { + sub, ok := dir.getChild(parts[i]) + if !ok { + writeLock.Lock() + if sub2, createdByOtherThread := dir.getChild(parts[i]); createdByOtherThread { + sub = sub2 + } else { + if i != len(parts)-1 { + writeLock.Unlock() + return fmt.Errorf("%s should be created after parent %s", dirPath, parts[i]) + } + var err error + sub, err = dm.newDirectoryEntryInMap(dir, parts[i]) + if err != nil { + writeLock.Unlock() + return err + } + if sub.Id != dirId { + writeLock.Unlock() + // dir.log entries must be in the same order as the in-memory directory ids + return fmt.Errorf("%s should have id %v instead of %v", dirPath, sub.Id, dirId) + } + dir.addChild(parts[i], sub) + } + writeLock.Unlock() + } 
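+ // descend into the child that was found or just replayed, then continue with the next path component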
+ dir = sub + } + return nil +} + +func (dm *DirectoryManagerInMap) makeDirectory(dirPath string) (dir *DirectoryEntryInMap, created bool) { + dirPath = CleanFilePath(dirPath) + if dirPath == "/" { + return dm.Root, false + } + parts := strings.Split(dirPath, "/") + dir = dm.Root + for i := 1; i < len(parts); i++ { + sub, ok := dir.getChild(parts[i]) + if !ok { + writeLock.Lock() + if sub2, createdByOtherThread := dir.getChild(parts[i]); createdByOtherThread { + sub = sub2 + } else { + var err error + sub, err = dm.newDirectoryEntryInMap(dir, parts[i]) + if err != nil { + writeLock.Unlock() + return nil, false + } + dir.addChild(parts[i], sub) + created = true + } + writeLock.Unlock() + } + dir = sub + } + return dir, created +} + +func (dm *DirectoryManagerInMap) MakeDirectory(dirPath string) (filer.DirectoryId, error) { + dir, _ := dm.makeDirectory(dirPath) + return dir.Id, nil +} + +func (dm *DirectoryManagerInMap) MoveUnderDirectory(oldDirPath string, newParentDirPath string, newName string) error { + writeLock.Lock() + defer writeLock.Unlock() + oldDir, oe := dm.findDirectory(oldDirPath) + if oe != nil { + return oe + } + parentDir, pe := dm.findDirectory(newParentDirPath) + if pe != nil { + return pe + } + dm.log("mov", oldDirPath, newParentDirPath, newName) + oldDir.Parent.removeChild(oldDir.Name) + if newName == "" { + newName = oldDir.Name + } + parentDir.addChild(newName, oldDir) + oldDir.Name = newName + oldDir.Parent = parentDir + return nil +} + +func (dm *DirectoryManagerInMap) ListDirectories(dirPath string) (dirNames []filer.DirectoryEntry, err error) { + d, e := dm.findDirectory(dirPath) + if e != nil { + return dirNames, e + } + return d.children(), nil +} +func (dm *DirectoryManagerInMap) DeleteDirectory(dirPath string) error { + writeLock.Lock() + defer writeLock.Unlock() + if dirPath == "/" { + return fmt.Errorf("Cannot delete %s", dirPath) + } + d, e := dm.findDirectory(dirPath) + if e != nil { + return e + } + if d.hasChildren() { + return fmt.Errorf("dir %s still has sub directories", dirPath) + } + d.Parent.removeChild(d.Name) + d.Parent = nil + dm.log("del", dirPath) + return nil +} + +func CleanFilePath(fp string) string { + ret := filepath.Clean(fp) + if os.PathSeparator == '\\' { + return strings.Replace(ret, "\\", "/", -1) + } + return ret +} diff --git a/weed/filer/embedded_filer/directory_test.go b/weed/filer/embedded_filer/directory_test.go new file mode 100644 index 000000000..c8b3f1f30 --- /dev/null +++ b/weed/filer/embedded_filer/directory_test.go @@ -0,0 +1,86 @@ +package embedded_filer + +import ( + "os" + "strings" + "testing" +) + +func TestDirectory(t *testing.T) { + dm, _ := NewDirectoryManagerInMap("/tmp/dir.log") + defer os.Remove("/tmp/dir.log") + dm.MakeDirectory("/a/b/c") + dm.MakeDirectory("/a/b/d") + dm.MakeDirectory("/a/b/e") + dm.MakeDirectory("/a/b/e/f") + dm.MakeDirectory("/a/b/e/f/g") + dm.MoveUnderDirectory("/a/b/e/f/g", "/a/b", "t") + if _, err := dm.FindDirectory("/a/b/e/f/g"); err == nil { + t.Fatal("/a/b/e/f/g should not exist any more after moving") + } + if _, err := dm.FindDirectory("/a/b/t"); err != nil { + t.Fatal("/a/b/t should exist after moving") + } + if _, err := dm.FindDirectory("/a/b/g"); err == nil { + t.Fatal("/a/b/g should not exist after moving") + } + dm.MoveUnderDirectory("/a/b/e/f", "/a/b", "") + if _, err := dm.FindDirectory("/a/b/f"); err != nil { + t.Fatal("/a/b/f should exist after moving") + } + dm.MakeDirectory("/a/b/g/h/i") + dm.DeleteDirectory("/a/b/e/f") + 
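+ // the delete of /a/b/e/f above fails with not-found (f was moved to /a/b/f earlier) and the error is ignored; the next delete removes the now-empty /a/b/e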
dm.DeleteDirectory("/a/b/e") + dirNames, _ := dm.ListDirectories("/a/b/e") + for _, v := range dirNames { + println("sub1 dir:", v.Name, "id", v.Id) + } + dm.logFile.Close() + + var path []string + printTree(dm.Root, path) + + dm2, e := NewDirectoryManagerInMap("/tmp/dir.log") + if e != nil { + println("load error", e.Error()) + } + if !compare(dm.Root, dm2.Root) { + t.Fatal("restored dir not the same!") + } + printTree(dm2.Root, path) +} + +func printTree(node *DirectoryEntryInMap, path []string) { + println(strings.Join(path, "/") + "/" + node.Name) + path = append(path, node.Name) + for _, v := range node.subDirectories { + printTree(v, path) + } +} + +func compare(root1 *DirectoryEntryInMap, root2 *DirectoryEntryInMap) bool { + if len(root1.subDirectories) != len(root2.subDirectories) { + return false + } + if root1.Name != root2.Name { + return false + } + if root1.Id != root2.Id { + return false + } + if !(root1.Parent == nil && root2.Parent == nil) { + if root1.Parent.Id != root2.Parent.Id { + return false + } + } + for k, v := range root1.subDirectories { + if !compare(v, root2.subDirectories[k]) { + return false + } + } + return true +} diff --git a/weed/filer/embedded_filer/filer_embedded.go b/weed/filer/embedded_filer/filer_embedded.go new file mode 100644 index 000000000..27299eb40 --- /dev/null +++ b/weed/filer/embedded_filer/filer_embedded.go @@ -0,0 +1,141 @@ +package embedded_filer + +import ( + "errors" + "fmt" + "path/filepath" + "strings" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/operation" +) + +type FilerEmbedded struct { + master string + directories *DirectoryManagerInMap + files *FileListInLevelDb +} + +func NewFilerEmbedded(master string, dir string) (filer *FilerEmbedded, err error) { + dm, de := NewDirectoryManagerInMap(filepath.Join(dir, "dir.log")) + if de != nil { + return nil, de + } + fl, fe := NewFileListInLevelDb(dir) + if fe != nil { + return nil, fe + } + filer = &FilerEmbedded{ + master: master, + directories: dm, + files: fl, + } + return +} + +func (filer *FilerEmbedded) CreateFile(filePath string, fid string) (err error) { + dir, file := filepath.Split(filePath) + dirId, e := filer.directories.MakeDirectory(dir) + if e != nil { + return e + } + return filer.files.CreateFile(dirId, file, fid) +} +func (filer *FilerEmbedded) FindFile(filePath string) (fid string, err error) { + dir, file := filepath.Split(filePath) + dirId, e := filer.directories.FindDirectory(dir) + if e != nil { + return "", e + } + return filer.files.FindFile(dirId, file) +} +func (filer *FilerEmbedded) FindDirectory(dirPath string) (dirId filer.DirectoryId, err error) { + return filer.directories.FindDirectory(dirPath) +} +func (filer *FilerEmbedded) ListDirectories(dirPath string) (dirs []filer.DirectoryEntry, err error) { + return filer.directories.ListDirectories(dirPath) +} +func (filer *FilerEmbedded) ListFiles(dirPath string, lastFileName string, limit int) (files []filer.FileEntry, err error) { + dirId, e := filer.directories.FindDirectory(dirPath) + if e != nil { + return nil, e + } + return filer.files.ListFiles(dirId, lastFileName, limit), nil +} +func (filer *FilerEmbedded) DeleteDirectory(dirPath string, recursive bool) (err error) { + dirId, e := filer.directories.FindDirectory(dirPath) + if e != nil { + return e + } + if sub_dirs, sub_err := filer.directories.ListDirectories(dirPath); sub_err == nil { + if len(sub_dirs) > 0 && !recursive { + return fmt.Errorf("Fail to delete directory %s: %d sub directories found!", 
dirPath, len(sub_dirs)) + } + for _, sub := range sub_dirs { + if delete_sub_err := filer.DeleteDirectory(filepath.Join(dirPath, sub.Name), recursive); delete_sub_err != nil { + return delete_sub_err + } + } + } + list := filer.files.ListFiles(dirId, "", 100) + if len(list) != 0 && !recursive { + return fmt.Errorf("Fail to delete non-empty directory %s!", dirPath) + } + for { + if len(list) == 0 { + return filer.directories.DeleteDirectory(dirPath) + } + var fids []string + for _, fileEntry := range list { + fids = append(fids, string(fileEntry.Id)) + } + if result_list, delete_file_err := operation.DeleteFiles(filer.master, fids); delete_file_err != nil { + return delete_file_err + } else { + if len(result_list.Errors) > 0 { + return errors.New(strings.Join(result_list.Errors, "\n")) + } + } + lastFile := list[len(list)-1] + list = filer.files.ListFiles(dirId, lastFile.Name, 100) + } + +} + +func (filer *FilerEmbedded) DeleteFile(filePath string) (fid string, err error) { + dir, file := filepath.Split(filePath) + dirId, e := filer.directories.FindDirectory(dir) + if e != nil { + return "", e + } + return filer.files.DeleteFile(dirId, file) +} + +/* +Move a folder or a file, with 4 use cases: +mv fromDir toNewDir +mv fromDir toOldDir +mv fromFile toDir +mv fromFile toFile +*/ +func (filer *FilerEmbedded) Move(fromPath string, toPath string) error { + if _, dir_err := filer.FindDirectory(fromPath); dir_err == nil { + if _, err := filer.FindDirectory(toPath); err == nil { + // move folder under an existing folder + return filer.directories.MoveUnderDirectory(fromPath, toPath, "") + } + // move folder to a new folder + return filer.directories.MoveUnderDirectory(fromPath, filepath.Dir(toPath), filepath.Base(toPath)) + } + if fid, file_err := filer.DeleteFile(fromPath); file_err == nil { + if _, err := filer.FindDirectory(toPath); err == nil { + // move file under an existing folder + return filer.CreateFile(filepath.Join(toPath, filepath.Base(fromPath)), fid) + } + // move to a folder with new name + return filer.CreateFile(toPath, fid) + } + return fmt.Errorf("File %s is not found!", fromPath) +} diff --git a/weed/filer/embedded_filer/files_in_leveldb.go b/weed/filer/embedded_filer/files_in_leveldb.go new file mode 100644 index 000000000..19f6dd7e8 --- /dev/null +++ b/weed/filer/embedded_filer/files_in_leveldb.go @@ -0,0 +1,85 @@ +package embedded_filer + +import ( + "bytes" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/util" +) + +/* +The entry in level db has this format: + key: genKey(dirId, fileName) + value: []byte(fid) +genKey(dirId, fileName) uses the first 4 bytes to store dirId, and the rest for fileName +*/ + +type FileListInLevelDb struct { + db *leveldb.DB +} + +func NewFileListInLevelDb(dir string) (fl *FileListInLevelDb, err error) { + fl = &FileListInLevelDb{} + if fl.db, err = leveldb.OpenFile(dir, nil); err != nil { + return + } + return +} + +func genKey(dirId filer.DirectoryId, fileName string) []byte { + ret := make([]byte, 0, 4+len(fileName)) + for i := 3; i >= 0; i-- { + ret = append(ret, byte(dirId>>(uint(i)*8))) + } + ret = append(ret, []byte(fileName)...) 
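+ // keys sort as a 4-byte big-endian dirId followed by the file name, so all entries of one directory are contiguous and ListFiles can range-scan starting at genKey(dirId, lastFileName)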
+ return ret +} + +func (fl *FileListInLevelDb) CreateFile(dirId filer.DirectoryId, fileName string, fid string) (err error) { + glog.V(4).Infoln("directory", dirId, "fileName", fileName, "fid", fid) + return fl.db.Put(genKey(dirId, fileName), []byte(fid), nil) +} +func (fl *FileListInLevelDb) DeleteFile(dirId filer.DirectoryId, fileName string) (fid string, err error) { + if fid, err = fl.FindFile(dirId, fileName); err != nil { + if err == leveldb.ErrNotFound { + return "", nil + } + return + } + err = fl.db.Delete(genKey(dirId, fileName), nil) + return fid, err +} +func (fl *FileListInLevelDb) FindFile(dirId filer.DirectoryId, fileName string) (fid string, err error) { + data, e := fl.db.Get(genKey(dirId, fileName), nil) + if e != nil { + return "", e + } + return string(data), nil +} +func (fl *FileListInLevelDb) ListFiles(dirId filer.DirectoryId, lastFileName string, limit int) (files []filer.FileEntry) { + glog.V(4).Infoln("directory", dirId, "lastFileName", lastFileName, "limit", limit) + dirKey := genKey(dirId, "") + iter := fl.db.NewIterator(&util.Range{Start: genKey(dirId, lastFileName)}, nil) + limitCounter := 0 + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, dirKey) { + break + } + fileName := string(key[len(dirKey):]) + if fileName == lastFileName { + continue + } + limitCounter++ + if limit > 0 { + if limitCounter > limit { + break + } + } + files = append(files, filer.FileEntry{Name: fileName, Id: filer.FileId(string(iter.Value()))}) + } + iter.Release() + return +} diff --git a/weed/filer/filer.go b/weed/filer/filer.go new file mode 100644 index 000000000..fd23e119c --- /dev/null +++ b/weed/filer/filer.go @@ -0,0 +1,28 @@ +package filer + +type FileId string //file id in SeaweedFS + +type FileEntry struct { + Name string `json:"name,omitempty"` //file name without path + Id FileId `json:"fid,omitempty"` +} + +type DirectoryId int32 + +type DirectoryEntry struct { + Name string //dir name without path + Id DirectoryId +} + +type Filer interface { + CreateFile(fullFileName string, fid string) (err error) + FindFile(fullFileName string) (fid string, err error) + DeleteFile(fullFileName string) (fid string, err error) + + //Optional functions. 
embedded filer support these + FindDirectory(dirPath string) (dirId DirectoryId, err error) + ListDirectories(dirPath string) (dirs []DirectoryEntry, err error) + ListFiles(dirPath string, lastFileName string, limit int) (files []FileEntry, err error) + DeleteDirectory(dirPath string, recursive bool) (err error) + Move(fromPath string, toPath string) (err error) +} diff --git a/weed/filer/flat_namespace/flat_namespace_filer.go b/weed/filer/flat_namespace/flat_namespace_filer.go new file mode 100644 index 000000000..c20fd2521 --- /dev/null +++ b/weed/filer/flat_namespace/flat_namespace_filer.go @@ -0,0 +1,50 @@ +package flat_namespace + +import ( + "errors" + + "github.com/chrislusf/seaweedfs/weed/filer" +) + +type FlatNamespaceFiler struct { + master string + store FlatNamespaceStore +} + +var ( + ErrNotImplemented = errors.New("Not Implemented for flat namespace meta data store") +) + +func NewFlatNamespaceFiler(master string, store FlatNamespaceStore) *FlatNamespaceFiler { + return &FlatNamespaceFiler{ + master: master, + store: store, + } +} + +func (filer *FlatNamespaceFiler) CreateFile(fullFileName string, fid string) (err error) { + return filer.store.Put(fullFileName, fid) +} +func (filer *FlatNamespaceFiler) FindFile(fullFileName string) (fid string, err error) { + return filer.store.Get(fullFileName) +} +func (filer *FlatNamespaceFiler) FindDirectory(dirPath string) (dirId filer.DirectoryId, err error) { + return 0, ErrNotImplemented +} +func (filer *FlatNamespaceFiler) ListDirectories(dirPath string) (dirs []filer.DirectoryEntry, err error) { + return nil, ErrNotImplemented +} +func (filer *FlatNamespaceFiler) ListFiles(dirPath string, lastFileName string, limit int) (files []filer.FileEntry, err error) { + return nil, ErrNotImplemented +} +func (filer *FlatNamespaceFiler) DeleteDirectory(dirPath string, recursive bool) (err error) { + return ErrNotImplemented +} + +func (filer *FlatNamespaceFiler) DeleteFile(fullFileName string) (fid string, err error) { + return filer.store.Delete(fullFileName) +} + +func (filer *FlatNamespaceFiler) Move(fromPath string, toPath string) error { + return ErrNotImplemented +} diff --git a/weed/filer/flat_namespace/flat_namespace_store.go b/weed/filer/flat_namespace/flat_namespace_store.go new file mode 100644 index 000000000..832b70e40 --- /dev/null +++ b/weed/filer/flat_namespace/flat_namespace_store.go @@ -0,0 +1,9 @@ +package flat_namespace + +import () + +type FlatNamespaceStore interface { + Put(fullFileName string, fid string) (err error) + Get(fullFileName string) (fid string, err error) + Delete(fullFileName string) (fid string, err error) +} diff --git a/weed/filer/redis_store/redis_store.go b/weed/filer/redis_store/redis_store.go new file mode 100644 index 000000000..939172557 --- /dev/null +++ b/weed/filer/redis_store/redis_store.go @@ -0,0 +1,48 @@ +package redis_store + +import ( + redis "gopkg.in/redis.v2" +) + +type RedisStore struct { + Client *redis.Client +} + +func NewRedisStore(hostPort string, password string, database int) *RedisStore { + client := redis.NewTCPClient(&redis.Options{ + Addr: hostPort, + Password: password, + DB: int64(database), + }) + return &RedisStore{Client: client} +} + +func (s *RedisStore) Get(fullFileName string) (fid string, err error) { + fid, err = s.Client.Get(fullFileName).Result() + if err == redis.Nil { + err = nil + } + return fid, err +} +func (s *RedisStore) Put(fullFileName string, fid string) (err error) { + _, err = s.Client.Set(fullFileName, fid).Result() + if err == redis.Nil { + err = 
nil + } + return err +} + +// Currently the fid is not returned +func (s *RedisStore) Delete(fullFileName string) (fid string, err error) { + _, err = s.Client.Del(fullFileName).Result() + if err == redis.Nil { + err = nil + } + return "", err +} + +func (s *RedisStore) Close() { + if s.Client != nil { + s.Client.Close() + } +} diff --git a/weed/glide.lock b/weed/glide.lock new file mode 100644 index 000000000..047c89eae --- /dev/null +++ b/weed/glide.lock @@ -0,0 +1,93 @@ +hash: fc2f4ccfa5d703b62b9c93047bae2295e6926ce1603c2164510c9010ff829945 +updated: 2016-06-02T12:26:29.018990826-07:00 +imports: +- name: bazil.org/fuse + version: 5d02b06737b3b3c2e6a44e03348b6f2b44aa6835 + subpackages: + - fs + - fuseutil +- name: github.com/boltdb/bolt + version: dfb21201d9270c1082d5fb0f07f500311ff72f18 +- name: github.com/chrislusf/raft + version: 5f7ddd8f479583daf05879d3d3b174aa202c8fb7 + subpackages: + - protobuf +- name: github.com/dgrijalva/jwt-go + version: 9b486c879bab3fde556ce8c27d9a2bb05d5b2c60 +- name: github.com/disintegration/imaging + version: d8bbae1de109b518dabc98c6c1633eb358c148a4 +- name: github.com/gocql/gocql + version: b7b8a0e04b0cb0ca0b379421c58ec6fab9939b85 + subpackages: + - internal/lru + - internal/murmur + - internal/streams +- name: github.com/gogo/protobuf + version: 7883e1468d48d969e1c3ce4bcde89b6a7dd4adc4 + subpackages: + - proto +- name: github.com/golang/protobuf + version: 3b06fc7a4cad73efce5fe6217ab6c33e7231ab4a + subpackages: + - proto +- name: github.com/golang/snappy + version: d9eb7a3d35ec988b8585d4a0068e462c27d28380 +- name: github.com/gorilla/context + version: aed02d124ae4a0e94fea4541c8effd05bf0c8296 +- name: github.com/gorilla/mux + version: bd09be08ed4377796d312df0a45314e11b8f5dc1 +- name: github.com/hailocab/go-hostpool + version: e80d13ce29ede4452c43dea11e79b9bc8a15b478 +- name: github.com/hashicorp/golang-lru + version: a0d98a5f288019575c6d1f4bb1573fef2d1fcdc4 + subpackages: + - simplelru +- name: github.com/klauspost/crc32 + version: 19b0b332c9e4516a6370a0456e6182c3b5036720 +- name: github.com/reducedb/encoding + version: e31efcfdf1b19f4698709a6f522feaf101b1b1f4 + subpackages: + - cursor + - delta/bp32 + - bitpacking +- name: github.com/rwcarlsen/goexif + version: 709fab3d192d7c62f86043caff1e7e3fb0f42bd8 + subpackages: + - exif + - tiff +- name: github.com/syndtr/goleveldb + version: cfa635847112c5dc4782e128fa7e0d05fdbfb394 + subpackages: + - leveldb + - leveldb/util + - leveldb/cache + - leveldb/comparer + - leveldb/errors + - leveldb/filter + - leveldb/iterator + - leveldb/journal + - leveldb/memdb + - leveldb/opt + - leveldb/storage + - leveldb/table +- name: golang.org/x/image + version: 97680175a5267bb8b31f1923e7a66df98013b11a + subpackages: + - bmp + - tiff + - tiff/lzw +- name: golang.org/x/net + version: c4c3ea71919de159c9e246d7be66deb7f0a39a58 + subpackages: + - context +- name: golang.org/x/sys + version: 076b546753157f758b316e59bcb51e6807c04057 + subpackages: + - unix +- name: gopkg.in/bufio.v1 + version: 567b2bfa514e796916c4747494d6ff5132a1dfce +- name: gopkg.in/inf.v0 + version: 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4 +- name: gopkg.in/redis.v2 + version: e6179049628164864e6e84e973cfb56335748dea +devImports: [] diff --git a/weed/glide.yaml b/weed/glide.yaml new file mode 100644 index 000000000..e58b3679e --- /dev/null +++ b/weed/glide.yaml @@ -0,0 +1,31 @@ +package: github.com/chrislusf/seaweedfs/weed +import: +- package: bazil.org/fuse + subpackages: + - fs +- package: github.com/boltdb/bolt +- package: github.com/chrislusf/raft +- package: 
github.com/dgrijalva/jwt-go +- package: github.com/disintegration/imaging +- package: github.com/gocql/gocql +- package: github.com/golang/protobuf + subpackages: + - proto +- package: github.com/gorilla/mux +- package: github.com/hashicorp/golang-lru +- package: github.com/klauspost/crc32 +- package: github.com/reducedb/encoding + subpackages: + - cursor + - delta/bp32 +- package: github.com/rwcarlsen/goexif + subpackages: + - exif +- package: github.com/syndtr/goleveldb + subpackages: + - leveldb + - leveldb/util +- package: golang.org/x/net + subpackages: + - context +- package: gopkg.in/redis.v2 diff --git a/weed/glog/LICENSE b/weed/glog/LICENSE new file mode 100644 index 000000000..37ec93a14 --- /dev/null +++ b/weed/glog/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/weed/glog/README b/weed/glog/README new file mode 100644 index 000000000..5f9c11485 --- /dev/null +++ b/weed/glog/README @@ -0,0 +1,44 @@ +glog +==== + +Leveled execution logs for Go. + +This is an efficient pure Go implementation of leveled logs in the +manner of the open source C++ package + http://code.google.com/p/google-glog + +By binding methods to booleans it is possible to use the log package +without paying the expense of evaluating the arguments to the log. +Through the -vmodule flag, the package also provides fine-grained +control over logging at the file level. + +The comment from glog.go introduces the ideas: + + Package glog implements logging analogous to the Google-internal + C++ INFO/ERROR/V setup. It provides functions Info, Warning, + Error, Fatal, plus formatting variants such as Infof. It + also provides V-style logging controlled by the -v and + -vmodule=file=2 flags. + + Basic examples: + + glog.Info("Prepare to repel boarders") + + glog.Fatalf("Initialization failed: %s", err) + + See the documentation for the V function for an explanation + of these examples: + + if glog.V(2) { + glog.Info("Starting transaction...") + } + + glog.V(2).Infoln("Processed", nItems, "elements") + + +The repository contains an open source version of the log package +used inside Google. The master copy of the source lives inside +Google, not here. The code in this repo is for export only and is not itself +under development. Feature requests will be ignored. + +Send bug reports to golang-nuts@googlegroups.com. diff --git a/weed/glog/convenient_api.go b/weed/glog/convenient_api.go new file mode 100644 index 000000000..cb43d60e2 --- /dev/null +++ b/weed/glog/convenient_api.go @@ -0,0 +1,6 @@ +package glog + +/* +Copying the original glog because it is missing several convenient methods. +1. remove nano time in log format +*/ diff --git a/weed/glog/glog.go b/weed/glog/glog.go new file mode 100644 index 000000000..abd5678d4 --- /dev/null +++ b/weed/glog/glog.go @@ -0,0 +1,1181 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package glog implements logging analogous to the Google-internal C++ INFO/ERROR/V setup.
+// It provides functions Info, Warning, Error, Fatal, plus formatting variants such as
+// Infof. It also provides V-style logging controlled by the -v and -vmodule=file=2 flags.
+//
+// Basic examples:
+//
+//	glog.Info("Prepare to repel boarders")
+//
+//	glog.Fatalf("Initialization failed: %s", err)
+//
+// See the documentation for the V function for an explanation of these examples:
+//
+//	if glog.V(2) {
+//		glog.Info("Starting transaction...")
+//	}
+//
+//	glog.V(2).Infoln("Processed", nItems, "elements")
+//
+// Log output is buffered and written periodically using Flush. Programs
+// should call Flush before exiting to guarantee all log output is written.
+//
+// By default, all log statements write to files in a temporary directory
+// (and, because -alsologtostderr defaults to true in this copy, to standard
+// error as well).
+// This package provides several flags that modify this behavior.
+// As a result, flag.Parse must be called before any logging is done.
+//
+//	-logtostderr=false
+//		Logs are written to standard error instead of to files.
+//	-alsologtostderr=true
+//		Logs are written to standard error as well as to files.
+//	-stderrthreshold=ERROR
+//		Log events at or above this severity are logged to standard
+//		error as well as to files.
+//	-log_dir=""
+//		Log files will be written to this directory instead of the
+//		default temporary directory.
+//
+// Other flags provide aids to debugging.
+//
+//	-log_backtrace_at=""
+//		When set to a file and line number holding a logging statement,
+//		such as
+//			-log_backtrace_at=gopherflakes.go:234
+//		a stack trace will be written to the Info log whenever execution
+//		hits that statement. (Unlike with -vmodule, the ".go" must be
+//		present.)
+//	-v=0
+//		Enable V-leveled logging at the specified level.
+//	-vmodule=""
+//		The syntax of the argument is a comma-separated list of pattern=N,
+//		where pattern is a literal file name (minus the ".go" suffix) or
+//		"glob" pattern and N is a V level. For instance,
+//			-vmodule=gopher*=3
+//		sets the V level to 3 in all Go files whose names begin "gopher".
+//
+package glog
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	stdLog "log"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// severity identifies the sort of log: info, warning etc. It also implements
+// the flag.Value interface. The -stderrthreshold flag is of type severity and
+// should be modified only through the flag.Value interface. The values match
+// the corresponding constants in C++.
+type severity int32 // sync/atomic int32
+
+// These constants identify the log levels in order of increasing severity.
+// A message written to a high-severity log file is also written to each
+// lower-severity log file.
+const (
+	infoLog severity = iota
+	warningLog
+	errorLog
+	fatalLog
+	numSeverity = 4
+)
+
+const severityChar = "IWEF"
+
+var severityName = []string{
+	infoLog:    "INFO",
+	warningLog: "WARNING",
+	errorLog:   "ERROR",
+	fatalLog:   "FATAL",
+}
+
+// get returns the value of the severity.
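+// It uses an atomic load, so callers need not hold logging.mu.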
+func (s *severity) get() severity { + return severity(atomic.LoadInt32((*int32)(s))) +} + +// set sets the value of the severity. +func (s *severity) set(val severity) { + atomic.StoreInt32((*int32)(s), int32(val)) +} + +// String is part of the flag.Value interface. +func (s *severity) String() string { + return strconv.FormatInt(int64(*s), 10) +} + +// Get is part of the flag.Value interface. +func (s *severity) Get() interface{} { + return *s +} + +// Set is part of the flag.Value interface. +func (s *severity) Set(value string) error { + var threshold severity + // Is it a known name? + if v, ok := severityByName(value); ok { + threshold = v + } else { + v, err := strconv.Atoi(value) + if err != nil { + return err + } + threshold = severity(v) + } + logging.stderrThreshold.set(threshold) + return nil +} + +func severityByName(s string) (severity, bool) { + s = strings.ToUpper(s) + for i, name := range severityName { + if name == s { + return severity(i), true + } + } + return 0, false +} + +// OutputStats tracks the number of output lines and bytes written. +type OutputStats struct { + lines int64 + bytes int64 +} + +// Lines returns the number of lines written. +func (s *OutputStats) Lines() int64 { + return atomic.LoadInt64(&s.lines) +} + +// Bytes returns the number of bytes written. +func (s *OutputStats) Bytes() int64 { + return atomic.LoadInt64(&s.bytes) +} + +// Stats tracks the number of lines of output and number of bytes +// per severity level. Values must be read with atomic.LoadInt64. +var Stats struct { + Info, Warning, Error OutputStats +} + +var severityStats = [numSeverity]*OutputStats{ + infoLog: &Stats.Info, + warningLog: &Stats.Warning, + errorLog: &Stats.Error, +} + +// Level is exported because it appears in the arguments to V and is +// the type of the v flag, which can be set programmatically. +// It's a distinct type because we want to discriminate it from logType. +// Variables of type level are only changed under logging.mu. +// The -v flag is read only with atomic ops, so the state of the logging +// module is consistent. + +// Level is treated as a sync/atomic int32. + +// Level specifies a level of verbosity for V logs. *Level implements +// flag.Value; the -v flag is of type Level and should be modified +// only through the flag.Value interface. +type Level int32 + +// get returns the value of the Level. +func (l *Level) get() Level { + return Level(atomic.LoadInt32((*int32)(l))) +} + +// set sets the value of the Level. +func (l *Level) set(val Level) { + atomic.StoreInt32((*int32)(l), int32(val)) +} + +// String is part of the flag.Value interface. +func (l *Level) String() string { + return strconv.FormatInt(int64(*l), 10) +} + +// Get is part of the flag.Value interface. +func (l *Level) Get() interface{} { + return *l +} + +// Set is part of the flag.Value interface. +func (l *Level) Set(value string) error { + v, err := strconv.Atoi(value) + if err != nil { + return err + } + logging.mu.Lock() + defer logging.mu.Unlock() + logging.setVState(Level(v), logging.vmodule.filter, false) + return nil +} + +// moduleSpec represents the setting of the -vmodule flag. +type moduleSpec struct { + filter []modulePat +} + +// modulePat contains a filter for the -vmodule flag. +// It holds a verbosity level and a file pattern to match. +type modulePat struct { + pattern string + literal bool // The pattern is a literal string + level Level +} + +// match reports whether the file matches the pattern. 
It uses a string +// comparison if the pattern contains no metacharacters. +func (m *modulePat) match(file string) bool { + if m.literal { + return file == m.pattern + } + match, _ := filepath.Match(m.pattern, file) + return match +} + +func (m *moduleSpec) String() string { + // Lock because the type is not atomic. TODO: clean this up. + logging.mu.Lock() + defer logging.mu.Unlock() + var b bytes.Buffer + for i, f := range m.filter { + if i > 0 { + b.WriteRune(',') + } + fmt.Fprintf(&b, "%s=%d", f.pattern, f.level) + } + return b.String() +} + +// Get is part of the (Go 1.2) flag.Getter interface. It always returns nil for this flag type since the +// struct is not exported. +func (m *moduleSpec) Get() interface{} { + return nil +} + +var errVmoduleSyntax = errors.New("syntax error: expect comma-separated list of filename=N") + +// Syntax: -vmodule=recordio=2,file=1,gfs*=3 +func (m *moduleSpec) Set(value string) error { + var filter []modulePat + for _, pat := range strings.Split(value, ",") { + if len(pat) == 0 { + // Empty strings such as from a trailing comma can be ignored. + continue + } + patLev := strings.Split(pat, "=") + if len(patLev) != 2 || len(patLev[0]) == 0 || len(patLev[1]) == 0 { + return errVmoduleSyntax + } + pattern := patLev[0] + v, err := strconv.Atoi(patLev[1]) + if err != nil { + return errors.New("syntax error: expect comma-separated list of filename=N") + } + if v < 0 { + return errors.New("negative value for vmodule level") + } + if v == 0 { + continue // Ignore. It's harmless but no point in paying the overhead. + } + // TODO: check syntax of filter? + filter = append(filter, modulePat{pattern, isLiteral(pattern), Level(v)}) + } + logging.mu.Lock() + defer logging.mu.Unlock() + logging.setVState(logging.verbosity, filter, true) + return nil +} + +// isLiteral reports whether the pattern is a literal string, that is, has no metacharacters +// that require filepath.Match to be called to match the pattern. +func isLiteral(pattern string) bool { + return !strings.ContainsAny(pattern, `\*?[]`) +} + +// traceLocation represents the setting of the -log_backtrace_at flag. +type traceLocation struct { + file string + line int +} + +// isSet reports whether the trace location has been specified. +// logging.mu is held. +func (t *traceLocation) isSet() bool { + return t.line > 0 +} + +// match reports whether the specified file and line matches the trace location. +// The argument file name is the full path, not the basename specified in the flag. +// logging.mu is held. +func (t *traceLocation) match(file string, line int) bool { + if t.line != line { + return false + } + if i := strings.LastIndex(file, "/"); i >= 0 { + file = file[i+1:] + } + return t.file == file +} + +func (t *traceLocation) String() string { + // Lock because the type is not atomic. TODO: clean this up. + logging.mu.Lock() + defer logging.mu.Unlock() + return fmt.Sprintf("%s:%d", t.file, t.line) +} + +// Get is part of the (Go 1.2) flag.Getter interface. It always returns nil for this flag type since the +// struct is not exported +func (t *traceLocation) Get() interface{} { + return nil +} + +var errTraceSyntax = errors.New("syntax error: expect file.go:234") + +// Syntax: -log_backtrace_at=gopherflakes.go:234 +// Note that unlike vmodule the file extension is included here. +func (t *traceLocation) Set(value string) error { + if value == "" { + // Unset. 
+		logging.mu.Lock()
+		defer logging.mu.Unlock()
+		t.line = 0
+		t.file = ""
+		return nil
+	}
+	fields := strings.Split(value, ":")
+	if len(fields) != 2 {
+		return errTraceSyntax
+	}
+	file, line := fields[0], fields[1]
+	if !strings.Contains(file, ".") {
+		return errTraceSyntax
+	}
+	v, err := strconv.Atoi(line)
+	if err != nil {
+		return errTraceSyntax
+	}
+	if v <= 0 {
+		return errors.New("negative or zero value for line number")
+	}
+	logging.mu.Lock()
+	defer logging.mu.Unlock()
+	t.line = v
+	t.file = file
+	return nil
+}
+
+// flushSyncWriter is the interface satisfied by logging destinations.
+type flushSyncWriter interface {
+	Flush() error
+	Sync() error
+	io.Writer
+}
+
+func init() {
+	flag.BoolVar(&logging.toStderr, "logtostderr", false, "log to standard error instead of files")
+	flag.BoolVar(&logging.alsoToStderr, "alsologtostderr", true, "log to standard error as well as files")
+	flag.Var(&logging.verbosity, "v", "log level for V logs")
+	flag.Var(&logging.stderrThreshold, "stderrthreshold", "logs at or above this threshold go to stderr")
+	flag.Var(&logging.vmodule, "vmodule", "comma-separated list of pattern=N settings for file-filtered logging")
+	flag.Var(&logging.traceLocation, "log_backtrace_at", "when logging hits line file:N, emit a stack trace")
+
+	// Default stderrThreshold is ERROR.
+	logging.stderrThreshold = errorLog
+
+	logging.setVState(0, nil, false)
+	go logging.flushDaemon()
+}
+
+// Flush flushes all pending log I/O.
+func Flush() {
+	logging.lockAndFlushAll()
+}
+
+// loggingT collects all the global state of the logging setup.
+type loggingT struct {
+	// Boolean flags. Not handled atomically because the flag.Value interface
+	// does not let us avoid the =true, and that shorthand is necessary for
+	// compatibility. TODO: does this matter enough to fix? Seems unlikely.
+	toStderr     bool // The -logtostderr flag.
+	alsoToStderr bool // The -alsologtostderr flag.
+
+	// Level flag. Handled atomically.
+	stderrThreshold severity // The -stderrthreshold flag.
+
+	// freeList is a list of byte buffers, maintained under freeListMu.
+	freeList *buffer
+	// freeListMu maintains the free list. It is separate from the main mutex
+	// so buffers can be grabbed and printed to without holding the main lock,
+	// for better parallelization.
+	freeListMu sync.Mutex
+
+	// mu protects the remaining elements of this structure and is
+	// used to synchronize logging.
+	mu sync.Mutex
+	// file holds writer for each of the log types.
+	file [numSeverity]flushSyncWriter
+	// pcs is used in V to avoid an allocation when computing the caller's PC.
+	pcs [1]uintptr
+	// vmap is a cache of the V Level for each V() call site, identified by PC.
+	// It is wiped whenever the vmodule flag changes state.
+	vmap map[uintptr]Level
+	// filterLength stores the length of the vmodule filter chain. If greater
+	// than zero, it means vmodule is enabled. It may be read safely
+	// using atomic.LoadInt32, but is only modified under mu.
+	filterLength int32
+	// traceLocation is the state of the -log_backtrace_at flag.
+	traceLocation traceLocation
+	// These flags are modified only under lock, although verbosity may be fetched
+	// safely using atomic.LoadInt32.
+	vmodule   moduleSpec // The state of the -vmodule flag.
+	verbosity Level      // V logging level, the value of the -v flag.
+
+	// added by seaweedfs
+	exited bool
+}
+
+// buffer holds a byte Buffer for reuse. The zero value is ready for use.
+type buffer struct {
+	bytes.Buffer
+	tmp  [64]byte // temporary byte array for creating headers.
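+	// next links this buffer into loggingT.freeList when it is not in use.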
+	next *buffer
+}
+
+var logging loggingT
+
+// setVState sets a consistent state for V logging.
+// l.mu is held.
+func (l *loggingT) setVState(verbosity Level, filter []modulePat, setFilter bool) {
+	// Turn verbosity off so V will not fire while we are in transition.
+	logging.verbosity.set(0)
+	// Ditto for filter length.
+	atomic.StoreInt32(&logging.filterLength, 0)
+
+	// Set the new filters and wipe the pc->Level map if the filter has changed.
+	if setFilter {
+		logging.vmodule.filter = filter
+		logging.vmap = make(map[uintptr]Level)
+	}
+
+	// Things are consistent now, so enable filtering and verbosity.
+	// They are enabled in order opposite to that in V.
+	atomic.StoreInt32(&logging.filterLength, int32(len(filter)))
+	logging.verbosity.set(verbosity)
+}
+
+// getBuffer returns a new, ready-to-use buffer.
+func (l *loggingT) getBuffer() *buffer {
+	l.freeListMu.Lock()
+	b := l.freeList
+	if b != nil {
+		l.freeList = b.next
+	}
+	l.freeListMu.Unlock()
+	if b == nil {
+		b = new(buffer)
+	} else {
+		b.next = nil
+		b.Reset()
+	}
+	return b
+}
+
+// putBuffer returns a buffer to the free list.
+func (l *loggingT) putBuffer(b *buffer) {
+	if b.Len() >= 256 {
+		// Let big buffers die a natural death.
+		return
+	}
+	l.freeListMu.Lock()
+	b.next = l.freeList
+	l.freeList = b
+	l.freeListMu.Unlock()
+}
+
+var timeNow = time.Now // Stubbed out for testing.
+
+/*
+header formats a log header as defined by the C++ implementation.
+It returns a buffer containing the formatted header and the user's file and line number.
+The depth specifies how many stack frames above lives the source line to be identified in the log message.
+
+Log lines have this form:
+	Lmmdd hh:mm:ss threadid file:line] msg...
+where the fields are defined as follows:
+	L                A single character, representing the log level (eg 'I' for INFO)
+	mm               The month (zero padded; ie May is '05')
+	dd               The day (zero padded)
+	hh:mm:ss         Time in hours, minutes and seconds
+	threadid         The space-padded thread ID as returned by GetTID()
+	file             The file name
+	line             The line number
+	msg              The user-supplied message
+*/
+func (l *loggingT) header(s severity, depth int) (*buffer, string, int) {
+	_, file, line, ok := runtime.Caller(3 + depth)
+	if !ok {
+		file = "???"
+		line = 1
+	} else {
+		slash := strings.LastIndex(file, "/")
+		if slash >= 0 {
+			file = file[slash+1:]
+		}
+	}
+	return l.formatHeader(s, file, line), file, line
+}
+
+// formatHeader formats a log header using the provided file name and line number.
+func (l *loggingT) formatHeader(s severity, file string, line int) *buffer {
+	now := timeNow()
+	if line < 0 {
+		line = 0 // not a real line number, but acceptable to someDigits
+	}
+	if s > fatalLog {
+		s = infoLog // for safety.
+	}
+	buf := l.getBuffer()
+
+	// Avoid Fprintf, for speed. The format is so simple that we can do it quickly by hand.
+	// It's worth about 3X. Fprintf is hard.
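+	// A finished header looks like "I0102 15:04:05 12345 main.go:42] ",
+	// with the pid field space-padded to five characters.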
+ _, month, day := now.Date() + hour, minute, second := now.Clock() + // Lmmdd hh:mm:ss.uuuuuu threadid file:line] + buf.tmp[0] = severityChar[s] + buf.twoDigits(1, int(month)) + buf.twoDigits(3, day) + buf.tmp[5] = ' ' + buf.twoDigits(6, hour) + buf.tmp[8] = ':' + buf.twoDigits(9, minute) + buf.tmp[11] = ':' + buf.twoDigits(12, second) + buf.tmp[14] = ' ' + buf.nDigits(5, 15, pid, ' ') // TODO: should be TID + buf.tmp[20] = ' ' + buf.Write(buf.tmp[:21]) + buf.WriteString(file) + buf.tmp[0] = ':' + n := buf.someDigits(1, line) + buf.tmp[n+1] = ']' + buf.tmp[n+2] = ' ' + buf.Write(buf.tmp[:n+3]) + return buf +} + +// Some custom tiny helper functions to print the log header efficiently. + +const digits = "0123456789" + +// twoDigits formats a zero-prefixed two-digit integer at buf.tmp[i]. +func (buf *buffer) twoDigits(i, d int) { + buf.tmp[i+1] = digits[d%10] + d /= 10 + buf.tmp[i] = digits[d%10] +} + +// nDigits formats an n-digit integer at buf.tmp[i], +// padding with pad on the left. +// It assumes d >= 0. +func (buf *buffer) nDigits(n, i, d int, pad byte) { + j := n - 1 + for ; j >= 0 && d > 0; j-- { + buf.tmp[i+j] = digits[d%10] + d /= 10 + } + for ; j >= 0; j-- { + buf.tmp[i+j] = pad + } +} + +// someDigits formats a zero-prefixed variable-width integer at buf.tmp[i]. +func (buf *buffer) someDigits(i, d int) int { + // Print into the top, then copy down. We know there's space for at least + // a 10-digit number. + j := len(buf.tmp) + for { + j-- + buf.tmp[j] = digits[d%10] + d /= 10 + if d == 0 { + break + } + } + return copy(buf.tmp[i:], buf.tmp[j:]) +} + +func (l *loggingT) println(s severity, args ...interface{}) { + buf, file, line := l.header(s, 0) + fmt.Fprintln(buf, args...) + l.output(s, buf, file, line, false) +} + +func (l *loggingT) print(s severity, args ...interface{}) { + l.printDepth(s, 1, args...) +} + +func (l *loggingT) printDepth(s severity, depth int, args ...interface{}) { + buf, file, line := l.header(s, depth) + fmt.Fprint(buf, args...) + if buf.Bytes()[buf.Len()-1] != '\n' { + buf.WriteByte('\n') + } + l.output(s, buf, file, line, false) +} + +func (l *loggingT) printf(s severity, format string, args ...interface{}) { + buf, file, line := l.header(s, 0) + fmt.Fprintf(buf, format, args...) + if buf.Bytes()[buf.Len()-1] != '\n' { + buf.WriteByte('\n') + } + l.output(s, buf, file, line, false) +} + +// printWithFileLine behaves like print but uses the provided file and line number. If +// alsoLogToStderr is true, the log message always appears on standard error; it +// will also appear in the log file unless --logtostderr is set. +func (l *loggingT) printWithFileLine(s severity, file string, line int, alsoToStderr bool, args ...interface{}) { + buf := l.formatHeader(s, file, line) + fmt.Fprint(buf, args...) + if buf.Bytes()[buf.Len()-1] != '\n' { + buf.WriteByte('\n') + } + l.output(s, buf, file, line, alsoToStderr) +} + +// output writes the data to the log files and releases the buffer. +func (l *loggingT) output(s severity, buf *buffer, file string, line int, alsoToStderr bool) { + l.mu.Lock() + if l.traceLocation.isSet() { + if l.traceLocation.match(file, line) { + buf.Write(stacks(false)) + } + } + data := buf.Bytes() + if l.toStderr { + os.Stderr.Write(data) + } else { + if alsoToStderr || l.alsoToStderr || s >= l.stderrThreshold.get() { + os.Stderr.Write(data) + } + if l.file[s] == nil { + if err := l.createFiles(s); err != nil { + os.Stderr.Write(data) // Make sure the message appears somewhere. 
+				l.exit(err)
+			}
+		}
+		switch s {
+		case fatalLog:
+			l.file[fatalLog].Write(data)
+			fallthrough
+		case errorLog:
+			l.file[errorLog].Write(data)
+			fallthrough
+		case warningLog:
+			l.file[warningLog].Write(data)
+			fallthrough
+		case infoLog:
+			l.file[infoLog].Write(data)
+		}
+	}
+	if s == fatalLog {
+		// If we got here via Exit rather than Fatal, print no stacks.
+		if atomic.LoadUint32(&fatalNoStacks) > 0 {
+			l.mu.Unlock()
+			timeoutFlush(10 * time.Second)
+			os.Exit(1)
+		}
+		// Dump all goroutine stacks before exiting.
+		// First, make sure we see the trace for the current goroutine on standard error.
+		// If -logtostderr has been specified, the loop below will do that anyway
+		// as the first stack in the full dump.
+		if !l.toStderr {
+			os.Stderr.Write(stacks(false))
+		}
+		// Write the stack trace for all goroutines to the files.
+		trace := stacks(true)
+		logExitFunc = func(error) {} // If we get a write error, we'll still exit below.
+		for log := fatalLog; log >= infoLog; log-- {
+			if f := l.file[log]; f != nil { // Can be nil if -logtostderr is set.
+				f.Write(trace)
+			}
+		}
+		l.mu.Unlock()
+		timeoutFlush(10 * time.Second)
+		os.Exit(255) // C++ uses -1, which is silly because it's anded with 255 anyway.
+	}
+	l.putBuffer(buf)
+	l.mu.Unlock()
+	if stats := severityStats[s]; stats != nil {
+		atomic.AddInt64(&stats.lines, 1)
+		atomic.AddInt64(&stats.bytes, int64(len(data)))
+	}
+}
+
+// timeoutFlush calls Flush and returns when it completes or after timeout
+// elapses, whichever happens first. This is needed because the hooks invoked
+// by Flush may deadlock when glog.Fatal is called from a hook that holds
+// a lock.
+func timeoutFlush(timeout time.Duration) {
+	done := make(chan bool, 1)
+	go func() {
+		Flush() // calls logging.lockAndFlushAll()
+		done <- true
+	}()
+	select {
+	case <-done:
+	case <-time.After(timeout):
+		fmt.Fprintln(os.Stderr, "glog: Flush took longer than", timeout)
+	}
+}
+
+// stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines.
+func stacks(all bool) []byte {
+	// We don't know how big the traces are, so grow a few times if they don't fit. Start large, though.
+	n := 10000
+	if all {
+		n = 100000
+	}
+	var trace []byte
+	for i := 0; i < 5; i++ {
+		trace = make([]byte, n)
+		nbytes := runtime.Stack(trace, all)
+		if nbytes < len(trace) {
+			return trace[:nbytes]
+		}
+		n *= 2
+	}
+	return trace
+}
+
+// logExitFunc provides a simple mechanism to override the default behavior
+// of exiting on error. Used in testing and to guarantee we reach a required exit
+// for fatal logs. Instead, exit could be a function rather than a method but that
+// would make its use clumsier.
+var logExitFunc func(error)
+
+// exit is called if there is trouble creating or writing log files.
+// It flushes the logs; in this copy it then marks the logger as exited
+// instead of terminating the process.
+// l.mu is held.
+func (l *loggingT) exit(err error) {
+	fmt.Fprintf(os.Stderr, "glog: exiting because of error: %s\n", err)
+	// If logExitFunc is set, we do that instead of exiting.
+	if logExitFunc != nil {
+		logExitFunc(err)
+		return
+	}
+	l.flushAll()
+	l.exited = true // os.Exit(2)
+}
+
+// syncBuffer joins a bufio.Writer to its underlying file, providing access to the
+// file's Sync method and providing a wrapper for the Write method that provides log
+// file rotation. There are conflicting methods, so the file cannot be embedded.
+// l.mu is held for all its methods.
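+// Write rotates to a fresh file via rotateFile once nbytes would reach MaxSize,
+// giving size-based log rotation.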
+type syncBuffer struct { + logger *loggingT + *bufio.Writer + file *os.File + sev severity + nbytes uint64 // The number of bytes written to this file +} + +func (sb *syncBuffer) Sync() error { + return sb.file.Sync() +} + +func (sb *syncBuffer) Write(p []byte) (n int, err error) { + if sb.logger.exited { + return + } + if sb.nbytes+uint64(len(p)) >= MaxSize { + if err := sb.rotateFile(time.Now()); err != nil { + sb.logger.exit(err) + } + } + n, err = sb.Writer.Write(p) + sb.nbytes += uint64(n) + if err != nil { + sb.logger.exit(err) + } + return +} + +// rotateFile closes the syncBuffer's file and starts a new one. +func (sb *syncBuffer) rotateFile(now time.Time) error { + if sb.file != nil { + sb.Flush() + sb.file.Close() + } + var err error + sb.file, _, err = create(severityName[sb.sev], now) + sb.nbytes = 0 + if err != nil { + return err + } + + sb.Writer = bufio.NewWriterSize(sb.file, bufferSize) + + // Write header. + var buf bytes.Buffer + fmt.Fprintf(&buf, "Log file created at: %s\n", now.Format("2006/01/02 15:04:05")) + fmt.Fprintf(&buf, "Running on machine: %s\n", host) + fmt.Fprintf(&buf, "Binary: Built with %s %s for %s/%s\n", runtime.Compiler, runtime.Version(), runtime.GOOS, runtime.GOARCH) + fmt.Fprintf(&buf, "Log line format: [IWEF]mmdd hh:mm:ss threadid file:line] msg\n") + n, err := sb.file.Write(buf.Bytes()) + sb.nbytes += uint64(n) + return err +} + +// bufferSize sizes the buffer associated with each log file. It's large +// so that log records can accumulate without the logging thread blocking +// on disk I/O. The flushDaemon will block instead. +const bufferSize = 256 * 1024 + +// createFiles creates all the log files for severity from sev down to infoLog. +// l.mu is held. +func (l *loggingT) createFiles(sev severity) error { + now := time.Now() + // Files are created in decreasing severity order, so as soon as we find one + // has already been created, we can stop. + for s := sev; s >= infoLog && l.file[s] == nil; s-- { + sb := &syncBuffer{ + logger: l, + sev: s, + } + if err := sb.rotateFile(now); err != nil { + return err + } + l.file[s] = sb + } + return nil +} + +const flushInterval = 30 * time.Second + +// flushDaemon periodically flushes the log file buffers. +func (l *loggingT) flushDaemon() { + for _ = range time.NewTicker(flushInterval).C { + l.lockAndFlushAll() + } +} + +// lockAndFlushAll is like flushAll but locks l.mu first. +func (l *loggingT) lockAndFlushAll() { + l.mu.Lock() + l.flushAll() + l.mu.Unlock() +} + +// flushAll flushes all the logs and attempts to "sync" their data to disk. +// l.mu is held. +func (l *loggingT) flushAll() { + // Flush from fatal down, in case there's trouble flushing. + for s := fatalLog; s >= infoLog; s-- { + file := l.file[s] + if file != nil { + file.Flush() // ignore error + file.Sync() // ignore error + } + } +} + +// CopyStandardLogTo arranges for messages written to the Go "log" package's +// default logs to also appear in the Google logs for the named and lower +// severities. Subsequent changes to the standard log's default output location +// or format may break this behavior. +// +// Valid names are "INFO", "WARNING", "ERROR", and "FATAL". If the name is not +// recognized, CopyStandardLogTo panics. 
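+// For example, CopyStandardLogTo("INFO") routes everything written through the
+// standard log package into this package's INFO log.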
+func CopyStandardLogTo(name string) {
+	sev, ok := severityByName(name)
+	if !ok {
+		panic(fmt.Sprintf("log.CopyStandardLogTo(%q): unrecognized severity name", name))
+	}
+	// Set a log format that captures the user's file and line:
+	//   d.go:23: message
+	stdLog.SetFlags(stdLog.Lshortfile)
+	stdLog.SetOutput(logBridge(sev))
+}
+
+// logBridge provides the Write method that enables CopyStandardLogTo to connect
+// Go's standard logs to the logs provided by this package.
+type logBridge severity
+
+// Write parses the standard logging line and passes its components to the
+// logger for severity(lb).
+func (lb logBridge) Write(b []byte) (n int, err error) {
+	var (
+		file = "???"
+		line = 1
+		text string
+	)
+	// Split "d.go:23: message" into "d.go", "23", and "message".
+	if parts := bytes.SplitN(b, []byte{':'}, 3); len(parts) != 3 || len(parts[0]) < 1 || len(parts[2]) < 1 {
+		text = fmt.Sprintf("bad log format: %s", b)
+	} else {
+		file = string(parts[0])
+		text = string(parts[2][1:]) // skip leading space
+		line, err = strconv.Atoi(string(parts[1]))
+		if err != nil {
+			text = fmt.Sprintf("bad line number: %s", b)
+			line = 1
+		}
+	}
+	// printWithFileLine with alsoToStderr=true, so standard log messages
+	// always appear on standard error.
+	logging.printWithFileLine(severity(lb), file, line, true, text)
+	return len(b), nil
+}
+
+// setV computes and remembers the V level for a given PC
+// when vmodule is enabled.
+// File pattern matching takes the basename of the file, stripped
+// of its .go suffix, and uses filepath.Match, which is a little more
+// general than the *? matching used in C++.
+// l.mu is held.
+func (l *loggingT) setV(pc uintptr) Level {
+	fn := runtime.FuncForPC(pc)
+	file, _ := fn.FileLine(pc)
+	// The file is something like /a/b/c/d.go. We want just the d.
+	if strings.HasSuffix(file, ".go") {
+		file = file[:len(file)-3]
+	}
+	if slash := strings.LastIndex(file, "/"); slash >= 0 {
+		file = file[slash+1:]
+	}
+	for _, filter := range l.vmodule.filter {
+		if filter.match(file) {
+			l.vmap[pc] = filter.level
+			return filter.level
+		}
+	}
+	l.vmap[pc] = 0
+	return 0
+}
+
+// Verbose is a boolean type that implements Infof (like Printf) etc.
+// See the documentation of V for more information.
+type Verbose bool
+
+// V reports whether verbosity at the call site is at least the requested level.
+// The returned value is a boolean of type Verbose, which implements Info, Infoln
+// and Infof. These methods will write to the Info log if called.
+// Thus, one may write either
+//	if glog.V(2) { glog.Info("log this") }
+// or
+//	glog.V(2).Info("log this")
+// The second form is shorter but the first is cheaper if logging is off because it does
+// not evaluate its arguments.
+//
+// Whether an individual call to V generates a log record depends on the setting of
+// the -v and --vmodule flags; both are off by default. If the level in the call to
+// V is at least the value of -v, or of -vmodule for the source file containing the
+// call, the V call will log.
+func V(level Level) Verbose {
+	// This function tries hard to be cheap unless there's work to do.
+	// The fast path is two atomic loads and compares.
+
+	// Here is a cheap but safe test to see if V logging is enabled globally.
+	if logging.verbosity.get() >= level {
+		return Verbose(true)
+	}
+
+	// It's off globally but vmodule may still be set.
+	// Here is another cheap but safe test to see if vmodule is enabled.
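+	// filterLength is non-zero only while a -vmodule filter is installed (see setVState).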
+ if atomic.LoadInt32(&logging.filterLength) > 0 { + // Now we need a proper lock to use the logging structure. The pcs field + // is shared so we must lock before accessing it. This is fairly expensive, + // but if V logging is enabled we're slow anyway. + logging.mu.Lock() + defer logging.mu.Unlock() + if runtime.Callers(2, logging.pcs[:]) == 0 { + return Verbose(false) + } + v, ok := logging.vmap[logging.pcs[0]] + if !ok { + v = logging.setV(logging.pcs[0]) + } + return Verbose(v >= level) + } + return Verbose(false) +} + +// Info is equivalent to the global Info function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Info(args ...interface{}) { + if v { + logging.print(infoLog, args...) + } +} + +// Infoln is equivalent to the global Infoln function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Infoln(args ...interface{}) { + if v { + logging.println(infoLog, args...) + } +} + +// Infof is equivalent to the global Infof function, guarded by the value of v. +// See the documentation of V for usage. +func (v Verbose) Infof(format string, args ...interface{}) { + if v { + logging.printf(infoLog, format, args...) + } +} + +// Info logs to the INFO log. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Info(args ...interface{}) { + logging.print(infoLog, args...) +} + +// InfoDepth acts as Info but uses depth to determine which call frame to log. +// InfoDepth(0, "msg") is the same as Info("msg"). +func InfoDepth(depth int, args ...interface{}) { + logging.printDepth(infoLog, depth, args...) +} + +// Infoln logs to the INFO log. +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Infoln(args ...interface{}) { + logging.println(infoLog, args...) +} + +// Infof logs to the INFO log. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Infof(format string, args ...interface{}) { + logging.printf(infoLog, format, args...) +} + +// Warning logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Warning(args ...interface{}) { + logging.print(warningLog, args...) +} + +// WarningDepth acts as Warning but uses depth to determine which call frame to log. +// WarningDepth(0, "msg") is the same as Warning("msg"). +func WarningDepth(depth int, args ...interface{}) { + logging.printDepth(warningLog, depth, args...) +} + +// Warningln logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Warningln(args ...interface{}) { + logging.println(warningLog, args...) +} + +// Warningf logs to the WARNING and INFO logs. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Warningf(format string, args ...interface{}) { + logging.printf(warningLog, format, args...) +} + +// Error logs to the ERROR, WARNING, and INFO logs. +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Error(args ...interface{}) { + logging.print(errorLog, args...) +} + +// ErrorDepth acts as Error but uses depth to determine which call frame to log. +// ErrorDepth(0, "msg") is the same as Error("msg"). +func ErrorDepth(depth int, args ...interface{}) { + logging.printDepth(errorLog, depth, args...) +} + +// Errorln logs to the ERROR, WARNING, and INFO logs. 
+// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Errorln(args ...interface{}) { + logging.println(errorLog, args...) +} + +// Errorf logs to the ERROR, WARNING, and INFO logs. +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Errorf(format string, args ...interface{}) { + logging.printf(errorLog, format, args...) +} + +// Fatal logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Fatal(args ...interface{}) { + logging.print(fatalLog, args...) +} + +// FatalDepth acts as Fatal but uses depth to determine which call frame to log. +// FatalDepth(0, "msg") is the same as Fatal("msg"). +func FatalDepth(depth int, args ...interface{}) { + logging.printDepth(fatalLog, depth, args...) +} + +// Fatalln logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Println; a newline is appended if missing. +func Fatalln(args ...interface{}) { + logging.println(fatalLog, args...) +} + +// Fatalf logs to the FATAL, ERROR, WARNING, and INFO logs, +// including a stack trace of all running goroutines, then calls os.Exit(255). +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Fatalf(format string, args ...interface{}) { + logging.printf(fatalLog, format, args...) +} + +// fatalNoStacks is non-zero if we are to exit without dumping goroutine stacks. +// It allows Exit and relatives to use the Fatal logs. +var fatalNoStacks uint32 + +// Exit logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Arguments are handled in the manner of fmt.Print; a newline is appended if missing. +func Exit(args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.print(fatalLog, args...) +} + +// ExitDepth acts as Exit but uses depth to determine which call frame to log. +// ExitDepth(0, "msg") is the same as Exit("msg"). +func ExitDepth(depth int, args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.printDepth(fatalLog, depth, args...) +} + +// Exitln logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +func Exitln(args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.println(fatalLog, args...) +} + +// Exitf logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. +func Exitf(format string, args ...interface{}) { + atomic.StoreUint32(&fatalNoStacks, 1) + logging.printf(fatalLog, format, args...) +} diff --git a/weed/glog/glog_file.go b/weed/glog/glog_file.go new file mode 100644 index 000000000..65075d281 --- /dev/null +++ b/weed/glog/glog_file.go @@ -0,0 +1,124 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// File I/O for logs. + +package glog + +import ( + "errors" + "flag" + "fmt" + "os" + "os/user" + "path/filepath" + "strings" + "sync" + "time" +) + +// MaxSize is the maximum size of a log file in bytes. +var MaxSize uint64 = 1024 * 1024 * 1800 + +// logDirs lists the candidate directories for new log files. +var logDirs []string + +// If non-empty, overrides the choice of directory in which to write logs. +// See createLogDirs for the full list of possible destinations. +var logDir = flag.String("log_dir", "", "If non-empty, write log files in this directory") + +func createLogDirs() { + if *logDir != "" { + logDirs = append(logDirs, *logDir) + } + logDirs = append(logDirs, os.TempDir()) +} + +var ( + pid = os.Getpid() + program = filepath.Base(os.Args[0]) + host = "unknownhost" + userName = "unknownuser" +) + +func init() { + h, err := os.Hostname() + if err == nil { + host = shortHostname(h) + } + + current, err := user.Current() + if err == nil { + userName = current.Username + } + + // Sanitize userName since it may contain filepath separators on Windows. + userName = strings.Replace(userName, `\`, "_", -1) +} + +// shortHostname returns its argument, truncating at the first period. +// For instance, given "www.google.com" it returns "www". +func shortHostname(hostname string) string { + if i := strings.Index(hostname, "."); i >= 0 { + return hostname[:i] + } + return hostname +} + +// logName returns a new log file name containing tag, with start time t, and +// the name for the symlink for tag. +func logName(tag string, t time.Time) (name, link string) { + name = fmt.Sprintf("%s.%s.%s.log.%s.%04d%02d%02d-%02d%02d%02d.%d", + program, + host, + userName, + tag, + t.Year(), + t.Month(), + t.Day(), + t.Hour(), + t.Minute(), + t.Second(), + pid) + return name, program + "." + tag +} + +var onceLogDirs sync.Once + +// create creates a new log file and returns the file and its filename, which +// contains tag ("INFO", "FATAL", etc.) and t. If the file is created +// successfully, create also attempts to update the symlink for that tag, ignoring +// errors. +func create(tag string, t time.Time) (f *os.File, filename string, err error) { + onceLogDirs.Do(createLogDirs) + if len(logDirs) == 0 { + return nil, "", errors.New("log: no log dirs") + } + name, link := logName(tag, t) + var lastErr error + for _, dir := range logDirs { + fname := filepath.Join(dir, name) + f, err := os.Create(fname) + if err == nil { + symlink := filepath.Join(dir, link) + os.Remove(symlink) // ignore err + os.Symlink(name, symlink) // ignore err + return f, fname, nil + } + lastErr = err + } + return nil, "", fmt.Errorf("log: cannot create log: %v", lastErr) +} diff --git a/weed/glog/glog_test.go b/weed/glog/glog_test.go new file mode 100644 index 000000000..12c3acf3d --- /dev/null +++ b/weed/glog/glog_test.go @@ -0,0 +1,415 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package glog + +import ( + "bytes" + "fmt" + stdLog "log" + "path/filepath" + "runtime" + "strconv" + "strings" + "testing" + "time" +) + +// Test that shortHostname works as advertised. +func TestShortHostname(t *testing.T) { + for hostname, expect := range map[string]string{ + "": "", + "host": "host", + "host.google.com": "host", + } { + if got := shortHostname(hostname); expect != got { + t.Errorf("shortHostname(%q): expected %q, got %q", hostname, expect, got) + } + } +} + +// flushBuffer wraps a bytes.Buffer to satisfy flushSyncWriter. +type flushBuffer struct { + bytes.Buffer +} + +func (f *flushBuffer) Flush() error { + return nil +} + +func (f *flushBuffer) Sync() error { + return nil +} + +// swap sets the log writers and returns the old array. +func (l *loggingT) swap(writers [numSeverity]flushSyncWriter) (old [numSeverity]flushSyncWriter) { + l.mu.Lock() + defer l.mu.Unlock() + old = l.file + for i, w := range writers { + logging.file[i] = w + } + return +} + +// newBuffers sets the log writers to all new byte buffers and returns the old array. +func (l *loggingT) newBuffers() [numSeverity]flushSyncWriter { + return l.swap([numSeverity]flushSyncWriter{new(flushBuffer), new(flushBuffer), new(flushBuffer), new(flushBuffer)}) +} + +// contents returns the specified log value as a string. +func contents(s severity) string { + return logging.file[s].(*flushBuffer).String() +} + +// contains reports whether the string is contained in the log. +func contains(s severity, str string, t *testing.T) bool { + return strings.Contains(contents(s), str) +} + +// setFlags configures the logging flags how the test expects them. +func setFlags() { + logging.toStderr = false +} + +// Test that Info works as advertised. 
+func TestInfo(t *testing.T) {
+	setFlags()
+	defer logging.swap(logging.newBuffers())
+	Info("test")
+	if !contains(infoLog, "I", t) {
+		t.Errorf("Info has wrong character: %q", contents(infoLog))
+	}
+	if !contains(infoLog, "test", t) {
+		t.Error("Info failed")
+	}
+}
+
+func TestInfoDepth(t *testing.T) {
+	setFlags()
+	defer logging.swap(logging.newBuffers())
+
+	f := func() { InfoDepth(1, "depth-test1") }
+
+	// The next three lines must stay together
+	_, _, wantLine, _ := runtime.Caller(0)
+	InfoDepth(0, "depth-test0")
+	f()
+
+	msgs := strings.Split(strings.TrimSuffix(contents(infoLog), "\n"), "\n")
+	if len(msgs) != 2 {
+		t.Fatalf("Got %d lines, expected 2", len(msgs))
+	}
+
+	for i, m := range msgs {
+		if !strings.HasPrefix(m, "I") {
+			t.Errorf("InfoDepth[%d] has wrong character: %q", i, m)
+		}
+		w := fmt.Sprintf("depth-test%d", i)
+		if !strings.Contains(m, w) {
+			t.Errorf("InfoDepth[%d] missing %q: %q", i, w, m)
+		}
+
+		// pull out the line number (between : and ])
+		msg := m[strings.LastIndex(m, ":")+1:]
+		x := strings.Index(msg, "]")
+		if x < 0 {
+			t.Errorf("InfoDepth[%d]: missing ']': %q", i, m)
+			continue
+		}
+		line, err := strconv.Atoi(msg[:x])
+		if err != nil {
+			t.Errorf("InfoDepth[%d]: bad line number: %q", i, m)
+			continue
+		}
+		wantLine++
+		if wantLine != line {
+			t.Errorf("InfoDepth[%d]: got line %d, want %d", i, line, wantLine)
+		}
+	}
+}
+
+func init() {
+	CopyStandardLogTo("INFO")
+}
+
+// Test that CopyStandardLogTo panics on bad input.
+func TestCopyStandardLogToPanic(t *testing.T) {
+	defer func() {
+		if s, ok := recover().(string); !ok || !strings.Contains(s, "LOG") {
+			t.Errorf(`CopyStandardLogTo("LOG") should have panicked: %v`, s)
+		}
+	}()
+	CopyStandardLogTo("LOG")
+}
+
+// Test that using the standard log package logs to INFO.
+func TestStandardLog(t *testing.T) {
+	setFlags()
+	defer logging.swap(logging.newBuffers())
+	stdLog.Print("test")
+	if !contains(infoLog, "I", t) {
+		t.Errorf("Info has wrong character: %q", contents(infoLog))
+	}
+	if !contains(infoLog, "test", t) {
+		t.Error("Info failed")
+	}
+}
+
+// Test that the header has the correct format.
+func TestHeader(t *testing.T) {
+	setFlags()
+	defer logging.swap(logging.newBuffers())
+	defer func(previous func() time.Time) { timeNow = previous }(timeNow)
+	timeNow = func() time.Time {
+		return time.Date(2006, 1, 2, 15, 4, 5, .067890e9, time.Local)
+	}
+	// Use a five-digit pid so the space-padded five-character pid field in the
+	// header matches the single space in the format string below.
+	pid = 12345
+	Info("test")
+	var line int
+	format := "I0102 15:04:05 12345 glog_test.go:%d] test\n"
+	n, err := fmt.Sscanf(contents(infoLog), format, &line)
+	if n != 1 || err != nil {
+		t.Errorf("log format error: %d elements, error %s:\n%s", n, err, contents(infoLog))
+	}
+	// Scanf treats multiple spaces as equivalent to a single space,
+	// so check for correct space-padding also.
+	want := fmt.Sprintf(format, line)
+	if contents(infoLog) != want {
+		t.Errorf("log format error: got:\n\t%q\nwant:\t%q", contents(infoLog), want)
+	}
+}
+
+// Test that an Error log goes to Warning and Info.
+// Even in the Info log, the source character will be E, so the data should
+// all be identical.
+func TestError(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + Error("test") + if !contains(errorLog, "E", t) { + t.Errorf("Error has wrong character: %q", contents(errorLog)) + } + if !contains(errorLog, "test", t) { + t.Error("Error failed") + } + str := contents(errorLog) + if !contains(warningLog, str, t) { + t.Error("Warning failed") + } + if !contains(infoLog, str, t) { + t.Error("Info failed") + } +} + +// Test that a Warning log goes to Info. +// Even in the Info log, the source character will be W, so the data should +// all be identical. +func TestWarning(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + Warning("test") + if !contains(warningLog, "W", t) { + t.Errorf("Warning has wrong character: %q", contents(warningLog)) + } + if !contains(warningLog, "test", t) { + t.Error("Warning failed") + } + str := contents(warningLog) + if !contains(infoLog, str, t) { + t.Error("Info failed") + } +} + +// Test that a V log goes to Info. +func TestV(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + logging.verbosity.Set("2") + defer logging.verbosity.Set("0") + V(2).Info("test") + if !contains(infoLog, "I", t) { + t.Errorf("Info has wrong character: %q", contents(infoLog)) + } + if !contains(infoLog, "test", t) { + t.Error("Info failed") + } +} + +// Test that a vmodule enables a log in this file. +func TestVmoduleOn(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + logging.vmodule.Set("glog_test=2") + defer logging.vmodule.Set("") + if !V(1) { + t.Error("V not enabled for 1") + } + if !V(2) { + t.Error("V not enabled for 2") + } + if V(3) { + t.Error("V enabled for 3") + } + V(2).Info("test") + if !contains(infoLog, "I", t) { + t.Errorf("Info has wrong character: %q", contents(infoLog)) + } + if !contains(infoLog, "test", t) { + t.Error("Info failed") + } +} + +// Test that a vmodule of another file does not enable a log in this file. +func TestVmoduleOff(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + logging.vmodule.Set("notthisfile=2") + defer logging.vmodule.Set("") + for i := 1; i <= 3; i++ { + if V(Level(i)) { + t.Errorf("V enabled for %d", i) + } + } + V(2).Info("test") + if contents(infoLog) != "" { + t.Error("V logged incorrectly") + } +} + +// vGlobs are patterns that match/don't match this file at V=2. +var vGlobs = map[string]bool{ + // Easy to test the numeric match here. + "glog_test=1": false, // If -vmodule sets V to 1, V(2) will fail. + "glog_test=2": true, + "glog_test=3": true, // If -vmodule sets V to 1, V(3) will succeed. + // These all use 2 and check the patterns. All are true. + "*=2": true, + "?l*=2": true, + "????_*=2": true, + "??[mno]?_*t=2": true, + // These all use 2 and check the patterns. All are false. + "*x=2": false, + "m*=2": false, + "??_*=2": false, + "?[abc]?_*t=2": false, +} + +// Test that vmodule globbing works as advertised. +func testVmoduleGlob(pat string, match bool, t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + defer logging.vmodule.Set("") + logging.vmodule.Set(pat) + if V(2) != Verbose(match) { + t.Errorf("incorrect match for %q: got %t expected %t", pat, V(2), match) + } +} + +// Test that a vmodule globbing works as advertised. 
+func TestVmoduleGlob(t *testing.T) { + for glob, match := range vGlobs { + testVmoduleGlob(glob, match, t) + } +} + +func TestRollover(t *testing.T) { + setFlags() + var err error + defer func(previous func(error)) { logExitFunc = previous }(logExitFunc) + logExitFunc = func(e error) { + err = e + } + defer func(previous uint64) { MaxSize = previous }(MaxSize) + MaxSize = 512 + + Info("x") // Be sure we have a file. + info, ok := logging.file[infoLog].(*syncBuffer) + if !ok { + t.Fatal("info wasn't created") + } + if err != nil { + t.Fatalf("info has initial error: %v", err) + } + fname0 := info.file.Name() + Info(strings.Repeat("x", int(MaxSize))) // force a rollover + if err != nil { + t.Fatalf("info has error after big write: %v", err) + } + + // Make sure the next log file gets a file name with a different + // time stamp. + // + // TODO: determine whether we need to support subsecond log + // rotation. C++ does not appear to handle this case (nor does it + // handle Daylight Savings Time properly). + time.Sleep(1 * time.Second) + + Info("x") // create a new file + if err != nil { + t.Fatalf("error after rotation: %v", err) + } + fname1 := info.file.Name() + if fname0 == fname1 { + t.Errorf("info.f.Name did not change: %v", fname0) + } + if info.nbytes >= MaxSize { + t.Errorf("file size was not reset: %d", info.nbytes) + } +} + +func TestLogBacktraceAt(t *testing.T) { + setFlags() + defer logging.swap(logging.newBuffers()) + // The peculiar style of this code simplifies line counting and maintenance of the + // tracing block below. + var infoLine string + setTraceLocation := func(file string, line int, ok bool, delta int) { + if !ok { + t.Fatal("could not get file:line") + } + _, file = filepath.Split(file) + infoLine = fmt.Sprintf("%s:%d", file, line+delta) + err := logging.traceLocation.Set(infoLine) + if err != nil { + t.Fatal("error setting log_backtrace_at: ", err) + } + } + { + // Start of tracing block. These lines know about each other's relative position. + _, file, line, ok := runtime.Caller(0) + setTraceLocation(file, line, ok, +2) // Two lines between Caller and Info calls. + Info("we want a stack trace here") + } + numAppearances := strings.Count(contents(infoLog), infoLine) + if numAppearances < 2 { + // Need 2 appearances, one in the log header and one in the trace: + // log_test.go:281: I0511 16:36:06.952398 02238 log_test.go:280] we want a stack trace here + // ... + // github.com/glog/glog_test.go:280 (0x41ba91) + // ... + // We could be more precise but that would require knowing the details + // of the traceback format, which may not be dependable. + t.Fatal("got no trace back; log is ", contents(infoLog)) + } +} + +func BenchmarkHeader(b *testing.B) { + for i := 0; i < b.N; i++ { + buf, _, _ := logging.header(infoLog, 0) + logging.putBuffer(buf) + } +} diff --git a/weed/images/favicon.go b/weed/images/favicon.go new file mode 100644 index 000000000..09504976c --- /dev/null +++ b/weed/images/favicon.go @@ -0,0 +1,236 @@ +// Code generated by go-bindata. 
+// sources: +// favicon/favicon.ico + +package images + +import ( + "bytes" + "compress/gzip" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" + "time" +) + +func bindataRead(data []byte, name string) ([]byte, error) { + gz, err := gzip.NewReader(bytes.NewBuffer(data)) + if err != nil { + return nil, fmt.Errorf("Read %q: %v", name, err) + } + + var buf bytes.Buffer + _, err = io.Copy(&buf, gz) + clErr := gz.Close() + + if err != nil { + return nil, fmt.Errorf("Read %q: %v", name, err) + } + if clErr != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +type asset struct { + bytes []byte + info os.FileInfo +} + +type bindataFileInfo struct { + name string + size int64 + mode os.FileMode + modTime time.Time +} + +func (fi bindataFileInfo) Name() string { + return fi.name +} +func (fi bindataFileInfo) Size() int64 { + return fi.size +} +func (fi bindataFileInfo) Mode() os.FileMode { + return fi.mode +} +func (fi bindataFileInfo) ModTime() time.Time { + return fi.modTime +} +func (fi bindataFileInfo) IsDir() bool { + return false +} +func (fi bindataFileInfo) Sys() interface{} { + return nil +} + +var _favicon = []byte("\x1f\x8b\x08\x00\x00\x09\x6e\x88\x00\xff\xec\x94\x5f\x48\x53\x51\x1c\xc7\xbf\x77\xce\xeb\xf1\xff\x9d\x53\x37\xe7\xbf\x4d\xcd\x2c\x08\xa2\x87\xea\xd1\x82\x9e\xa2\x88\x5e\x7a\x8a\x82\x1e\xf2\x21\x7a\x28\x42\xc9\x7a\xb0\x08\x22\x4d\x08\xfa\x43\x25\x61\x05\x4b\xac\xb7\x7a\x92\x8a\x82\xa2\x34\x59\x5c\x4d\x5b\x39\x2d\xfb\xb3\x9c\xe0\x52\xb7\x5a\x6e\x73\xeb\xdc\xbb\xed\xb6\x5d\xc7\x89\x7a\xa8\xa0\xfb\x81\x7b\xef\x39\xbf\xef\x3d\xf7\xfe\xce\x79\xf8\x00\x1c\x74\x10\x04\xe9\x49\xd0\x94\x09\xd4\x03\xb0\x5a\x63\xf3\x1e\x02\x74\xd2\x5a\x03\xad\x09\x52\x1d\xb1\xba\x0c\x87\x1f\x70\xb1\xab\xff\xd0\x2a\x0c\x9d\xde\x0e\xf1\xe4\x66\xbc\xba\xb2\x0f\xe1\xc0\x3c\x5e\x77\xef\x87\xe7\xe9\x4d\xcc\x3a\x1f\x21\x1a\x8d\x22\x1a\x89\xc0\x7d\xbf\x0b\x03\xcd\xab\x31\xde\x73\x18\x6f\x6e\xb5\x61\xea\xf1\x0d\x84\xbf\xf9\x31\x76\xfd\x20\xbe\x4e\xb9\x20\xb6\x6f\xc5\xb3\xd6\xb5\x78\x7e\x62\x13\x7d\xae\x83\xa3\xad\x11\x73\xae\x7e\xbc\x38\xb3\x03\x23\x67\x77\x62\x61\xd6\x03\x67\x57\x13\x1c\xc7\x37\x62\xa2\xf7\x28\x22\xe1\x10\x7c\x93\x22\x42\x7e\x2f\x82\x73\xd3\x18\x68\x59\x03\x97\xbd\x05\x43\x1d\xdb\xa0\xa1\xa1\xf1\xaf\x51\x5b\x97\x3c\xab\x29\x20\xe6\xa4\x69\x71\x35\x21\x56\x1b\x25\x3e\xaf\xac\x22\x31\x12\x2f\x94\x59\x48\x79\x05\x45\x7e\xb9\x84\xde\x4a\x4d\xca\xfa\x0c\x43\x11\xbd\xe7\x1a\xa5\x71\x3e\xa5\x80\x14\x0a\x89\x2c\xfe\x19\x92\x17\x9f\xf3\x94\x2c\x92\x9d\x93\x9b\xf4\xf3\x0c\x7d\x66\x4a\xa7\x9c\xee\xb7\x37\xf9\xcb\x48\x9e\xb5\xd2\xab\x11\x49\x9e\xd5\x27\x37\x03\xc5\xb3\x8e\x63\x1b\xe0\x79\xd2\x8b\x4f\x0f\xaf\xca\x2e\x95\x1c\x2b\xf9\x75\x7e\x7c\x50\x76\xe0\xe8\xf9\x5d\xb2\x2f\xbd\xc3\x77\x11\x59\x0c\x61\xf2\x76\x3b\x66\xc4\x3e\x0c\x1e\x59\x8f\xc0\xf4\x84\xec\xd6\x0f\x7d\xe7\xe0\x7f\x3f\x22\x7b\x33\xf4\xe5\xb3\xec\x54\xc9\x9d\x8b\xc1\x00\x75\x73\x2b\x86\xa9\xcb\xdf\xdd\xe9\x94\x1d\xed\xb2\x37\x53\x4f\xdb\xe1\x7b\x2b\xe2\xe3\xbd\x4b\x78\x79\x71\x0f\x82\xbe\x19\x38\x2f\xef\xa5\xdf\xee\xc0\xe8\x85\xdd\x58\xf0\xba\xe1\x7e\xd0\x8d\xb1\x6b\x07\x64\x9f\x8b\xa7\xb6\xfc\xb9\xc3\xd3\xd0\xd0\xf8\x7b\xac\xb4\x31\xe3\x86\x15\x15\x8c\x54\xbf\x9c\xe7\x19\x39\x97\xc5\xb3\xf2\x65\x44\x32\x79\xbd\x31\x81\x2a\xae\xcb\xe3\x53\x49\x8d\x6b\x79\x35\xaa\xf5\xb6\x1a\x76\x0e\x43\xb5\x54\xb5\xe6\x24\x58\xd2\xa0\xa9\x8a\xd9\x3f\xa5\xd2\x9c\x2e\x2f\xb7\x28\x43\x8b\x90\x26\xcf\x34\x97\x29\xe3\x42\x4b\x9a\x9c\xe7\x75\x26\xc6\x5f\x69\xce\x1b\x0d\x4b\xca\xca\x41\x16\x4b\xfb\x2a\x29\x55\xe7\xea\xf3\x2a\xfa\x49\x2e\x18
\x98\x79\x41\xa1\x6a\xbd\x72\x90\xb9\x34\xcd\xcb\x67\xf6\x4f\xb2\x99\xfb\xcb\xe2\x18\x31\x74\x19\x7a\x56\x0c\xe6\xe2\xff\x97\xef\x01\x00\x00\xff\xff\x3e\xd4\x17\xe7\x36\x0e\x00\x00") + +func faviconBytes() ([]byte, error) { + return bindataRead( + _favicon, + "favicon/favicon.ico", + ) +} + +func favicon() (*asset, error) { + bytes, err := faviconBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "favicon/favicon.ico", size: 3638, mode: os.FileMode(420), modTime: time.Unix(1460621502, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +// Asset loads and returns the asset for the given name. +// It returns an error if the asset could not be found or +// could not be loaded. +func Asset(name string) ([]byte, error) { + cannonicalName := strings.Replace(name, "\\", "/", -1) + if f, ok := _bindata[cannonicalName]; ok { + a, err := f() + if err != nil { + return nil, fmt.Errorf("Asset %s can't read by error: %v", name, err) + } + return a.bytes, nil + } + return nil, fmt.Errorf("Asset %s not found", name) +} + +// MustAsset is like Asset but panics when Asset would return an error. +// It simplifies safe initialization of global variables. +func MustAsset(name string) []byte { + a, err := Asset(name) + if err != nil { + panic("asset: Asset(" + name + "): " + err.Error()) + } + + return a +} + +// AssetInfo loads and returns the asset info for the given name. +// It returns an error if the asset could not be found or +// could not be loaded. +func AssetInfo(name string) (os.FileInfo, error) { + cannonicalName := strings.Replace(name, "\\", "/", -1) + if f, ok := _bindata[cannonicalName]; ok { + a, err := f() + if err != nil { + return nil, fmt.Errorf("AssetInfo %s can't read by error: %v", name, err) + } + return a.info, nil + } + return nil, fmt.Errorf("AssetInfo %s not found", name) +} + +// AssetNames returns the names of the assets. +func AssetNames() []string { + names := make([]string, 0, len(_bindata)) + for name := range _bindata { + names = append(names, name) + } + return names +} + +// _bindata is a table, holding each asset generator, mapped to its name. +var _bindata = map[string]func() (*asset, error){ + "favicon/favicon.ico": favicon, +} + +// AssetDir returns the file names below a certain +// directory embedded in the file by go-bindata. +// For example if you run go-bindata on data/... and data contains the +// following hierarchy: +// data/ +// foo.txt +// img/ +// a.png +// b.png +// then AssetDir("data") would return []string{"foo.txt", "img"} +// AssetDir("data/img") would return []string{"a.png", "b.png"} +// AssetDir("foo.txt") and AssetDir("notexist") would return an error +// AssetDir("") will return []string{"data"}. 
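
For reference, a minimal sketch of how the generated accessors above are typically called; only functions defined in this file are used, and the asset name comes from the `_bindata` table:

	data, err := Asset("favicon/favicon.ico")
	if err != nil {
		// unknown asset name, or the embedded bytes failed to decompress
	}
	children, err := AssetDir("favicon") // []string{"favicon.ico"}

(Also note: in bindataRead above, the close-error branch returns err, which is nil at that point, instead of clErr, so gzip close failures are silently dropped.)
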
+func AssetDir(name string) ([]string, error) { + node := _bintree + if len(name) != 0 { + cannonicalName := strings.Replace(name, "\\", "/", -1) + pathList := strings.Split(cannonicalName, "/") + for _, p := range pathList { + node = node.Children[p] + if node == nil { + return nil, fmt.Errorf("Asset %s not found", name) + } + } + } + if node.Func != nil { + return nil, fmt.Errorf("Asset %s not found", name) + } + rv := make([]string, 0, len(node.Children)) + for childName := range node.Children { + rv = append(rv, childName) + } + return rv, nil +} + +type bintree struct { + Func func() (*asset, error) + Children map[string]*bintree +} +var _bintree = &bintree{nil, map[string]*bintree{ + "favicon": &bintree{nil, map[string]*bintree{ + "favicon.ico": &bintree{favicon, map[string]*bintree{}}, + }}, +}} + +// RestoreAsset restores an asset under the given directory +func RestoreAsset(dir, name string) error { + data, err := Asset(name) + if err != nil { + return err + } + info, err := AssetInfo(name) + if err != nil { + return err + } + err = os.MkdirAll(_filePath(dir, filepath.Dir(name)), os.FileMode(0755)) + if err != nil { + return err + } + err = ioutil.WriteFile(_filePath(dir, name), data, info.Mode()) + if err != nil { + return err + } + err = os.Chtimes(_filePath(dir, name), info.ModTime(), info.ModTime()) + if err != nil { + return err + } + return nil +} + +// RestoreAssets restores an asset under the given directory recursively +func RestoreAssets(dir, name string) error { + children, err := AssetDir(name) + // File + if err != nil { + return RestoreAsset(dir, name) + } + // Dir + for _, child := range children { + err = RestoreAssets(dir, filepath.Join(name, child)) + if err != nil { + return err + } + } + return nil +} + +func _filePath(dir, name string) string { + cannonicalName := strings.Replace(name, "\\", "/", -1) + return filepath.Join(append([]string{dir}, strings.Split(cannonicalName, "/")...)...) 
+}
+
diff --git a/weed/images/favicon/favicon.ico b/weed/images/favicon/favicon.ico
Binary files differ
new file mode 100644
index 000000000..cc6f531b3
--- /dev/null
+++ b/weed/images/favicon/favicon.ico
diff --git a/weed/images/orientation.go b/weed/images/orientation.go
new file mode 100644
index 000000000..4bff89311
--- /dev/null
+++ b/weed/images/orientation.go
@@ -0,0 +1,182 @@
+package images
+
+import (
+	"bytes"
+	"image"
+	"image/draw"
+	"image/jpeg"
+	"log"
+
+	"github.com/rwcarlsen/goexif/exif"
+)
+
+// Much of this code is copied from http://camlistore.org/pkg/images/images.go
+func FixJpgOrientation(data []byte) (oriented []byte) {
+	ex, err := exif.Decode(bytes.NewReader(data))
+	if err != nil {
+		return data
+	}
+	tag, err := ex.Get(exif.Orientation)
+	if err != nil {
+		return data
+	}
+	angle := 0
+	flipMode := FlipDirection(0)
+	orient, err := tag.Int(0)
+	if err != nil {
+		return data
+	}
+	switch orient {
+	case topLeftSide:
+		// do nothing
+		return data
+	case topRightSide:
+		flipMode = 2
+	case bottomRightSide:
+		angle = 180
+	case bottomLeftSide:
+		angle = 180
+		flipMode = 2
+	case leftSideTop:
+		angle = -90
+		flipMode = 2
+	case rightSideTop:
+		angle = -90
+	case rightSideBottom:
+		angle = 90
+		flipMode = 2
+	case leftSideBottom:
+		angle = 90
+	}
+
+	if srcImage, _, err := image.Decode(bytes.NewReader(data)); err == nil {
+		dstImage := flip(rotate(srcImage, angle), flipMode)
+		var buf bytes.Buffer
+		// fall back to the original bytes if re-encoding fails
+		if err := jpeg.Encode(&buf, dstImage, nil); err != nil {
+			return data
+		}
+		return buf.Bytes()
+	}
+
+	return data
+}
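
As a usage note for the function above: a minimal sketch (file names illustrative) of auto-rotating a JPEG on disk:

	raw, err := ioutil.ReadFile("photo.jpg")
	if err != nil {
		log.Fatal(err)
	}
	// FixJpgOrientation returns its input unchanged when there is no usable
	// EXIF orientation tag, so it is safe to call unconditionally.
	oriented := FixJpgOrientation(raw)
	if err := ioutil.WriteFile("photo-oriented.jpg", oriented, 0644); err != nil {
		log.Fatal(err)
	}
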
+// Exif Orientation Tag values
+// http://sylvana.net/jpegcrop/exif_orientation.html
+const (
+	topLeftSide     = 1
+	topRightSide    = 2
+	bottomRightSide = 3
+	bottomLeftSide  = 4
+	leftSideTop     = 5
+	rightSideTop    = 6
+	rightSideBottom = 7
+	leftSideBottom  = 8
+)
+
+// The FlipDirection type is used by the Flip option in DecodeOpts
+// to indicate in which direction to flip an image.
+type FlipDirection int
+
+// FlipVertical and FlipHorizontal are two possible FlipDirection
+// values to indicate in which direction an image will be flipped.
+const (
+	FlipVertical FlipDirection = 1 << iota
+	FlipHorizontal
+)
+
+type DecodeOpts struct {
+	// Rotate specifies how to rotate the image.
+	// If nil, the image is rotated automatically based on EXIF metadata.
+	// If an int, Rotate is the number of degrees to rotate
+	// counter clockwise and must be one of 0, 90, -90, 180, or
+	// -180.
+	Rotate interface{}
+
+	// Flip specifies how to flip the image.
+	// If nil, the image is flipped automatically based on EXIF metadata.
+	// Otherwise, Flip is a FlipDirection bitfield indicating how to flip.
+	Flip interface{}
+}
+
+func rotate(im image.Image, angle int) image.Image {
+	var rotated *image.NRGBA
+	// trigonometric (i.e. counter clockwise)
+	switch angle {
+	case 90:
+		newH, newW := im.Bounds().Dx(), im.Bounds().Dy()
+		rotated = image.NewNRGBA(image.Rect(0, 0, newW, newH))
+		for y := 0; y < newH; y++ {
+			for x := 0; x < newW; x++ {
+				rotated.Set(x, y, im.At(newH-1-y, x))
+			}
+		}
+	case -90:
+		newH, newW := im.Bounds().Dx(), im.Bounds().Dy()
+		rotated = image.NewNRGBA(image.Rect(0, 0, newW, newH))
+		for y := 0; y < newH; y++ {
+			for x := 0; x < newW; x++ {
+				rotated.Set(x, y, im.At(y, newW-1-x))
+			}
+		}
+	case 180, -180:
+		newW, newH := im.Bounds().Dx(), im.Bounds().Dy()
+		rotated = image.NewNRGBA(image.Rect(0, 0, newW, newH))
+		for y := 0; y < newH; y++ {
+			for x := 0; x < newW; x++ {
+				rotated.Set(x, y, im.At(newW-1-x, newH-1-y))
+			}
+		}
+	default:
+		return im
+	}
+	return rotated
+}
+
+// flip returns a flipped version of the image im, according to
+// the direction(s) in dir.
+// It may flip the input im in place and return it, or it may allocate a
+// new NRGBA (if im is an *image.YCbCr).
+func flip(im image.Image, dir FlipDirection) image.Image {
+	if dir == 0 {
+		return im
+	}
+	ycbcr := false
+	var nrgba image.Image
+	dx, dy := im.Bounds().Dx(), im.Bounds().Dy()
+	di, ok := im.(draw.Image)
+	if !ok {
+		if _, ok := im.(*image.YCbCr); !ok {
+			log.Printf("failed to flip image: input does not satisfy draw.Image")
+			return im
+		}
+		// because YCbCr does not implement Set, we replace it with a new NRGBA
+		ycbcr = true
+		nrgba = image.NewNRGBA(image.Rect(0, 0, dx, dy))
+		di, ok = nrgba.(draw.Image)
+		if !ok {
+			log.Print("failed to flip image: could not cast an NRGBA to a draw.Image")
+			return im
+		}
+	}
+	if dir&FlipHorizontal != 0 {
+		for y := 0; y < dy; y++ {
+			for x := 0; x < dx/2; x++ {
+				old := im.At(x, y)
+				di.Set(x, y, im.At(dx-1-x, y))
+				di.Set(dx-1-x, y, old)
+			}
+		}
+	}
+	if dir&FlipVertical != 0 {
+		for y := 0; y < dy/2; y++ {
+			for x := 0; x < dx; x++ {
+				old := im.At(x, y)
+				di.Set(x, y, im.At(x, dy-1-y))
+				di.Set(x, dy-1-y, old)
+			}
+		}
+	}
+	if ycbcr {
+		return nrgba
+	}
+	return im
+}
diff --git a/weed/images/orientation_test.go b/weed/images/orientation_test.go
new file mode 100644
index 000000000..adab17ff8
--- /dev/null
+++ b/weed/images/orientation_test.go
@@ -0,0 +1,17 @@
+package images
+
+import (
+	"io/ioutil"
+	"testing"
+)
+
+func TestXYZ(t *testing.T) {
+	fname := "sample1.jpg"
+
+	dat, err := ioutil.ReadFile(fname)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	fixed_data := FixJpgOrientation(dat)
+
+	if err := ioutil.WriteFile("fixed1.jpg", fixed_data, 0644); err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/weed/images/preprocess.go b/weed/images/preprocess.go
new file mode 100644
index 000000000..0d6cb2d9e
--- /dev/null
+++ b/weed/images/preprocess.go
@@ -0,0 +1,27 @@
+package images
+
+import (
+	"path/filepath"
+	"strings"
+)
+
+/*
+* Preprocess image files on client side.
+* 1. possibly adjust the orientation
+* 2. resize the image to a width or height limit
+* 3. 
remove the exif data +* Call this function on any file uploaded to SeaweedFS +* + */ +func MaybePreprocessImage(filename string, data []byte, width, height int) (resized []byte, w int, h int) { + ext := filepath.Ext(filename) + ext = strings.ToLower(ext) + switch ext { + case ".png", ".gif": + return Resized(ext, data, width, height) + case ".jpg", ".jpeg": + data = FixJpgOrientation(data) + return Resized(ext, data, width, height) + } + return data, 0, 0 +} diff --git a/weed/images/resizing.go b/weed/images/resizing.go new file mode 100644 index 000000000..7e4a88c42 --- /dev/null +++ b/weed/images/resizing.go @@ -0,0 +1,46 @@ +package images + +import ( + "bytes" + "image" + "image/gif" + "image/jpeg" + "image/png" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/disintegration/imaging" +) + +func Resized(ext string, data []byte, width, height int) (resized []byte, w int, h int) { + if width == 0 && height == 0 { + return data, 0, 0 + } + srcImage, _, err := image.Decode(bytes.NewReader(data)) + if err == nil { + bounds := srcImage.Bounds() + var dstImage *image.NRGBA + if bounds.Dx() > width && width != 0 || bounds.Dy() > height && height != 0 { + if width == height && bounds.Dx() != bounds.Dy() { + dstImage = imaging.Thumbnail(srcImage, width, height, imaging.Lanczos) + w, h = width, height + } else { + dstImage = imaging.Resize(srcImage, width, height, imaging.Lanczos) + } + } else { + return data, bounds.Dx(), bounds.Dy() + } + var buf bytes.Buffer + switch ext { + case ".png": + png.Encode(&buf, dstImage) + case ".jpg", ".jpeg": + jpeg.Encode(&buf, dstImage, nil) + case ".gif": + gif.Encode(&buf, dstImage, nil) + } + return buf.Bytes(), dstImage.Bounds().Dx(), dstImage.Bounds().Dy() + } else { + glog.Error(err) + } + return data, 0, 0 +} diff --git a/weed/images/sample1.jpg b/weed/images/sample1.jpg Binary files differnew file mode 100644 index 000000000..991b59bac --- /dev/null +++ b/weed/images/sample1.jpg diff --git a/weed/operation/assign_file_id.go b/weed/operation/assign_file_id.go new file mode 100644 index 000000000..acc2d3034 --- /dev/null +++ b/weed/operation/assign_file_id.go @@ -0,0 +1,48 @@ +package operation + +import ( + "encoding/json" + "errors" + "fmt" + "net/url" + "strconv" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type AssignResult struct { + Fid string `json:"fid,omitempty"` + Url string `json:"url,omitempty"` + PublicUrl string `json:"publicUrl,omitempty"` + Count uint64 `json:"count,omitempty"` + Error string `json:"error,omitempty"` +} + +func Assign(server string, count uint64, replication string, collection string, ttl string) (*AssignResult, error) { + values := make(url.Values) + values.Add("count", strconv.FormatUint(count, 10)) + if replication != "" { + values.Add("replication", replication) + } + if collection != "" { + values.Add("collection", collection) + } + if ttl != "" { + values.Add("ttl", ttl) + } + jsonBlob, err := util.Post("http://"+server+"/dir/assign", values) + glog.V(2).Info("assign result :", string(jsonBlob)) + if err != nil { + return nil, err + } + var ret AssignResult + err = json.Unmarshal(jsonBlob, &ret) + if err != nil { + return nil, fmt.Errorf("/dir/assign result JSON unmarshal error:%v, json:%s", err, string(jsonBlob)) + } + if ret.Count <= 0 { + return nil, errors.New(ret.Error) + } + return &ret, nil +} diff --git a/weed/operation/chunked_file.go b/weed/operation/chunked_file.go new file mode 100644 index 000000000..52086514a --- /dev/null +++ 
b/weed/operation/chunked_file.go @@ -0,0 +1,213 @@ +package operation + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "sort" + + "sync" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + // when the remote server does not allow range requests (Accept-Ranges was not set) + ErrRangeRequestsNotSupported = errors.New("Range requests are not supported by the remote server") + // ErrInvalidRange is returned by Read when trying to read past the end of the file + ErrInvalidRange = errors.New("Invalid range") +) + +type ChunkInfo struct { + Fid string `json:"fid"` + Offset int64 `json:"offset"` + Size int64 `json:"size"` +} + +type ChunkList []*ChunkInfo + +type ChunkManifest struct { + Name string `json:"name,omitempty"` + Mime string `json:"mime,omitempty"` + Size int64 `json:"size,omitempty"` + Chunks ChunkList `json:"chunks,omitempty"` +} + +// seekable chunked file reader +type ChunkedFileReader struct { + Manifest *ChunkManifest + Master string + pos int64 + pr *io.PipeReader + pw *io.PipeWriter + mutex sync.Mutex +} + +func (s ChunkList) Len() int { return len(s) } +func (s ChunkList) Less(i, j int) bool { return s[i].Offset < s[j].Offset } +func (s ChunkList) Swap(i, j int) { s[i], s[j] = s[j], s[i] } + +func LoadChunkManifest(buffer []byte, isGzipped bool) (*ChunkManifest, error) { + if isGzipped { + var err error + if buffer, err = UnGzipData(buffer); err != nil { + return nil, err + } + } + cm := ChunkManifest{} + if e := json.Unmarshal(buffer, &cm); e != nil { + return nil, e + } + sort.Sort(cm.Chunks) + return &cm, nil +} + +func (cm *ChunkManifest) Marshal() ([]byte, error) { + return json.Marshal(cm) +} + +func (cm *ChunkManifest) DeleteChunks(master string) error { + deleteError := 0 + for _, ci := range cm.Chunks { + if e := DeleteFile(master, ci.Fid, ""); e != nil { + deleteError++ + glog.V(0).Infof("Delete %s error: %v, master: %s", ci.Fid, e, master) + } + } + if deleteError > 0 { + return errors.New("Not all chunks deleted.") + } + return nil +} + +func readChunkNeedle(fileUrl string, w io.Writer, offset int64) (written int64, e error) { + req, err := http.NewRequest("GET", fileUrl, nil) + if err != nil { + return written, err + } + if offset > 0 { + req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset)) + } + + resp, err := util.Do(req) + if err != nil { + return written, err + } + defer resp.Body.Close() + + switch resp.StatusCode { + case http.StatusRequestedRangeNotSatisfiable: + return written, ErrInvalidRange + case http.StatusOK: + if offset > 0 { + return written, ErrRangeRequestsNotSupported + } + case http.StatusPartialContent: + break + default: + return written, fmt.Errorf("Read chunk needle error: [%d] %s", resp.StatusCode, fileUrl) + + } + return io.Copy(w, resp.Body) +} + +func (cf *ChunkedFileReader) Seek(offset int64, whence int) (int64, error) { + var err error + switch whence { + case 0: + case 1: + offset += cf.pos + case 2: + offset = cf.Manifest.Size - offset + } + if offset > cf.Manifest.Size { + err = ErrInvalidRange + } + if cf.pos != offset { + cf.Close() + } + cf.pos = offset + return cf.pos, err +} + +func (cf *ChunkedFileReader) WriteTo(w io.Writer) (n int64, err error) { + cm := cf.Manifest + chunkIndex := -1 + chunkStartOffset := int64(0) + for i, ci := range cm.Chunks { + if cf.pos >= ci.Offset && cf.pos < ci.Offset+ci.Size { + chunkIndex = i + chunkStartOffset = cf.pos - ci.Offset + break + } + } + if chunkIndex < 0 { + return n, ErrInvalidRange + } + for ; 
chunkIndex < cm.Chunks.Len(); chunkIndex++ {
+		ci := cm.Chunks[chunkIndex]
+		// TODO: should we read data from the local volume server first?
+		fileUrl, lookupError := LookupFileId(cf.Master, ci.Fid)
+		if lookupError != nil {
+			return n, lookupError
+		}
+		if wn, e := readChunkNeedle(fileUrl, w, chunkStartOffset); e != nil {
+			return n, e
+		} else {
+			n += wn
+			cf.pos += wn
+		}
+
+		chunkStartOffset = 0
+	}
+	return n, nil
+}
+
+func (cf *ChunkedFileReader) ReadAt(p []byte, off int64) (n int, err error) {
+	cf.Seek(off, 0)
+	return cf.Read(p)
+}
+
+func (cf *ChunkedFileReader) Read(p []byte) (int, error) {
+	return cf.getPipeReader().Read(p)
+}
+
+func (cf *ChunkedFileReader) Close() (e error) {
+	cf.mutex.Lock()
+	defer cf.mutex.Unlock()
+	return cf.closePipe()
+}
+
+func (cf *ChunkedFileReader) closePipe() (e error) {
+	if cf.pr != nil {
+		if err := cf.pr.Close(); err != nil {
+			e = err
+		}
+	}
+	cf.pr = nil
+	if cf.pw != nil {
+		if err := cf.pw.Close(); err != nil {
+			e = err
+		}
+	}
+	cf.pw = nil
+	return e
+}
+
+func (cf *ChunkedFileReader) getPipeReader() io.Reader {
+	cf.mutex.Lock()
+	defer cf.mutex.Unlock()
+	if cf.pr != nil && cf.pw != nil {
+		return cf.pr
+	}
+	cf.closePipe()
+	cf.pr, cf.pw = io.Pipe()
+	go func(pw *io.PipeWriter) {
+		_, e := cf.WriteTo(pw)
+		pw.CloseWithError(e)
+	}(cf.pw)
+	return cf.pr
+}
diff --git a/weed/operation/compress.go b/weed/operation/compress.go
new file mode 100644
index 000000000..de62e5bf7
--- /dev/null
+++ b/weed/operation/compress.go
@@ -0,0 +1,59 @@
+package operation
+
+import (
+	"bytes"
+	"compress/flate"
+	"compress/gzip"
+	"io/ioutil"
+	"strings"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+/*
+* By default, prefer not to gzip, since gzip can be done on the client side.
+ */
+func IsGzippable(ext, mtype string) bool {
+	if strings.HasPrefix(mtype, "text/") {
+		return true
+	}
+	switch ext {
+	case ".zip", ".rar", ".gz", ".bz2", ".xz":
+		return false
+	case ".pdf", ".txt", ".html", ".htm", ".css", ".js", ".json":
+		return true
+	}
+	if strings.HasPrefix(mtype, "application/") {
+		if strings.HasSuffix(mtype, "xml") {
+			return true
+		}
+		if strings.HasSuffix(mtype, "script") {
+			return true
+		}
+	}
+	return false
+}
+
+func GzipData(input []byte) ([]byte, error) {
+	buf := new(bytes.Buffer)
+	w, _ := gzip.NewWriterLevel(buf, flate.BestCompression)
+	if _, err := w.Write(input); err != nil {
+		glog.V(2).Infoln("error compressing data:", err)
+		return nil, err
+	}
+	if err := w.Close(); err != nil {
+		glog.V(2).Infoln("error closing compressed data:", err)
+		return nil, err
+	}
+	return buf.Bytes(), nil
+}
+func UnGzipData(input []byte) ([]byte, error) {
+	buf := bytes.NewBuffer(input)
+	r, err := gzip.NewReader(buf)
+	if err != nil {
+		// a nil reader must not be read from or closed below
+		glog.V(2).Infoln("error creating gzip reader:", err)
+		return nil, err
+	}
+	defer r.Close()
+	output, err := ioutil.ReadAll(r)
+	if err != nil {
+		glog.V(2).Infoln("error uncompressing data:", err)
+	}
+	return output, err
+}
diff --git a/weed/operation/data_struts.go b/weed/operation/data_struts.go
new file mode 100644
index 000000000..bfc53aa50
--- /dev/null
+++ b/weed/operation/data_struts.go
@@ -0,0 +1,7 @@
+package operation
+
+type JoinResult struct {
+	VolumeSizeLimit uint64 `json:"VolumeSizeLimit,omitempty"`
+	SecretKey       string `json:"secretKey,omitempty"`
+	Error           string `json:"error,omitempty"`
+}
diff --git a/weed/operation/delete_content.go b/weed/operation/delete_content.go
new file mode 100644
index 000000000..b78221da1
--- /dev/null
+++ b/weed/operation/delete_content.go
@@ -0,0 +1,117 @@
+package operation
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/url"
+	"strings"
+	"sync"
+
+	"net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type DeleteResult struct { + Fid string `json:"fid"` + Size int `json:"size"` + Status int `json:"status"` + Error string `json:"error,omitempty"` +} + +func DeleteFile(master string, fileId string, jwt security.EncodedJwt) error { + fileUrl, err := LookupFileId(master, fileId) + if err != nil { + return fmt.Errorf("Failed to lookup %s:%v", fileId, err) + } + err = util.Delete(fileUrl, jwt) + if err != nil { + return fmt.Errorf("Failed to delete %s:%v", fileUrl, err) + } + return nil +} + +func ParseFileId(fid string) (vid string, key_cookie string, err error) { + commaIndex := strings.Index(fid, ",") + if commaIndex <= 0 { + return "", "", errors.New("Wrong fid format.") + } + return fid[:commaIndex], fid[commaIndex+1:], nil +} + +type DeleteFilesResult struct { + Errors []string + Results []DeleteResult +} + +func DeleteFiles(master string, fileIds []string) (*DeleteFilesResult, error) { + vid_to_fileIds := make(map[string][]string) + ret := &DeleteFilesResult{} + var vids []string + for _, fileId := range fileIds { + vid, _, err := ParseFileId(fileId) + if err != nil { + ret.Results = append(ret.Results, DeleteResult{ + Fid: vid, + Status: http.StatusBadRequest, + Error: err.Error()}, + ) + continue + } + if _, ok := vid_to_fileIds[vid]; !ok { + vid_to_fileIds[vid] = make([]string, 0) + vids = append(vids, vid) + } + vid_to_fileIds[vid] = append(vid_to_fileIds[vid], fileId) + } + + lookupResults, err := LookupVolumeIds(master, vids) + if err != nil { + return ret, err + } + + server_to_fileIds := make(map[string][]string) + for vid, result := range lookupResults { + if result.Error != "" { + ret.Errors = append(ret.Errors, result.Error) + continue + } + for _, location := range result.Locations { + if _, ok := server_to_fileIds[location.Url]; !ok { + server_to_fileIds[location.Url] = make([]string, 0) + } + server_to_fileIds[location.Url] = append( + server_to_fileIds[location.Url], vid_to_fileIds[vid]...) + } + } + + var wg sync.WaitGroup + + for server, fidList := range server_to_fileIds { + wg.Add(1) + go func(server string, fidList []string) { + defer wg.Done() + values := make(url.Values) + for _, fid := range fidList { + values.Add("fid", fid) + } + jsonBlob, err := util.Post("http://"+server+"/delete", values) + if err != nil { + ret.Errors = append(ret.Errors, err.Error()+" "+string(jsonBlob)) + return + } + var result []DeleteResult + err = json.Unmarshal(jsonBlob, &result) + if err != nil { + ret.Errors = append(ret.Errors, err.Error()+" "+string(jsonBlob)) + return + } + ret.Results = append(ret.Results, result...) 
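
(Note: ret.Errors and ret.Results are appended to at this point from multiple goroutines without a lock; guarding these appends with a sync.Mutex would be needed to make DeleteFiles race-free.)
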
+ }(server, fidList) + } + wg.Wait() + + return ret, nil +} diff --git a/weed/operation/list_masters.go b/weed/operation/list_masters.go new file mode 100644 index 000000000..0a15b0af8 --- /dev/null +++ b/weed/operation/list_masters.go @@ -0,0 +1,32 @@ +package operation + +import ( + "encoding/json" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type ClusterStatusResult struct { + IsLeader bool `json:"IsLeader,omitempty"` + Leader string `json:"Leader,omitempty"` + Peers []string `json:"Peers,omitempty"` +} + +func ListMasters(server string) ([]string, error) { + jsonBlob, err := util.Get("http://" + server + "/cluster/status") + glog.V(2).Info("list masters result :", string(jsonBlob)) + if err != nil { + return nil, err + } + var ret ClusterStatusResult + err = json.Unmarshal(jsonBlob, &ret) + if err != nil { + return nil, err + } + masters := ret.Peers + if ret.IsLeader { + masters = append(masters, ret.Leader) + } + return masters, nil +} diff --git a/weed/operation/lookup.go b/weed/operation/lookup.go new file mode 100644 index 000000000..19d9dbb94 --- /dev/null +++ b/weed/operation/lookup.go @@ -0,0 +1,118 @@ +package operation + +import ( + "encoding/json" + "errors" + "fmt" + "math/rand" + "net/url" + "strings" + "time" + + "github.com/chrislusf/seaweedfs/weed/util" +) + +type Location struct { + Url string `json:"url,omitempty"` + PublicUrl string `json:"publicUrl,omitempty"` +} +type LookupResult struct { + VolumeId string `json:"volumeId,omitempty"` + Locations []Location `json:"locations,omitempty"` + Error string `json:"error,omitempty"` +} + +func (lr *LookupResult) String() string { + return fmt.Sprintf("VolumeId:%s, Locations:%v, Error:%s", lr.VolumeId, lr.Locations, lr.Error) +} + +var ( + vc VidCache // caching of volume locations, re-check if after 10 minutes +) + +func Lookup(server string, vid string) (ret *LookupResult, err error) { + locations, cache_err := vc.Get(vid) + if cache_err != nil { + if ret, err = do_lookup(server, vid); err == nil { + vc.Set(vid, ret.Locations, 10*time.Minute) + } + } else { + ret = &LookupResult{VolumeId: vid, Locations: locations} + } + return +} + +func do_lookup(server string, vid string) (*LookupResult, error) { + values := make(url.Values) + values.Add("volumeId", vid) + jsonBlob, err := util.Post("http://"+server+"/dir/lookup", values) + if err != nil { + return nil, err + } + var ret LookupResult + err = json.Unmarshal(jsonBlob, &ret) + if err != nil { + return nil, err + } + if ret.Error != "" { + return nil, errors.New(ret.Error) + } + return &ret, nil +} + +func LookupFileId(server string, fileId string) (fullUrl string, err error) { + parts := strings.Split(fileId, ",") + if len(parts) != 2 { + return "", errors.New("Invalid fileId " + fileId) + } + lookup, lookupError := Lookup(server, parts[0]) + if lookupError != nil { + return "", lookupError + } + if len(lookup.Locations) == 0 { + return "", errors.New("File Not Found") + } + return "http://" + lookup.Locations[rand.Intn(len(lookup.Locations))].Url + "/" + fileId, nil +} + +// LookupVolumeIds find volume locations by cache and actual lookup +func LookupVolumeIds(server string, vids []string) (map[string]LookupResult, error) { + ret := make(map[string]LookupResult) + var unknown_vids []string + + //check vid cache first + for _, vid := range vids { + locations, cache_err := vc.Get(vid) + if cache_err == nil { + ret[vid] = LookupResult{VolumeId: vid, Locations: locations} + } else { + unknown_vids = append(unknown_vids, 
vid) + } + } + //return success if all volume ids are known + if len(unknown_vids) == 0 { + return ret, nil + } + + //only query unknown_vids + values := make(url.Values) + for _, vid := range unknown_vids { + values.Add("volumeId", vid) + } + jsonBlob, err := util.Post("http://"+server+"/vol/lookup", values) + if err != nil { + return nil, err + } + err = json.Unmarshal(jsonBlob, &ret) + if err != nil { + return nil, errors.New(err.Error() + " " + string(jsonBlob)) + } + + //set newly checked vids to cache + for _, vid := range unknown_vids { + locations := ret[vid].Locations + vc.Set(vid, locations, 10*time.Minute) + } + + return ret, nil +} diff --git a/weed/operation/lookup_vid_cache.go b/weed/operation/lookup_vid_cache.go new file mode 100644 index 000000000..1ed03613d --- /dev/null +++ b/weed/operation/lookup_vid_cache.go @@ -0,0 +1,51 @@ +package operation + +import ( + "errors" + "strconv" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +type VidInfo struct { + Locations []Location + NextRefreshTime time.Time +} +type VidCache struct { + cache []VidInfo +} + +func (vc *VidCache) Get(vid string) ([]Location, error) { + id, err := strconv.Atoi(vid) + if err != nil { + glog.V(1).Infof("Unknown volume id %s", vid) + return nil, err + } + if 0 < id && id <= len(vc.cache) { + if vc.cache[id-1].Locations == nil { + return nil, errors.New("Not Set") + } + if vc.cache[id-1].NextRefreshTime.Before(time.Now()) { + return nil, errors.New("Expired") + } + return vc.cache[id-1].Locations, nil + } + return nil, errors.New("Not Found") +} +func (vc *VidCache) Set(vid string, locations []Location, duration time.Duration) { + id, err := strconv.Atoi(vid) + if err != nil { + glog.V(1).Infof("Unknown volume id %s", vid) + return + } + if id > len(vc.cache) { + for i := id - len(vc.cache); i > 0; i-- { + vc.cache = append(vc.cache, VidInfo{}) + } + } + if id > 0 { + vc.cache[id-1].Locations = locations + vc.cache[id-1].NextRefreshTime = time.Now().Add(duration) + } +} diff --git a/weed/operation/lookup_vid_cache_test.go b/weed/operation/lookup_vid_cache_test.go new file mode 100644 index 000000000..9c9e2affb --- /dev/null +++ b/weed/operation/lookup_vid_cache_test.go @@ -0,0 +1,26 @@ +package operation + +import ( + "fmt" + "testing" + "time" +) + +func TestCaching(t *testing.T) { + var ( + vc VidCache + ) + var locations []Location + locations = append(locations, Location{Url: "a.com:8080"}) + vc.Set("123", locations, time.Second) + ret, _ := vc.Get("123") + if ret == nil { + t.Fatal("Not found vid 123") + } + fmt.Printf("vid 123 locations = %v\n", ret) + time.Sleep(2 * time.Second) + ret, _ = vc.Get("123") + if ret != nil { + t.Fatal("Not found vid 123") + } +} diff --git a/weed/operation/submit.go b/weed/operation/submit.go new file mode 100644 index 000000000..19bbd7a70 --- /dev/null +++ b/weed/operation/submit.go @@ -0,0 +1,194 @@ +package operation + +import ( + "bytes" + "io" + "mime" + "net/url" + "os" + "path" + "strconv" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/security" +) + +type FilePart struct { + Reader io.Reader + FileName string + FileSize int64 + IsGzipped bool + MimeType string + ModTime int64 //in seconds + Replication string + Collection string + Ttl string + Server string //this comes from assign result + Fid string //this comes from assign result, but customizable +} + +type SubmitResult struct { + FileName string `json:"fileName,omitempty"` + FileUrl string `json:"fileUrl,omitempty"` + Fid string 
`json:"fid,omitempty"`
+	Size     uint32 `json:"size,omitempty"`
+	Error    string `json:"error,omitempty"`
+}
+
+func SubmitFiles(master string, files []FilePart,
+	replication string, collection string, ttl string, maxMB int,
+	secret security.Secret,
+) ([]SubmitResult, error) {
+	results := make([]SubmitResult, len(files))
+	for index, file := range files {
+		results[index].FileName = file.FileName
+	}
+	ret, err := Assign(master, uint64(len(files)), replication, collection, ttl)
+	if err != nil {
+		for index := range files {
+			results[index].Error = err.Error()
+		}
+		return results, err
+	}
+	for index, file := range files {
+		file.Fid = ret.Fid
+		if index > 0 {
+			file.Fid = file.Fid + "_" + strconv.Itoa(index)
+		}
+		file.Server = ret.Url
+		file.Replication = replication
+		file.Collection = collection
+		results[index].Size, err = file.Upload(maxMB, master, secret)
+		if err != nil {
+			results[index].Error = err.Error()
+		}
+		results[index].Fid = file.Fid
+		results[index].FileUrl = ret.PublicUrl + "/" + file.Fid
+	}
+	return results, nil
+}
+
+func NewFileParts(fullPathFilenames []string) (ret []FilePart, err error) {
+	ret = make([]FilePart, len(fullPathFilenames))
+	for index, file := range fullPathFilenames {
+		if ret[index], err = newFilePart(file); err != nil {
+			return
+		}
+	}
+	return
+}
+func newFilePart(fullPathFilename string) (ret FilePart, err error) {
+	fh, openErr := os.Open(fullPathFilename)
+	if openErr != nil {
+		glog.V(0).Info("Failed to open file: ", fullPathFilename)
+		return ret, openErr
+	}
+	ret.Reader = fh
+
+	if fi, fiErr := fh.Stat(); fiErr != nil {
+		glog.V(0).Info("Failed to stat file:", fullPathFilename)
+		return ret, fiErr
+	} else {
+		ret.ModTime = fi.ModTime().UTC().Unix()
+		ret.FileSize = fi.Size()
+	}
+	ext := strings.ToLower(path.Ext(fullPathFilename))
+	ret.IsGzipped = ext == ".gz"
+	if ret.IsGzipped {
+		// strip the ".gz" suffix so the stored name reflects the real content
+		ret.FileName = fullPathFilename[0 : len(fullPathFilename)-3]
+	} else {
+		ret.FileName = fullPathFilename
+	}
+	if ext != "" {
+		ret.MimeType = mime.TypeByExtension(ext)
+	}
+
+	return ret, nil
+}
+
+func (fi FilePart) Upload(maxMB int, master string, secret security.Secret) (retSize uint32, err error) {
+	jwt := security.GenJwt(secret, fi.Fid)
+	fileUrl := "http://" + fi.Server + "/" + fi.Fid
+	if fi.ModTime != 0 {
+		fileUrl += "?ts=" + strconv.Itoa(int(fi.ModTime))
+	}
+	if closer, ok := fi.Reader.(io.Closer); ok {
+		defer closer.Close()
+	}
+	baseName := path.Base(fi.FileName)
+	if maxMB > 0 && fi.FileSize > int64(maxMB*1024*1024) {
+		chunkSize := int64(maxMB * 1024 * 1024)
+		chunks := fi.FileSize/chunkSize + 1
+		cm := ChunkManifest{
+			Name:   baseName,
+			Size:   fi.FileSize,
+			Mime:   fi.MimeType,
+			Chunks: make([]*ChunkInfo, 0, chunks),
+		}
+
+		for i := int64(0); i < chunks; i++ {
+			id, count, e := upload_one_chunk(
+				baseName+"-"+strconv.FormatInt(i+1, 10),
+				io.LimitReader(fi.Reader, chunkSize),
+				master, fi.Replication, fi.Collection, fi.Ttl,
+				jwt)
+			if e != nil {
+				// delete all uploaded chunks
+				cm.DeleteChunks(master)
+				return 0, e
+			}
+			cm.Chunks = append(cm.Chunks,
+				&ChunkInfo{
+					Offset: i * chunkSize,
+					Size:   int64(count),
+					Fid:    id,
+				},
+			)
+			retSize += count
+		}
+		err = upload_chunked_file_manifest(fileUrl, &cm, jwt)
+		if err != nil {
+			// delete all uploaded chunks
+			cm.DeleteChunks(master)
+		}
+	} else {
+		ret, e := Upload(fileUrl, baseName, fi.Reader, fi.IsGzipped, fi.MimeType, jwt)
+		if e != nil {
+			return 0, e
+		}
+		return ret.Size, e
+	}
+	return
+}
+
+func upload_one_chunk(filename string, reader io.Reader, master,
+	replication string,
collection string, ttl string, jwt security.EncodedJwt, +) (fid string, size uint32, e error) { + ret, err := Assign(master, 1, replication, collection, ttl) + if err != nil { + return "", 0, err + } + fileUrl, fid := "http://"+ret.Url+"/"+ret.Fid, ret.Fid + glog.V(4).Info("Uploading part ", filename, " to ", fileUrl, "...") + uploadResult, uploadError := Upload(fileUrl, filename, reader, false, + "application/octet-stream", jwt) + if uploadError != nil { + return fid, 0, uploadError + } + return fid, uploadResult.Size, nil +} + +func upload_chunked_file_manifest(fileUrl string, manifest *ChunkManifest, jwt security.EncodedJwt) error { + buf, e := manifest.Marshal() + if e != nil { + return e + } + bufReader := bytes.NewReader(buf) + glog.V(4).Info("Uploading chunks manifest ", manifest.Name, " to ", fileUrl, "...") + u, _ := url.Parse(fileUrl) + q := u.Query() + q.Set("cm", "true") + u.RawQuery = q.Encode() + _, e = Upload(u.String(), manifest.Name, bufReader, false, "application/json", jwt) + return e +} diff --git a/weed/operation/sync_volume.go b/weed/operation/sync_volume.go new file mode 100644 index 000000000..b7a727fc7 --- /dev/null +++ b/weed/operation/sync_volume.go @@ -0,0 +1,54 @@ +package operation + +import ( + "encoding/json" + "fmt" + "net/url" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type SyncVolumeResponse struct { + Replication string `json:"Replication,omitempty"` + Ttl string `json:"Ttl,omitempty"` + TailOffset uint64 `json:"TailOffset,omitempty"` + CompactRevision uint16 `json:"CompactRevision,omitempty"` + IdxFileSize uint64 `json:"IdxFileSize,omitempty"` + Error string `json:"error,omitempty"` +} + +func GetVolumeSyncStatus(server string, vid string) (*SyncVolumeResponse, error) { + values := make(url.Values) + values.Add("volume", vid) + jsonBlob, err := util.Post("http://"+server+"/admin/sync/status", values) + glog.V(2).Info("sync volume result :", string(jsonBlob)) + if err != nil { + return nil, err + } + var ret SyncVolumeResponse + err = json.Unmarshal(jsonBlob, &ret) + if err != nil { + return nil, err + } + if ret.Error != "" { + return nil, fmt.Errorf("Volume %s get sync status error: %s", vid, ret.Error) + } + return &ret, nil +} + +func GetVolumeIdxEntries(server string, vid string, eachEntryFn func(key uint64, offset, size uint32)) error { + values := make(url.Values) + values.Add("volume", vid) + line := make([]byte, 16) + err := util.GetBufferStream("http://"+server+"/admin/sync/index", values, line, func(bytes []byte) { + key := util.BytesToUint64(bytes[:8]) + offset := util.BytesToUint32(bytes[8:12]) + size := util.BytesToUint32(bytes[12:16]) + eachEntryFn(key, offset, size) + }) + if err != nil { + return err + } + return nil +} diff --git a/weed/operation/system_message.pb.go b/weed/operation/system_message.pb.go new file mode 100644 index 000000000..742a1ca4e --- /dev/null +++ b/weed/operation/system_message.pb.go @@ -0,0 +1,203 @@ +// Code generated by protoc-gen-go. +// source: system_message.proto +// DO NOT EDIT! + +/* +Package operation is a generated protocol buffer package. + +It is generated from these files: + system_message.proto + +It has these top-level messages: + VolumeInformationMessage + JoinMessage +*/ +package operation + +import proto "github.com/golang/protobuf/proto" +import math "math" + +// Reference imports to suppress errors if they are not otherwise used. 
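
A quick illustration, separate from the generated file, of the proto2 conventions used below: every field is a pointer, the getters are nil-safe, and a field with a declared default (here Version, default 2) falls back to it when unset:

	m := &VolumeInformationMessage{}
	fmt.Println(m.GetVersion()) // prints 2, the declared default, since Version is nil
	m.Version = proto.Uint32(3)
	fmt.Println(m.GetVersion()) // prints 3
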
+var _ = proto.Marshal +var _ = math.Inf + +type VolumeInformationMessage struct { + Id *uint32 `protobuf:"varint,1,req,name=id" json:"id,omitempty"` + Size *uint64 `protobuf:"varint,2,req,name=size" json:"size,omitempty"` + Collection *string `protobuf:"bytes,3,opt,name=collection" json:"collection,omitempty"` + FileCount *uint64 `protobuf:"varint,4,req,name=file_count" json:"file_count,omitempty"` + DeleteCount *uint64 `protobuf:"varint,5,req,name=delete_count" json:"delete_count,omitempty"` + DeletedByteCount *uint64 `protobuf:"varint,6,req,name=deleted_byte_count" json:"deleted_byte_count,omitempty"` + ReadOnly *bool `protobuf:"varint,7,opt,name=read_only" json:"read_only,omitempty"` + ReplicaPlacement *uint32 `protobuf:"varint,8,req,name=replica_placement" json:"replica_placement,omitempty"` + Version *uint32 `protobuf:"varint,9,opt,name=version,def=2" json:"version,omitempty"` + Ttl *uint32 `protobuf:"varint,10,opt,name=ttl" json:"ttl,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *VolumeInformationMessage) Reset() { *m = VolumeInformationMessage{} } +func (m *VolumeInformationMessage) String() string { return proto.CompactTextString(m) } +func (*VolumeInformationMessage) ProtoMessage() {} + +const Default_VolumeInformationMessage_Version uint32 = 2 + +func (m *VolumeInformationMessage) GetId() uint32 { + if m != nil && m.Id != nil { + return *m.Id + } + return 0 +} + +func (m *VolumeInformationMessage) GetSize() uint64 { + if m != nil && m.Size != nil { + return *m.Size + } + return 0 +} + +func (m *VolumeInformationMessage) GetCollection() string { + if m != nil && m.Collection != nil { + return *m.Collection + } + return "" +} + +func (m *VolumeInformationMessage) GetFileCount() uint64 { + if m != nil && m.FileCount != nil { + return *m.FileCount + } + return 0 +} + +func (m *VolumeInformationMessage) GetDeleteCount() uint64 { + if m != nil && m.DeleteCount != nil { + return *m.DeleteCount + } + return 0 +} + +func (m *VolumeInformationMessage) GetDeletedByteCount() uint64 { + if m != nil && m.DeletedByteCount != nil { + return *m.DeletedByteCount + } + return 0 +} + +func (m *VolumeInformationMessage) GetReadOnly() bool { + if m != nil && m.ReadOnly != nil { + return *m.ReadOnly + } + return false +} + +func (m *VolumeInformationMessage) GetReplicaPlacement() uint32 { + if m != nil && m.ReplicaPlacement != nil { + return *m.ReplicaPlacement + } + return 0 +} + +func (m *VolumeInformationMessage) GetVersion() uint32 { + if m != nil && m.Version != nil { + return *m.Version + } + return Default_VolumeInformationMessage_Version +} + +func (m *VolumeInformationMessage) GetTtl() uint32 { + if m != nil && m.Ttl != nil { + return *m.Ttl + } + return 0 +} + +type JoinMessage struct { + IsInit *bool `protobuf:"varint,1,opt,name=is_init" json:"is_init,omitempty"` + Ip *string `protobuf:"bytes,2,req,name=ip" json:"ip,omitempty"` + Port *uint32 `protobuf:"varint,3,req,name=port" json:"port,omitempty"` + PublicUrl *string `protobuf:"bytes,4,opt,name=public_url" json:"public_url,omitempty"` + MaxVolumeCount *uint32 `protobuf:"varint,5,req,name=max_volume_count" json:"max_volume_count,omitempty"` + MaxFileKey *uint64 `protobuf:"varint,6,req,name=max_file_key" json:"max_file_key,omitempty"` + DataCenter *string `protobuf:"bytes,7,opt,name=data_center" json:"data_center,omitempty"` + Rack *string `protobuf:"bytes,8,opt,name=rack" json:"rack,omitempty"` + Volumes []*VolumeInformationMessage `protobuf:"bytes,9,rep,name=volumes" json:"volumes,omitempty"` + AdminPort *uint32 
`protobuf:"varint,10,opt,name=admin_port" json:"admin_port,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *JoinMessage) Reset() { *m = JoinMessage{} } +func (m *JoinMessage) String() string { return proto.CompactTextString(m) } +func (*JoinMessage) ProtoMessage() {} + +func (m *JoinMessage) GetIsInit() bool { + if m != nil && m.IsInit != nil { + return *m.IsInit + } + return false +} + +func (m *JoinMessage) GetIp() string { + if m != nil && m.Ip != nil { + return *m.Ip + } + return "" +} + +func (m *JoinMessage) GetPort() uint32 { + if m != nil && m.Port != nil { + return *m.Port + } + return 0 +} + +func (m *JoinMessage) GetPublicUrl() string { + if m != nil && m.PublicUrl != nil { + return *m.PublicUrl + } + return "" +} + +func (m *JoinMessage) GetMaxVolumeCount() uint32 { + if m != nil && m.MaxVolumeCount != nil { + return *m.MaxVolumeCount + } + return 0 +} + +func (m *JoinMessage) GetMaxFileKey() uint64 { + if m != nil && m.MaxFileKey != nil { + return *m.MaxFileKey + } + return 0 +} + +func (m *JoinMessage) GetDataCenter() string { + if m != nil && m.DataCenter != nil { + return *m.DataCenter + } + return "" +} + +func (m *JoinMessage) GetRack() string { + if m != nil && m.Rack != nil { + return *m.Rack + } + return "" +} + +func (m *JoinMessage) GetVolumes() []*VolumeInformationMessage { + if m != nil { + return m.Volumes + } + return nil +} + +func (m *JoinMessage) GetAdminPort() uint32 { + if m != nil && m.AdminPort != nil { + return *m.AdminPort + } + return 0 +} + +func init() { +} diff --git a/weed/operation/system_message_test.go b/weed/operation/system_message_test.go new file mode 100644 index 000000000..d18ca49a4 --- /dev/null +++ b/weed/operation/system_message_test.go @@ -0,0 +1,59 @@ +package operation + +import ( + "encoding/json" + "log" + "testing" + + "github.com/golang/protobuf/proto" +) + +func TestSerialDeserial(t *testing.T) { + volumeMessage := &VolumeInformationMessage{ + Id: proto.Uint32(12), + Size: proto.Uint64(2341234), + Collection: proto.String("benchmark"), + FileCount: proto.Uint64(2341234), + DeleteCount: proto.Uint64(234), + DeletedByteCount: proto.Uint64(21234), + ReadOnly: proto.Bool(false), + ReplicaPlacement: proto.Uint32(210), + Version: proto.Uint32(2), + } + var volumeMessages []*VolumeInformationMessage + volumeMessages = append(volumeMessages, volumeMessage) + + joinMessage := &JoinMessage{ + IsInit: proto.Bool(true), + Ip: proto.String("127.0.3.12"), + Port: proto.Uint32(34546), + PublicUrl: proto.String("localhost:2342"), + MaxVolumeCount: proto.Uint32(210), + MaxFileKey: proto.Uint64(324234423), + DataCenter: proto.String("dc1"), + Rack: proto.String("rack2"), + Volumes: volumeMessages, + } + + data, err := proto.Marshal(joinMessage) + if err != nil { + log.Fatal("marshaling error: ", err) + } + newMessage := &JoinMessage{} + err = proto.Unmarshal(data, newMessage) + if err != nil { + log.Fatal("unmarshaling error: ", err) + } + log.Println("The pb data size is", len(data)) + + jsonData, jsonError := json.Marshal(joinMessage) + if jsonError != nil { + log.Fatal("json marshaling error: ", jsonError) + } + log.Println("The json data size is", len(jsonData), string(jsonData)) + + // Now test and newTest contain the same data. 
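
("test" and "newTest" in the comment above refer to joinMessage and its round-tripped copy newMessage; the check below spot-verifies one field.)
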
+ if *joinMessage.PublicUrl != *newMessage.PublicUrl { + log.Fatalf("data mismatch %q != %q", *joinMessage.PublicUrl, *newMessage.PublicUrl) + } +} diff --git a/weed/operation/upload_content.go b/weed/operation/upload_content.go new file mode 100644 index 000000000..a87784cad --- /dev/null +++ b/weed/operation/upload_content.go @@ -0,0 +1,96 @@ +package operation + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "mime" + "mime/multipart" + "net/http" + "net/textproto" + "path/filepath" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/security" +) + +type UploadResult struct { + Name string `json:"name,omitempty"` + Size uint32 `json:"size,omitempty"` + Error string `json:"error,omitempty"` +} + +var ( + client *http.Client +) + +func init() { + client = &http.Client{Transport: &http.Transport{ + MaxIdleConnsPerHost: 1024, + }} +} + +var fileNameEscaper = strings.NewReplacer("\\", "\\\\", "\"", "\\\"") + +func Upload(uploadUrl string, filename string, reader io.Reader, isGzipped bool, mtype string, jwt security.EncodedJwt) (*UploadResult, error) { + return upload_content(uploadUrl, func(w io.Writer) (err error) { + _, err = io.Copy(w, reader) + return + }, filename, isGzipped, mtype, jwt) +} +func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error, filename string, isGzipped bool, mtype string, jwt security.EncodedJwt) (*UploadResult, error) { + body_buf := bytes.NewBufferString("") + body_writer := multipart.NewWriter(body_buf) + h := make(textproto.MIMEHeader) + h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="file"; filename="%s"`, fileNameEscaper.Replace(filename))) + if mtype == "" { + mtype = mime.TypeByExtension(strings.ToLower(filepath.Ext(filename))) + } + if mtype != "" { + h.Set("Content-Type", mtype) + } + if isGzipped { + h.Set("Content-Encoding", "gzip") + } + if jwt != "" { + h.Set("Authorization", "BEARER "+string(jwt)) + } + file_writer, cp_err := body_writer.CreatePart(h) + if cp_err != nil { + glog.V(0).Infoln("error creating form file", cp_err.Error()) + return nil, cp_err + } + if err := fillBufferFunction(file_writer); err != nil { + glog.V(0).Infoln("error copying data", err) + return nil, err + } + content_type := body_writer.FormDataContentType() + if err := body_writer.Close(); err != nil { + glog.V(0).Infoln("error closing body", err) + return nil, err + } + resp, post_err := client.Post(uploadUrl, content_type, body_buf) + if post_err != nil { + glog.V(0).Infoln("failing to upload to", uploadUrl, post_err.Error()) + return nil, post_err + } + defer resp.Body.Close() + resp_body, ra_err := ioutil.ReadAll(resp.Body) + if ra_err != nil { + return nil, ra_err + } + var ret UploadResult + unmarshal_err := json.Unmarshal(resp_body, &ret) + if unmarshal_err != nil { + glog.V(0).Infoln("failing to read upload resonse", uploadUrl, string(resp_body)) + return nil, unmarshal_err + } + if ret.Error != "" { + return nil, errors.New(ret.Error) + } + return &ret, nil +} diff --git a/weed/proto/Makefile b/weed/proto/Makefile new file mode 100644 index 000000000..73af851dd --- /dev/null +++ b/weed/proto/Makefile @@ -0,0 +1,4 @@ +TARG=../operation + +all: + protoc --go_out=$(TARG) system_message.proto diff --git a/weed/proto/system_message.proto b/weed/proto/system_message.proto new file mode 100644 index 000000000..548360b27 --- /dev/null +++ b/weed/proto/system_message.proto @@ -0,0 +1,27 @@ +package operation; + +message VolumeInformationMessage { + required 
uint32 id = 1; + required uint64 size = 2; + optional string collection = 3; + required uint64 file_count = 4; + required uint64 delete_count = 5; + required uint64 deleted_byte_count = 6; + optional bool read_only = 7; + required uint32 replica_placement = 8; + optional uint32 version = 9 [default=2]; + optional uint32 ttl = 10; +} + +message JoinMessage { + optional bool is_init = 1; + required string ip = 2; + required uint32 port = 3; + optional string public_url = 4; + required uint32 max_volume_count = 5; + required uint64 max_file_key = 6; + optional string data_center = 7; + optional string rack = 8; + repeated VolumeInformationMessage volumes = 9; + optional uint32 admin_port = 10; +} diff --git a/weed/security/guard.go b/weed/security/guard.go new file mode 100644 index 000000000..76f64cc0e --- /dev/null +++ b/weed/security/guard.go @@ -0,0 +1,162 @@ +package security + +import ( + "errors" + "fmt" + "net" + "net/http" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +var ( + ErrUnauthorized = errors.New("unauthorized token") +) + +/* +Guard is to ensure data access security. +There are 2 ways to check access: +1. white list. It's checking request ip address. +2. JSON Web Token(JWT) generated from secretKey. + The jwt can come from: + 1. url parameter jwt=... + 2. request header "Authorization" + 3. cookie with the name "jwt" + +The white list is checked first because it is easy. +Then the JWT is checked. + +The Guard will also check these claims if provided: +1. "exp" Expiration Time +2. "nbf" Not Before + +Generating JWT: +1. use HS256 to sign +2. optionally set "exp", "nbf" fields, in Unix time, + the number of seconds elapsed since January 1, 1970 UTC. + +Referenced: +https://github.com/pkieltyka/jwtauth/blob/master/jwtauth.go + +*/ +type Guard struct { + whiteList []string + SecretKey Secret + + isActive bool +} + +func NewGuard(whiteList []string, secretKey string) *Guard { + g := &Guard{whiteList: whiteList, SecretKey: Secret(secretKey)} + g.isActive = len(g.whiteList) != 0 || len(g.SecretKey) != 0 + return g +} + +func (g *Guard) WhiteList(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) { + if !g.isActive { + //if no security needed, just skip all checkings + return f + } + return func(w http.ResponseWriter, r *http.Request) { + if err := g.checkWhiteList(w, r); err != nil { + w.WriteHeader(http.StatusUnauthorized) + return + } + f(w, r) + } +} + +func (g *Guard) Secure(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) { + if !g.isActive { + //if no security needed, just skip all checkings + return f + } + return func(w http.ResponseWriter, r *http.Request) { + if err := g.checkJwt(w, r); err != nil { + w.WriteHeader(http.StatusUnauthorized) + return + } + f(w, r) + } +} + +func GetActualRemoteHost(r *http.Request) (host string, err error) { + host = r.Header.Get("HTTP_X_FORWARDED_FOR") + if host == "" { + host = r.Header.Get("X-FORWARDED-FOR") + } + if strings.Contains(host, ",") { + host = host[0:strings.Index(host, ",")] + } + if host == "" { + host, _, err = net.SplitHostPort(r.RemoteAddr) + } + return +} + +func (g *Guard) checkWhiteList(w http.ResponseWriter, r *http.Request) error { + if len(g.whiteList) == 0 { + return nil + } + + host, err := GetActualRemoteHost(r) + if err == nil { + for _, ip := range g.whiteList { + + // If the whitelist entry contains a "/" it + // is a CIDR range, and we should check the + // remote host is within it + if 
strings.Contains(ip, "/") {
+				_, cidrnet, err := net.ParseCIDR(ip)
+				if err != nil {
+					panic(err)
+				}
+				remote := net.ParseIP(host)
+				if cidrnet.Contains(remote) {
+					return nil
+				}
+			}
+
+			//
+			// Otherwise we're looking for a literal match.
+			//
+			if ip == host {
+				return nil
+			}
+		}
+	}
+
+	glog.V(1).Infof("Not in whitelist: %s", r.RemoteAddr)
+	return fmt.Errorf("Not in whitelist: %s", r.RemoteAddr)
+}
+
+func (g *Guard) checkJwt(w http.ResponseWriter, r *http.Request) error {
+	if g.checkWhiteList(w, r) == nil {
+		return nil
+	}
+
+	if len(g.SecretKey) == 0 {
+		return nil
+	}
+
+	tokenStr := GetJwt(r)
+
+	if tokenStr == "" {
+		return ErrUnauthorized
+	}
+
+	// Verify the token
+	token, err := DecodeJwt(g.SecretKey, tokenStr)
+	if err != nil {
+		glog.V(1).Infof("Token verification error from %s: %v", r.RemoteAddr, err)
+		return ErrUnauthorized
+	}
+	if !token.Valid {
+		glog.V(1).Infof("Token invalid from %s: %v", r.RemoteAddr, tokenStr)
+		return ErrUnauthorized
+	}
+
+	// the token checked out, so the request is authorized
+	// (as originally written, this fell through to an unconditional
+	// "no write permission" error, rejecting even valid tokens)
+	return nil
+}
diff --git a/weed/security/jwt.go b/weed/security/jwt.go
new file mode 100644
index 000000000..a2472ca6e
--- /dev/null
+++ b/weed/security/jwt.go
@@ -0,0 +1,72 @@
+package security
+
+import (
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	jwt "github.com/dgrijalva/jwt-go"
+)
+
+type EncodedJwt string
+type Secret string
+
+func GenJwt(secret Secret, fileId string) EncodedJwt {
+	if secret == "" {
+		return ""
+	}
+
+	t := jwt.New(jwt.GetSigningMethod("HS256"))
+	t.Claims["exp"] = time.Now().Unix() + 10
+	t.Claims["sub"] = fileId
+	// HS256 signing requires a []byte key; passing the Secret string type fails
+	encoded, e := t.SignedString([]byte(secret))
+	if e != nil {
+		glog.V(0).Infof("Failed to sign claims: %v", t.Claims)
+		return ""
+	}
+	return EncodedJwt(encoded)
+}
+
+func GetJwt(r *http.Request) EncodedJwt {
+
+	// Get token from query params
+	tokenStr := r.URL.Query().Get("jwt")
+
+	// Get token from authorization header
+	if tokenStr == "" {
+		bearer := r.Header.Get("Authorization")
+		if len(bearer) > 7 && strings.ToUpper(bearer[0:6]) == "BEARER" {
+			tokenStr = bearer[7:]
+		}
+	}
+
+	// Get token from cookie
+	if tokenStr == "" {
+		cookie, err := r.Cookie("jwt")
+		if err == nil {
+			tokenStr = cookie.Value
+		}
+	}
+
+	return EncodedJwt(tokenStr)
+}
+
+func EncodeJwt(secret Secret, claims map[string]interface{}) (EncodedJwt, error) {
+	if secret == "" {
+		return "", nil
+	}
+
+	t := jwt.New(jwt.GetSigningMethod("HS256"))
+	t.Claims = claims
+	encoded, e := t.SignedString([]byte(secret))
+	return EncodedJwt(encoded), e
+}
+
+func DecodeJwt(secret Secret, tokenString EncodedJwt) (token *jwt.Token, err error) {
+	// check exp, nbf
+	return jwt.Parse(string(tokenString), func(token *jwt.Token) (interface{}, error) {
+		// the HMAC key must be a []byte for verification as well
+		return []byte(secret), nil
+	})
+}
diff --git a/weed/sequence/memory_sequencer.go b/weed/sequence/memory_sequencer.go
new file mode 100644
index 000000000..d727dc723
--- /dev/null
+++ b/weed/sequence/memory_sequencer.go
@@ -0,0 +1,36 @@
+package sequence
+
+import (
+	"sync"
+)
+
+// just for testing
+type MemorySequencer struct {
+	counter      uint64
+	sequenceLock sync.Mutex
+}
+
+func NewMemorySequencer() (m *MemorySequencer) {
+	m = &MemorySequencer{counter: 1}
+	return
+}
+
+func (m *MemorySequencer) NextFileId(count uint64) (uint64, uint64) {
+	m.sequenceLock.Lock()
+	defer m.sequenceLock.Unlock()
+	ret := m.counter
+	m.counter += count
+	return ret, count
+}
+
+func (m *MemorySequencer) SetMax(seenValue uint64) {
+	m.sequenceLock.Lock()
defer m.sequenceLock.Unlock() + if m.counter <= seenValue { + m.counter = seenValue + 1 + } +} + +func (m *MemorySequencer) Peek() uint64 { + return m.counter +} diff --git a/weed/sequence/sequence.go b/weed/sequence/sequence.go new file mode 100644 index 000000000..fbdc3b8ef --- /dev/null +++ b/weed/sequence/sequence.go @@ -0,0 +1,7 @@ +package sequence + +type Sequencer interface { + NextFileId(count uint64) (uint64, uint64) + SetMax(uint64) + Peek() uint64 +} diff --git a/weed/server/common.go b/weed/server/common.go new file mode 100644 index 000000000..312bcea14 --- /dev/null +++ b/weed/server/common.go @@ -0,0 +1,179 @@ +package weed_server + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "net/http" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var serverStats *stats.ServerStats +var startTime = time.Now() + +func init() { + serverStats = stats.NewServerStats() + go serverStats.Start() + +} + +func writeJson(w http.ResponseWriter, r *http.Request, httpStatus int, obj interface{}) (err error) { + var bytes []byte + if r.FormValue("pretty") != "" { + bytes, err = json.MarshalIndent(obj, "", " ") + } else { + bytes, err = json.Marshal(obj) + } + if err != nil { + return + } + callback := r.FormValue("callback") + if callback == "" { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(httpStatus) + _, err = w.Write(bytes) + } else { + w.Header().Set("Content-Type", "application/javascript") + w.WriteHeader(httpStatus) + if _, err = w.Write([]uint8(callback)); err != nil { + return + } + if _, err = w.Write([]uint8("(")); err != nil { + return + } + fmt.Fprint(w, string(bytes)) + if _, err = w.Write([]uint8(")")); err != nil { + return + } + } + + return +} + +// wrapper for writeJson - just logs errors +func writeJsonQuiet(w http.ResponseWriter, r *http.Request, httpStatus int, obj interface{}) { + if err := writeJson(w, r, httpStatus, obj); err != nil { + glog.V(0).Infof("error writing JSON %s: %v", obj, err) + } +} +func writeJsonError(w http.ResponseWriter, r *http.Request, httpStatus int, err error) { + m := make(map[string]interface{}) + m["error"] = err.Error() + writeJsonQuiet(w, r, httpStatus, m) +} + +func debug(params ...interface{}) { + glog.V(4).Infoln(params) +} + +func submitForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl string) { + jwt := security.GetJwt(r) + m := make(map[string]interface{}) + if r.Method != "POST" { + writeJsonError(w, r, http.StatusMethodNotAllowed, errors.New("Only submit via POST!")) + return + } + + debug("parsing upload file...") + fname, data, mimeType, isGzipped, lastModified, _, _, pe := storage.ParseUpload(r) + if pe != nil { + writeJsonError(w, r, http.StatusBadRequest, pe) + return + } + + debug("assigning file id for", fname) + r.ParseForm() + assignResult, ae := operation.Assign(masterUrl, 1, r.FormValue("replication"), r.FormValue("collection"), r.FormValue("ttl")) + if ae != nil { + writeJsonError(w, r, http.StatusInternalServerError, ae) + return + } + + url := "http://" + assignResult.Url + "/" + assignResult.Fid + if lastModified != 0 { + url = url + "?ts=" + strconv.FormatUint(lastModified, 10) + } + + debug("upload file to store", url) + uploadResult, err := operation.Upload(url, fname, 
bytes.NewReader(data), isGzipped, mimeType, jwt) + if err != nil { + writeJsonError(w, r, http.StatusInternalServerError, err) + return + } + + m["fileName"] = fname + m["fid"] = assignResult.Fid + m["fileUrl"] = assignResult.PublicUrl + "/" + assignResult.Fid + m["size"] = uploadResult.Size + writeJsonQuiet(w, r, http.StatusCreated, m) + return +} + +func deleteForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl string) { + r.ParseForm() + fids := r.Form["fid"] + ret, err := operation.DeleteFiles(masterUrl, fids) + if err != nil { + writeJsonError(w, r, http.StatusInternalServerError, err) + return + } + writeJsonQuiet(w, r, http.StatusAccepted, ret) +} + +func parseURLPath(path string) (vid, fid, filename, ext string, isVolumeIdOnly bool) { + switch strings.Count(path, "/") { + case 3: + parts := strings.Split(path, "/") + vid, fid, filename = parts[1], parts[2], parts[3] + ext = filepath.Ext(filename) + case 2: + parts := strings.Split(path, "/") + vid, fid = parts[1], parts[2] + dotIndex := strings.LastIndex(fid, ".") + if dotIndex > 0 { + ext = fid[dotIndex:] + fid = fid[0:dotIndex] + } + default: + sepIndex := strings.LastIndex(path, "/") + commaIndex := strings.LastIndex(path[sepIndex:], ",") + if commaIndex <= 0 { + vid, isVolumeIdOnly = path[sepIndex+1:], true + return + } + dotIndex := strings.LastIndex(path[sepIndex:], ".") + vid = path[sepIndex+1 : commaIndex] + fid = path[commaIndex+1:] + ext = "" + if dotIndex > 0 { + fid = path[commaIndex+1 : dotIndex] + ext = path[dotIndex:] + } + } + return +} + +func statsCounterHandler(w http.ResponseWriter, r *http.Request) { + m := make(map[string]interface{}) + m["Version"] = util.VERSION + m["Counters"] = serverStats + writeJsonQuiet(w, r, http.StatusOK, m) +} + +func statsMemoryHandler(w http.ResponseWriter, r *http.Request) { + m := make(map[string]interface{}) + m["Version"] = util.VERSION + m["Memory"] = stats.MemStat() + writeJsonQuiet(w, r, http.StatusOK, m) +} diff --git a/weed/server/filer_server.go b/weed/server/filer_server.go new file mode 100644 index 000000000..ee7eaf886 --- /dev/null +++ b/weed/server/filer_server.go @@ -0,0 +1,67 @@ +package weed_server + +import ( + "net/http" + "strconv" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/filer/cassandra_store" + "github.com/chrislusf/seaweedfs/weed/filer/embedded_filer" + "github.com/chrislusf/seaweedfs/weed/filer/flat_namespace" + "github.com/chrislusf/seaweedfs/weed/filer/redis_store" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/security" +) + +type FilerServer struct { + port string + master string + collection string + defaultReplication string + redirectOnRead bool + disableDirListing bool + secret security.Secret + filer filer.Filer +} + +func NewFilerServer(r *http.ServeMux, ip string, port int, master string, dir string, collection string, + replication string, redirectOnRead bool, disableDirListing bool, + secret string, + cassandra_server string, cassandra_keyspace string, + redis_server string, redis_password string, redis_database int, +) (fs *FilerServer, err error) { + fs = &FilerServer{ + master: master, + collection: collection, + defaultReplication: replication, + redirectOnRead: redirectOnRead, + disableDirListing: disableDirListing, + port: ip + ":" + strconv.Itoa(port), + } + + if cassandra_server != "" { + cassandra_store, err := cassandra_store.NewCassandraStore(cassandra_keyspace, cassandra_server) + if err != nil { + glog.Fatalf("Can not connect to 
cassandra server %s with keyspace %s: %v", cassandra_server, cassandra_keyspace, err)
+ }
+ fs.filer = flat_namespace.NewFlatNamespaceFiler(master, cassandra_store)
+ } else if redis_server != "" {
+ redis_store := redis_store.NewRedisStore(redis_server, redis_password, redis_database)
+ fs.filer = flat_namespace.NewFlatNamespaceFiler(master, redis_store)
+ } else {
+ if fs.filer, err = embedded_filer.NewFilerEmbedded(master, dir); err != nil {
+ glog.Fatalf("Cannot start filer in dir %s: %v", dir, err)
+ return
+ }
+
+ r.HandleFunc("/admin/mv", fs.moveHandler)
+ }
+
+ r.HandleFunc("/", fs.filerHandler)
+
+ return fs, nil
+}
+
+func (fs *FilerServer) jwt(fileId string) security.EncodedJwt {
+ return security.GenJwt(fs.secret, fileId)
+}
diff --git a/weed/server/filer_server_handlers.go b/weed/server/filer_server_handlers.go new file mode 100644 index 000000000..d6b98976b --- /dev/null +++ b/weed/server/filer_server_handlers.go @@ -0,0 +1,265 @@
+package weed_server
+
+import (
+ "bytes"
+ "encoding/json"
+ "errors"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "net/url"
+ "strconv"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/syndtr/goleveldb/leveldb"
+)
+
+func (fs *FilerServer) filerHandler(w http.ResponseWriter, r *http.Request) {
+ switch r.Method {
+ case "GET":
+ fs.GetOrHeadHandler(w, r, true)
+ case "HEAD":
+ fs.GetOrHeadHandler(w, r, false)
+ case "DELETE":
+ fs.DeleteHandler(w, r)
+ case "PUT":
+ fs.PostHandler(w, r)
+ case "POST":
+ fs.PostHandler(w, r)
+ }
+}
+
+// listDirectoryHandler lists files and folders under a directory.
+// Files are sorted by name and paginated via "lastFileName" and "limit".
+// Sub directories are listed on the first page, when "lastFileName"
+// is empty.
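+//
+// For illustration only (hypothetical filer address and paths), a paginated
+// listing could be fetched with:
+//
+//   curl "http://localhost:8888/some/dir/?limit=20"
+//   curl "http://localhost:8888/some/dir/?lastFileName=file019.txt&limit=20"
+//
+// A missing or non-numeric "limit" defaults to 100.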
+func (fs *FilerServer) listDirectoryHandler(w http.ResponseWriter, r *http.Request) {
+ if !strings.HasSuffix(r.URL.Path, "/") {
+ return
+ }
+ dirlist, err := fs.filer.ListDirectories(r.URL.Path)
+ if err == leveldb.ErrNotFound {
+ glog.V(3).Infoln("Directory Not Found in db", r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ m := make(map[string]interface{})
+ m["Directory"] = r.URL.Path
+ lastFileName := r.FormValue("lastFileName")
+ if lastFileName == "" {
+ m["Subdirectories"] = dirlist
+ }
+ limit, limit_err := strconv.Atoi(r.FormValue("limit"))
+ if limit_err != nil {
+ limit = 100
+ }
+ m["Files"], _ = fs.filer.ListFiles(r.URL.Path, lastFileName, limit)
+ writeJsonQuiet(w, r, http.StatusOK, m)
+}
+
+func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request, isGetMethod bool) {
+ if strings.HasSuffix(r.URL.Path, "/") {
+ if fs.disableDirListing {
+ w.WriteHeader(http.StatusMethodNotAllowed)
+ return
+ }
+ fs.listDirectoryHandler(w, r)
+ return
+ }
+
+ fileId, err := fs.filer.FindFile(r.URL.Path)
+ if err == leveldb.ErrNotFound {
+ glog.V(3).Infoln("Not found in db", r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+
+ urlLocation, err := operation.LookupFileId(fs.master, fileId)
+ if err != nil {
+ glog.V(1).Infof("operation LookupFileId %s failed, err is %s", fileId, err.Error())
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ urlString := urlLocation
+ if fs.redirectOnRead {
+ http.Redirect(w, r, urlString, http.StatusFound)
+ return
+ }
+ u, _ := url.Parse(urlString)
+ request := &http.Request{
+ Method: r.Method,
+ URL: u,
+ Proto: r.Proto,
+ ProtoMajor: r.ProtoMajor,
+ ProtoMinor: r.ProtoMinor,
+ Header: r.Header,
+ Body: r.Body,
+ Host: r.Host,
+ ContentLength: r.ContentLength,
+ }
+ glog.V(3).Infoln("retrieving from", u)
+ resp, do_err := util.Do(request)
+ if do_err != nil {
+ glog.V(0).Infoln("failing to connect to volume server", do_err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, do_err)
+ return
+ }
+ defer resp.Body.Close()
+ for k, v := range resp.Header {
+ w.Header()[k] = v
+ }
+ w.WriteHeader(resp.StatusCode)
+ io.Copy(w, resp.Body)
+}
+
+type analogueReader struct {
+ *bytes.Buffer
+}
+
+// So that it implements the io.ReadCloser interface
+func (m analogueReader) Close() error { return nil }
+
+func (fs *FilerServer) PostHandler(w http.ResponseWriter, r *http.Request) {
+ query := r.URL.Query()
+ replication := query.Get("replication")
+ if replication == "" {
+ replication = fs.defaultReplication
+ }
+ collection := query.Get("collection")
+ if collection == "" {
+ collection = fs.collection
+ }
+
+ var fileId string
+ var err error
+ var urlLocation string
+ if r.Method == "PUT" {
+ buf, _ := ioutil.ReadAll(r.Body)
+ r.Body = analogueReader{bytes.NewBuffer(buf)}
+ fileName, _, _, _, _, _, _, pe := storage.ParseUpload(r)
+ if pe != nil {
+ glog.V(0).Infoln("failing to parse post body", pe.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, pe)
+ return
+ }
+ //reconstruct http request body for following new request to volume server
+ r.Body = analogueReader{bytes.NewBuffer(buf)}
+
+ path := r.URL.Path
+ if strings.HasSuffix(path, "/") {
+ if fileName != "" {
+ path += fileName
+ }
+ }
+
+ if fileId, err = fs.filer.FindFile(path); err != nil && err != leveldb.ErrNotFound {
+ glog.V(0).Infoln("failing to find path in filer store", path, err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ return
+ } else if fileId != "" && err == nil {
+ var le error
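+ // The path already maps to a file id: this PUT updates the existing
+ // entry in place, so look up its current volume location and post there.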
+ urlLocation, le = operation.LookupFileId(fs.master, fileId)
+ if le != nil {
+ glog.V(1).Infof("operation LookupFileId %s failed, err is %s", fileId, le.Error())
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ }
+ } else {
+ assignResult, ae := operation.Assign(fs.master, 1, replication, collection, query.Get("ttl"))
+ if ae != nil {
+ glog.V(0).Infoln("failing to assign a file id", ae.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, ae)
+ return
+ }
+ fileId = assignResult.Fid
+ urlLocation = "http://" + assignResult.Url + "/" + assignResult.Fid
+ }
+
+ u, _ := url.Parse(urlLocation)
+ glog.V(4).Infoln("post to", u)
+ request := &http.Request{
+ Method: r.Method,
+ URL: u,
+ Proto: r.Proto,
+ ProtoMajor: r.ProtoMajor,
+ ProtoMinor: r.ProtoMinor,
+ Header: r.Header,
+ Body: r.Body,
+ Host: r.Host,
+ ContentLength: r.ContentLength,
+ }
+ resp, do_err := util.Do(request)
+ if do_err != nil {
+ glog.V(0).Infoln("failing to connect to volume server", r.RequestURI, do_err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, do_err)
+ return
+ }
+ defer resp.Body.Close()
+ resp_body, ra_err := ioutil.ReadAll(resp.Body)
+ if ra_err != nil {
+ glog.V(0).Infoln("failing to upload to volume server", r.RequestURI, ra_err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, ra_err)
+ return
+ }
+ glog.V(4).Infoln("post result", string(resp_body))
+ var ret operation.UploadResult
+ unmarshal_err := json.Unmarshal(resp_body, &ret)
+ if unmarshal_err != nil {
+ glog.V(0).Infoln("failing to read upload response", r.RequestURI, string(resp_body))
+ writeJsonError(w, r, http.StatusInternalServerError, unmarshal_err)
+ return
+ }
+ if ret.Error != "" {
+ glog.V(0).Infoln("failing to post to volume server", r.RequestURI, ret.Error)
+ writeJsonError(w, r, http.StatusInternalServerError, errors.New(ret.Error))
+ return
+ }
+ path := r.URL.Path
+ if strings.HasSuffix(path, "/") {
+ if ret.Name != "" {
+ path += ret.Name
+ } else {
+ operation.DeleteFile(fs.master, fileId, fs.jwt(fileId)) //clean up
+ glog.V(0).Infoln("Cannot write to folder", path, "without a file name!")
+ writeJsonError(w, r, http.StatusInternalServerError,
+ errors.New("Cannot write to folder "+path+" without a file name"))
+ return
+ }
+ }
+ glog.V(4).Infoln("saving", path, "=>", fileId)
+ if db_err := fs.filer.CreateFile(path, fileId); db_err != nil {
+ operation.DeleteFile(fs.master, fileId, fs.jwt(fileId)) //clean up
+ glog.V(0).Infof("failing to write %s to filer server : %v", path, db_err)
+ writeJsonError(w, r, http.StatusInternalServerError, db_err)
+ return
+ }
+ w.WriteHeader(http.StatusCreated)
+ w.Write(resp_body)
+}
+
+// curl -X DELETE http://localhost:8888/path/to
+// curl -X DELETE http://localhost:8888/path/to?recursive=true
+func (fs *FilerServer) DeleteHandler(w http.ResponseWriter, r *http.Request) {
+ var err error
+ var fid string
+ if strings.HasSuffix(r.URL.Path, "/") {
+ isRecursive := r.FormValue("recursive") == "true"
+ err = fs.filer.DeleteDirectory(r.URL.Path, isRecursive)
+ } else {
+ fid, err = fs.filer.DeleteFile(r.URL.Path)
+ if err == nil && fid != "" {
+ err = operation.DeleteFile(fs.master, fid, fs.jwt(fid))
+ }
+ }
+ if err == nil {
+ writeJsonQuiet(w, r, http.StatusAccepted, map[string]string{"error": ""})
+ } else {
+ glog.V(4).Infoln("deleting", r.URL.Path, ":", err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ }
+}
diff --git a/weed/server/filer_server_handlers_admin.go b/weed/server/filer_server_handlers_admin.go new file mode
100644 index 000000000..979ad517b --- /dev/null +++ b/weed/server/filer_server_handlers_admin.go @@ -0,0 +1,29 @@
+package weed_server
+
+import (
+ "net/http"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+/*
+Move a folder or a file, with 4 use cases:
+ mv fromDir toNewDir
+ mv fromDir toOldDir
+ mv fromFile toDir
+ mv fromFile toFile
+
+Wildcard is not supported.
+
+*/
+func (fs *FilerServer) moveHandler(w http.ResponseWriter, r *http.Request) {
+ from := r.FormValue("from")
+ to := r.FormValue("to")
+ err := fs.filer.Move(from, to)
+ if err != nil {
+ glog.V(4).Infoln("moving", from, "->", to, err.Error())
+ writeJsonError(w, r, http.StatusInternalServerError, err)
+ } else {
+ w.WriteHeader(http.StatusOK)
+ }
+}
diff --git a/weed/server/master_server.go b/weed/server/master_server.go new file mode 100644 index 000000000..61bda6988 --- /dev/null +++ b/weed/server/master_server.go @@ -0,0 +1,131 @@
+package weed_server
+
+import (
+ "fmt"
+ "net/http"
+ "net/http/httputil"
+ "net/url"
+ "sync"
+
+ "github.com/chrislusf/raft"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/security"
+ "github.com/chrislusf/seaweedfs/weed/sequence"
+ "github.com/chrislusf/seaweedfs/weed/topology"
+ "github.com/chrislusf/seaweedfs/weed/util"
+ "github.com/gorilla/mux"
+)
+
+type MasterServer struct {
+ port int
+ metaFolder string
+ volumeSizeLimitMB uint
+ pulseSeconds int
+ defaultReplicaPlacement string
+ garbageThreshold string
+ guard *security.Guard
+
+ Topo *topology.Topology
+ vg *topology.VolumeGrowth
+ vgLock sync.Mutex
+
+ boundedLeaderChan chan int
+}
+
+func NewMasterServer(r *mux.Router, port int, metaFolder string,
+ volumeSizeLimitMB uint,
+ pulseSeconds int,
+ confFile string,
+ defaultReplicaPlacement string,
+ garbageThreshold string,
+ whiteList []string,
+ secureKey string,
+) *MasterServer {
+ ms := &MasterServer{
+ port: port,
+ volumeSizeLimitMB: volumeSizeLimitMB,
+ pulseSeconds: pulseSeconds,
+ defaultReplicaPlacement: defaultReplicaPlacement,
+ garbageThreshold: garbageThreshold,
+ }
+ ms.boundedLeaderChan = make(chan int, 16)
+ seq := sequence.NewMemorySequencer()
+ var e error
+ if ms.Topo, e = topology.NewTopology("topo", confFile, seq,
+ uint64(volumeSizeLimitMB)*1024*1024, pulseSeconds); e != nil {
+ glog.Fatalf("cannot create topology: %v", e)
+ }
+ ms.vg = topology.NewDefaultVolumeGrowth()
+ glog.V(0).Infoln("Volume Size Limit is", volumeSizeLimitMB, "MB")
+
+ ms.guard = security.NewGuard(whiteList, secureKey)
+
+ r.HandleFunc("/", ms.uiStatusHandler)
+ r.HandleFunc("/ui/index.html", ms.uiStatusHandler)
+ r.HandleFunc("/dir/assign", ms.proxyToLeader(ms.guard.WhiteList(ms.dirAssignHandler)))
+ r.HandleFunc("/dir/lookup", ms.proxyToLeader(ms.guard.WhiteList(ms.dirLookupHandler)))
+ r.HandleFunc("/dir/join", ms.proxyToLeader(ms.guard.WhiteList(ms.dirJoinHandler)))
+ r.HandleFunc("/dir/status", ms.proxyToLeader(ms.guard.WhiteList(ms.dirStatusHandler)))
+ r.HandleFunc("/col/delete", ms.proxyToLeader(ms.guard.WhiteList(ms.collectionDeleteHandler)))
+ r.HandleFunc("/vol/lookup", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeLookupHandler)))
+ r.HandleFunc("/vol/grow", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeGrowHandler)))
+ r.HandleFunc("/vol/status", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeStatusHandler)))
+ r.HandleFunc("/vol/vacuum", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeVacuumHandler)))
+ r.HandleFunc("/submit", ms.guard.WhiteList(ms.submitFromMasterServerHandler))
+ r.HandleFunc("/delete",
ms.guard.WhiteList(ms.deleteFromMasterServerHandler))
+ r.HandleFunc("/{fileId}", ms.proxyToLeader(ms.redirectHandler))
+ r.HandleFunc("/stats/counter", ms.guard.WhiteList(statsCounterHandler))
+ r.HandleFunc("/stats/memory", ms.guard.WhiteList(statsMemoryHandler))
+
+ ms.Topo.StartRefreshWritableVolumes(garbageThreshold)
+
+ return ms
+}
+
+func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) {
+ ms.Topo.RaftServer = raftServer.raftServer
+ ms.Topo.RaftServer.AddEventListener(raft.LeaderChangeEventType, func(e raft.Event) {
+ if ms.Topo.RaftServer.Leader() != "" {
+ glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "becomes leader.")
+ }
+ })
+ if ms.Topo.IsLeader() {
+ glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", "I am the leader!")
+ } else {
+ if ms.Topo.RaftServer.Leader() != "" {
+ glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "is the leader.")
+ }
+ }
+}
+
+func (ms *MasterServer) proxyToLeader(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) {
+ return func(w http.ResponseWriter, r *http.Request) {
+ if ms.Topo.IsLeader() {
+ f(w, r)
+ } else if ms.Topo.RaftServer != nil && ms.Topo.RaftServer.Leader() != "" {
+ ms.boundedLeaderChan <- 1
+ defer func() { <-ms.boundedLeaderChan }()
+ targetUrl, err := url.Parse("http://" + ms.Topo.RaftServer.Leader())
+ if err != nil {
+ writeJsonError(w, r, http.StatusInternalServerError,
+ fmt.Errorf("Leader URL http://%s Parse Error: %v", ms.Topo.RaftServer.Leader(), err))
+ return
+ }
+ glog.V(4).Infoln("proxying to leader", ms.Topo.RaftServer.Leader())
+ proxy := httputil.NewSingleHostReverseProxy(targetUrl)
+ director := proxy.Director
+ proxy.Director = func(req *http.Request) {
+ actualHost, err := security.GetActualRemoteHost(req)
+ if err == nil {
+ req.Header.Set("HTTP_X_FORWARDED_FOR", actualHost)
+ }
+ director(req)
+ }
+ proxy.Transport = util.Transport
+ proxy.ServeHTTP(w, r)
+ } else {
+ //drop it to the floor
+ //writeJsonError(w, r, errors.New(ms.Topo.RaftServer.Name()+" does not know Leader yet:"+ms.Topo.RaftServer.Leader()))
+ }
+ }
+}
diff --git a/weed/server/master_server_handlers.go b/weed/server/master_server_handlers.go new file mode 100644 index 000000000..e811631f8 --- /dev/null +++ b/weed/server/master_server_handlers.go @@ -0,0 +1,104 @@
+package weed_server
+
+import (
+ "fmt"
+ "net/http"
+ "strconv"
+ "strings"
+
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/stats"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+)
+
+func (ms *MasterServer) lookupVolumeId(vids []string, collection string) (volumeLocations map[string]operation.LookupResult) {
+ volumeLocations = make(map[string]operation.LookupResult)
+ for _, vid := range vids {
+ commaSep := strings.Index(vid, ",")
+ if commaSep > 0 {
+ vid = vid[0:commaSep]
+ }
+ if _, ok := volumeLocations[vid]; ok {
+ continue
+ }
+ volumeId, err := storage.NewVolumeId(vid)
+ if err == nil {
+ machines := ms.Topo.Lookup(collection, volumeId)
+ if machines != nil {
+ var ret []operation.Location
+ for _, dn := range machines {
+ ret = append(ret, operation.Location{Url: dn.Url(), PublicUrl: dn.PublicUrl})
+ }
+ volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Locations: ret}
+ } else {
+ volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Error: "volumeId not found."}
+ }
+ } else {
+ volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Error: "Unknown volumeId format."}
+ }
+ }
+ }
+
return +} + +// Takes one volumeId only, can not do batch lookup +func (ms *MasterServer) dirLookupHandler(w http.ResponseWriter, r *http.Request) { + vid := r.FormValue("volumeId") + commaSep := strings.Index(vid, ",") + if commaSep > 0 { + vid = vid[0:commaSep] + } + vids := []string{vid} + collection := r.FormValue("collection") //optional, but can be faster if too many collections + volumeLocations := ms.lookupVolumeId(vids, collection) + location := volumeLocations[vid] + httpStatus := http.StatusOK + if location.Error != "" { + httpStatus = http.StatusNotFound + } + writeJsonQuiet(w, r, httpStatus, location) +} + +// This can take batched volumeIds, &volumeId=x&volumeId=y&volumeId=z +func (ms *MasterServer) volumeLookupHandler(w http.ResponseWriter, r *http.Request) { + r.ParseForm() + vids := r.Form["volumeId"] + collection := r.FormValue("collection") //optional, but can be faster if too many collections + volumeLocations := ms.lookupVolumeId(vids, collection) + writeJsonQuiet(w, r, http.StatusOK, volumeLocations) +} + +func (ms *MasterServer) dirAssignHandler(w http.ResponseWriter, r *http.Request) { + stats.AssignRequest() + requestedCount, e := strconv.ParseUint(r.FormValue("count"), 10, 64) + if e != nil || requestedCount == 0 { + requestedCount = 1 + } + + option, err := ms.getVolumeGrowOption(r) + if err != nil { + writeJsonQuiet(w, r, http.StatusNotAcceptable, operation.AssignResult{Error: err.Error()}) + return + } + + if !ms.Topo.HasWritableVolume(option) { + if ms.Topo.FreeSpace() <= 0 { + writeJsonQuiet(w, r, http.StatusNotFound, operation.AssignResult{Error: "No free volumes left!"}) + return + } + ms.vgLock.Lock() + defer ms.vgLock.Unlock() + if !ms.Topo.HasWritableVolume(option) { + if _, err = ms.vg.AutomaticGrowByType(option, ms.Topo); err != nil { + writeJsonError(w, r, http.StatusInternalServerError, + fmt.Errorf("Cannot grow volume group! 
%v", err)) + return + } + } + } + fid, count, dn, err := ms.Topo.PickForWrite(requestedCount, option) + if err == nil { + writeJsonQuiet(w, r, http.StatusOK, operation.AssignResult{Fid: fid, Url: dn.Url(), PublicUrl: dn.PublicUrl, Count: count}) + } else { + writeJsonQuiet(w, r, http.StatusNotAcceptable, operation.AssignResult{Error: err.Error()}) + } +} diff --git a/weed/server/master_server_handlers_admin.go b/weed/server/master_server_handlers_admin.go new file mode 100644 index 000000000..a762bf416 --- /dev/null +++ b/weed/server/master_server_handlers_admin.go @@ -0,0 +1,193 @@ +package weed_server + +import ( + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "math/rand" + "net/http" + "strconv" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/topology" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/proto" +) + +func (ms *MasterServer) collectionDeleteHandler(w http.ResponseWriter, r *http.Request) { + collection, ok := ms.Topo.FindCollection(r.FormValue("collection")) + if !ok { + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("collection %s does not exist", r.FormValue("collection"))) + return + } + for _, server := range collection.ListVolumeServers() { + _, err := util.Get("http://" + server.Ip + ":" + strconv.Itoa(server.Port) + "/admin/delete_collection?collection=" + r.FormValue("collection")) + if err != nil { + writeJsonError(w, r, http.StatusInternalServerError, err) + return + } + } + ms.Topo.DeleteCollection(r.FormValue("collection")) +} + +func (ms *MasterServer) dirJoinHandler(w http.ResponseWriter, r *http.Request) { + body, err := ioutil.ReadAll(r.Body) + if err != nil { + writeJsonError(w, r, http.StatusBadRequest, err) + return + } + joinMessage := &operation.JoinMessage{} + if err = proto.Unmarshal(body, joinMessage); err != nil { + writeJsonError(w, r, http.StatusBadRequest, err) + return + } + if *joinMessage.Ip == "" { + *joinMessage.Ip = r.RemoteAddr[0:strings.LastIndex(r.RemoteAddr, ":")] + } + if glog.V(4) { + if jsonData, jsonError := json.Marshal(joinMessage); jsonError != nil { + glog.V(0).Infoln("json marshaling error: ", jsonError) + writeJsonError(w, r, http.StatusBadRequest, jsonError) + return + } else { + glog.V(4).Infoln("Proto size", len(body), "json size", len(jsonData), string(jsonData)) + } + } + + ms.Topo.ProcessJoinMessage(joinMessage) + writeJsonQuiet(w, r, http.StatusOK, operation.JoinResult{ + VolumeSizeLimit: uint64(ms.volumeSizeLimitMB) * 1024 * 1024, + SecretKey: string(ms.guard.SecretKey), + }) +} + +func (ms *MasterServer) dirStatusHandler(w http.ResponseWriter, r *http.Request) { + m := make(map[string]interface{}) + m["Version"] = util.VERSION + m["Topology"] = ms.Topo.ToMap() + writeJsonQuiet(w, r, http.StatusOK, m) +} + +func (ms *MasterServer) volumeVacuumHandler(w http.ResponseWriter, r *http.Request) { + gcThreshold := r.FormValue("garbageThreshold") + if gcThreshold == "" { + gcThreshold = ms.garbageThreshold + } + glog.Infoln("garbageThreshold =", gcThreshold) + ms.Topo.Vacuum(gcThreshold) + ms.dirStatusHandler(w, r) +} + +func (ms *MasterServer) volumeGrowHandler(w http.ResponseWriter, r *http.Request) { + count := 0 + option, err := ms.getVolumeGrowOption(r) + if err != nil { + writeJsonError(w, r, http.StatusNotAcceptable, err) + return + } + if err == nil { + if count, err = strconv.Atoi(r.FormValue("count")); err == nil { + if 
ms.Topo.FreeSpace() < count*option.ReplicaPlacement.GetCopyCount() { + err = errors.New("Only " + strconv.Itoa(ms.Topo.FreeSpace()) + " volumes left! Not enough for " + strconv.Itoa(count*option.ReplicaPlacement.GetCopyCount())) + } else { + count, err = ms.vg.GrowByCountAndType(count, option, ms.Topo) + } + } else { + err = errors.New("parameter count is not found") + } + } + if err != nil { + writeJsonError(w, r, http.StatusNotAcceptable, err) + } else { + writeJsonQuiet(w, r, http.StatusOK, map[string]interface{}{"count": count}) + } +} + +func (ms *MasterServer) volumeStatusHandler(w http.ResponseWriter, r *http.Request) { + m := make(map[string]interface{}) + m["Version"] = util.VERSION + m["Volumes"] = ms.Topo.ToVolumeMap() + writeJsonQuiet(w, r, http.StatusOK, m) +} + +func (ms *MasterServer) redirectHandler(w http.ResponseWriter, r *http.Request) { + vid, _, _, _, _ := parseURLPath(r.URL.Path) + volumeId, err := storage.NewVolumeId(vid) + if err != nil { + debug("parsing error:", err, r.URL.Path) + return + } + collection := r.FormValue("collection") + machines := ms.Topo.Lookup(collection, volumeId) + if machines != nil && len(machines) > 0 { + var url string + if r.URL.RawQuery != "" { + url = util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl) + r.URL.Path + "?" + r.URL.RawQuery + } else { + url = util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl) + r.URL.Path + } + http.Redirect(w, r, url, http.StatusMovedPermanently) + } else { + writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("volume id %d or collection %s not found", volumeId, collection)) + } +} + +func (ms *MasterServer) selfUrl(r *http.Request) string { + if r.Host != "" { + return r.Host + } + return "localhost:" + strconv.Itoa(ms.port) +} +func (ms *MasterServer) submitFromMasterServerHandler(w http.ResponseWriter, r *http.Request) { + if ms.Topo.IsLeader() { + submitForClientHandler(w, r, ms.selfUrl(r)) + } else { + masterUrl, err := ms.Topo.Leader() + if err != nil { + writeJsonError(w, r, http.StatusInternalServerError, err) + } else { + submitForClientHandler(w, r, masterUrl) + } + } +} + +func (ms *MasterServer) deleteFromMasterServerHandler(w http.ResponseWriter, r *http.Request) { + if ms.Topo.IsLeader() { + deleteForClientHandler(w, r, ms.selfUrl(r)) + } else { + deleteForClientHandler(w, r, ms.Topo.RaftServer.Leader()) + } +} + +func (ms *MasterServer) HasWritableVolume(option *topology.VolumeGrowOption) bool { + vl := ms.Topo.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl) + return vl.GetActiveVolumeCount(option) > 0 +} + +func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGrowOption, error) { + replicationString := r.FormValue("replication") + if replicationString == "" { + replicationString = ms.defaultReplicaPlacement + } + replicaPlacement, err := storage.NewReplicaPlacementFromString(replicationString) + if err != nil { + return nil, err + } + ttl, err := storage.ReadTTL(r.FormValue("ttl")) + if err != nil { + return nil, err + } + volumeGrowOption := &topology.VolumeGrowOption{ + Collection: r.FormValue("collection"), + ReplicaPlacement: replicaPlacement, + Ttl: ttl, + DataCenter: r.FormValue("dataCenter"), + Rack: r.FormValue("rack"), + DataNode: r.FormValue("dataNode"), + } + return volumeGrowOption, nil +} diff --git a/weed/server/master_server_handlers_ui.go b/weed/server/master_server_handlers_ui.go new file mode 100644 index 000000000..9ad234877 --- /dev/null +++ b/weed/server/master_server_handlers_ui.go @@ -0,0 +1,30 
@@ +package weed_server + +import ( + "net/http" + + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/util" + ui "github.com/chrislusf/seaweedfs/weed/server/master_ui" +) + +func (ms *MasterServer) uiStatusHandler(w http.ResponseWriter, r *http.Request) { + infos := make(map[string]interface{}) + infos["Version"] = util.VERSION + args := struct { + Version string + Topology interface{} + Leader string + Peers interface{} + Stats map[string]interface{} + Counters *stats.ServerStats + }{ + util.VERSION, + ms.Topo.ToMap(), + ms.Topo.RaftServer.Leader(), + ms.Topo.RaftServer.Peers(), + infos, + serverStats, + } + ui.StatusTpl.Execute(w, args) +} diff --git a/weed/server/master_ui/templates.go b/weed/server/master_ui/templates.go new file mode 100644 index 000000000..e9ee2d8d2 --- /dev/null +++ b/weed/server/master_ui/templates.go @@ -0,0 +1,102 @@ +package master_ui + +import ( + "html/template" +) + +var StatusTpl = template.Must(template.New("status").Parse(`<!DOCTYPE html> +<html> + <head> + <title>SeaweedFS {{ .Version }}</title> + <link rel="icon" href="http://7viirv.com1.z0.glb.clouddn.com/seaweed_favicon.png" sizes="32x32" /> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css"> + </head> + <body> + <div class="container"> + <div class="page-header"> + <h1> + <img src="http://7viirv.com1.z0.glb.clouddn.com/seaweed50x50.png"></img> + SeaweedFS <small>{{ .Version }}</small> + </h1> + </div> + + <div class="row"> + <div class="col-sm-6"> + <h2>Cluster status</h2> + <table class="table"> + <tbody> + <tr> + <th>Free</th> + <td>{{ .Topology.Free }}</td> + </tr> + <tr> + <th>Max</th> + <td>{{ .Topology.Max }}</td> + </tr> + <tr> + <th>Leader</th> + <td><a href="http://{{ .Leader }}">{{ .Leader }}</a></td> + </tr> + <tr> + <td class="col-sm-2 field-label"><label>Peers:</label></td> + <td class="col-sm-10"><ul class="list-unstyled"> + {{ range $k, $p := .Peers }} + <li><a href="{{ $p.ConnectionString }}">{{ $p.Name }}</a></li> + {{ end }} + </ul></td> + </tr> + </tbody> + </table> + </div> + + <div class="col-sm-6"> + <h2>System Stats</h2> + <table class="table table-condensed table-striped"> + <tr> + <th>Concurrent Connections</th> + <td>{{ .Counters.Connections.WeekCounter.Sum }}</td> + </tr> + {{ range $key, $val := .Stats }} + <tr> + <th>{{ $key }}</th> + <td>{{ $val }}</td> + </tr> + {{ end }} + </table> + </div> + </div> + + <div class="row"> + <h2>Topology</h2> + <table class="table table-striped"> + <thead> + <tr> + <th>Data Center</th> + <th>Rack</th> + <th>RemoteAddr</th> + <th>#Volumes</th> + <th>Max</th> + </tr> + </thead> + <tbody> + {{ range $dc_index, $dc := .Topology.DataCenters }} + {{ range $rack_index, $rack := $dc.Racks }} + {{ range $dn_index, $dn := $rack.DataNodes }} + <tr> + <td><code>{{ $dc.Id }}</code></td> + <td>{{ $rack.Id }}</td> + <td><a href="http://{{ $dn.Url }}/ui/index.html">{{ $dn.Url }}</a></td> + <td>{{ $dn.Volumes }}</td> + <td>{{ $dn.Max }}</td> + </tr> + {{ end }} + {{ end }} + {{ end }} + </tbody> + </table> + </div> + + </div> + </body> +</html> +`)) diff --git a/weed/server/raft_server.go b/weed/server/raft_server.go new file mode 100644 index 000000000..a35659818 --- /dev/null +++ b/weed/server/raft_server.go @@ -0,0 +1,217 @@ +package weed_server + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "math/rand" + "net/http" + "net/url" + "os" + "path" + "reflect" + "sort" + "strings" + "time" + + "github.com/chrislusf/raft" + 
"github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/topology" + "github.com/gorilla/mux" +) + +type RaftServer struct { + peers []string // initial peers to join with + raftServer raft.Server + dataDir string + httpAddr string + router *mux.Router + topo *topology.Topology +} + +func NewRaftServer(r *mux.Router, peers []string, httpAddr string, dataDir string, topo *topology.Topology, pulseSeconds int) *RaftServer { + s := &RaftServer{ + peers: peers, + httpAddr: httpAddr, + dataDir: dataDir, + router: r, + topo: topo, + } + + if glog.V(4) { + raft.SetLogLevel(2) + } + + raft.RegisterCommand(&topology.MaxVolumeIdCommand{}) + + var err error + transporter := raft.NewHTTPTransporter("/cluster", 0) + transporter.Transport.MaxIdleConnsPerHost = 1024 + glog.V(1).Infof("Starting RaftServer with IP:%v:", httpAddr) + + // Clear old cluster configurations if peers are changed + if oldPeers, changed := isPeersChanged(s.dataDir, httpAddr, s.peers); changed { + glog.V(0).Infof("Peers Change: %v => %v", oldPeers, s.peers) + os.RemoveAll(path.Join(s.dataDir, "conf")) + os.RemoveAll(path.Join(s.dataDir, "log")) + os.RemoveAll(path.Join(s.dataDir, "snapshot")) + } + + s.raftServer, err = raft.NewServer(s.httpAddr, s.dataDir, transporter, nil, topo, "") + if err != nil { + glog.V(0).Infoln(err) + return nil + } + transporter.Install(s.raftServer, s) + s.raftServer.SetHeartbeatInterval(1 * time.Second) + s.raftServer.SetElectionTimeout(time.Duration(pulseSeconds) * 3450 * time.Millisecond) + s.raftServer.Start() + + s.router.HandleFunc("/cluster/join", s.joinHandler).Methods("POST") + s.router.HandleFunc("/cluster/status", s.statusHandler).Methods("GET") + + if len(s.peers) > 0 { + // Join to leader if specified. + for { + glog.V(0).Infoln("Joining cluster:", strings.Join(s.peers, ",")) + time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond) + firstJoinError := s.Join(s.peers) + if firstJoinError != nil { + glog.V(0).Infoln("No existing server found. Starting as leader in the new cluster.") + _, err := s.raftServer.Do(&raft.DefaultJoinCommand{ + Name: s.raftServer.Name(), + ConnectionString: "http://" + s.httpAddr, + }) + if err != nil { + glog.V(0).Infoln(err) + } else { + break + } + } else { + break + } + } + } else if s.raftServer.IsLogEmpty() { + // Initialize the server by joining itself. 
+ glog.V(0).Infoln("Initializing new cluster") + + _, err := s.raftServer.Do(&raft.DefaultJoinCommand{ + Name: s.raftServer.Name(), + ConnectionString: "http://" + s.httpAddr, + }) + + if err != nil { + glog.V(0).Infoln(err) + return nil + } + + } else { + glog.V(0).Infoln("Old conf,log,snapshot should have been removed.") + } + + return s +} + +func (s *RaftServer) Peers() (members []string) { + peers := s.raftServer.Peers() + + for _, p := range peers { + members = append(members, strings.TrimPrefix(p.ConnectionString, "http://")) + } + + return +} + +func isPeersChanged(dir string, self string, peers []string) (oldPeers []string, changed bool) { + confPath := path.Join(dir, "conf") + // open conf file + b, err := ioutil.ReadFile(confPath) + if err != nil { + return oldPeers, true + } + conf := &raft.Config{} + if err = json.Unmarshal(b, conf); err != nil { + return oldPeers, true + } + + for _, p := range conf.Peers { + oldPeers = append(oldPeers, strings.TrimPrefix(p.ConnectionString, "http://")) + } + oldPeers = append(oldPeers, self) + + sort.Strings(peers) + sort.Strings(oldPeers) + + return oldPeers, reflect.DeepEqual(peers, oldPeers) + +} + +// Join joins an existing cluster. +func (s *RaftServer) Join(peers []string) error { + command := &raft.DefaultJoinCommand{ + Name: s.raftServer.Name(), + ConnectionString: "http://" + s.httpAddr, + } + + var err error + var b bytes.Buffer + json.NewEncoder(&b).Encode(command) + for _, m := range peers { + if m == s.httpAddr { + continue + } + target := fmt.Sprintf("http://%s/cluster/join", strings.TrimSpace(m)) + glog.V(0).Infoln("Attempting to connect to:", target) + + err = postFollowingOneRedirect(target, "application/json", &b) + + if err != nil { + glog.V(0).Infoln("Post returned error: ", err.Error()) + if _, ok := err.(*url.Error); ok { + // If we receive a network error try the next member + continue + } + } else { + return nil + } + } + + return errors.New("Could not connect to any cluster peers") +} + +// a workaround because http POST following redirection misses request body +func postFollowingOneRedirect(target string, contentType string, b *bytes.Buffer) error { + backupReader := bytes.NewReader(b.Bytes()) + resp, err := http.Post(target, contentType, b) + if err != nil { + return err + } + defer resp.Body.Close() + reply, _ := ioutil.ReadAll(resp.Body) + statusCode := resp.StatusCode + + if statusCode == http.StatusMovedPermanently { + var urlStr string + if urlStr = resp.Header.Get("Location"); urlStr == "" { + return fmt.Errorf("%d response missing Location header", resp.StatusCode) + } + + glog.V(0).Infoln("Post redirected to ", urlStr) + resp2, err2 := http.Post(urlStr, contentType, backupReader) + if err2 != nil { + return err2 + } + defer resp2.Body.Close() + reply, _ = ioutil.ReadAll(resp2.Body) + statusCode = resp2.StatusCode + } + + glog.V(0).Infoln("Post returned status: ", statusCode, string(reply)) + if statusCode != http.StatusOK { + return errors.New(string(reply)) + } + + return nil +} diff --git a/weed/server/raft_server_handlers.go b/weed/server/raft_server_handlers.go new file mode 100644 index 000000000..335ba668f --- /dev/null +++ b/weed/server/raft_server_handlers.go @@ -0,0 +1,64 @@ +package weed_server + +import ( + "encoding/json" + "io/ioutil" + "net/http" + "strings" + + "github.com/chrislusf/raft" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" +) + +// Handles incoming RAFT joins. 
+func (s *RaftServer) joinHandler(w http.ResponseWriter, req *http.Request) { + glog.V(0).Infoln("Processing incoming join. Current Leader", s.raftServer.Leader(), "Self", s.raftServer.Name(), "Peers", s.raftServer.Peers()) + command := &raft.DefaultJoinCommand{} + + commandText, _ := ioutil.ReadAll(req.Body) + glog.V(0).Info("Command:", string(commandText)) + if err := json.NewDecoder(strings.NewReader(string(commandText))).Decode(&command); err != nil { + glog.V(0).Infoln("Error decoding json message:", err, string(commandText)) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + glog.V(0).Infoln("join command from Name", command.Name, "Connection", command.ConnectionString) + + if _, err := s.raftServer.Do(command); err != nil { + switch err { + case raft.NotLeaderError: + s.redirectToLeader(w, req) + default: + glog.V(0).Infoln("Error processing join:", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + } + } +} + +func (s *RaftServer) HandleFunc(pattern string, handler func(http.ResponseWriter, *http.Request)) { + s.router.HandleFunc(pattern, handler) +} + +func (s *RaftServer) redirectToLeader(w http.ResponseWriter, req *http.Request) { + if leader, e := s.topo.Leader(); e == nil { + //http.StatusMovedPermanently does not cause http POST following redirection + glog.V(0).Infoln("Redirecting to", http.StatusMovedPermanently, "http://"+leader+req.URL.Path) + http.Redirect(w, req, "http://"+leader+req.URL.Path, http.StatusMovedPermanently) + } else { + glog.V(0).Infoln("Error: Leader Unknown") + http.Error(w, "Leader unknown", http.StatusInternalServerError) + } +} + +func (s *RaftServer) statusHandler(w http.ResponseWriter, r *http.Request) { + ret := operation.ClusterStatusResult{ + IsLeader: s.topo.IsLeader(), + Peers: s.Peers(), + } + if leader, e := s.topo.Leader(); e == nil { + ret.Leader = leader + } + writeJsonQuiet(w, r, http.StatusOK, ret) +} diff --git a/weed/server/volume_server.go b/weed/server/volume_server.go new file mode 100644 index 000000000..79a4276b1 --- /dev/null +++ b/weed/server/volume_server.go @@ -0,0 +1,125 @@ +package weed_server + +import ( + "math/rand" + "net/http" + "sync" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +type VolumeServer struct { + masterNode string + mnLock sync.RWMutex + pulseSeconds int + dataCenter string + rack string + store *storage.Store + guard *security.Guard + + needleMapKind storage.NeedleMapType + FixJpgOrientation bool + ReadRedirect bool +} + +func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, + port int, publicUrl string, + folders []string, maxCounts []int, + needleMapKind storage.NeedleMapType, + masterNode string, pulseSeconds int, + dataCenter string, rack string, + whiteList []string, + fixJpgOrientation bool, + readRedirect bool) *VolumeServer { + vs := &VolumeServer{ + pulseSeconds: pulseSeconds, + dataCenter: dataCenter, + rack: rack, + needleMapKind: needleMapKind, + FixJpgOrientation: fixJpgOrientation, + ReadRedirect: readRedirect, + } + vs.SetMasterNode(masterNode) + vs.store = storage.NewStore(port, ip, publicUrl, folders, maxCounts, vs.needleMapKind) + + vs.guard = security.NewGuard(whiteList, "") + + adminMux.HandleFunc("/ui/index.html", vs.uiStatusHandler) + adminMux.HandleFunc("/status", vs.guard.WhiteList(vs.statusHandler)) + adminMux.HandleFunc("/admin/assign_volume", vs.guard.WhiteList(vs.assignVolumeHandler)) + 
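// The vacuum endpoints below correspond to the three compaction phases
+ // (check whether a volume's garbage exceeds the threshold, compact, then
+ // commit); like the other admin routes, they are gated by the IP whitelist.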
adminMux.HandleFunc("/admin/vacuum/check", vs.guard.WhiteList(vs.vacuumVolumeCheckHandler)) + adminMux.HandleFunc("/admin/vacuum/compact", vs.guard.WhiteList(vs.vacuumVolumeCompactHandler)) + adminMux.HandleFunc("/admin/vacuum/commit", vs.guard.WhiteList(vs.vacuumVolumeCommitHandler)) + adminMux.HandleFunc("/admin/delete_collection", vs.guard.WhiteList(vs.deleteCollectionHandler)) + adminMux.HandleFunc("/admin/sync/status", vs.guard.WhiteList(vs.getVolumeSyncStatusHandler)) + adminMux.HandleFunc("/admin/sync/index", vs.guard.WhiteList(vs.getVolumeIndexContentHandler)) + adminMux.HandleFunc("/admin/sync/data", vs.guard.WhiteList(vs.getVolumeDataContentHandler)) + adminMux.HandleFunc("/stats/counter", vs.guard.WhiteList(statsCounterHandler)) + adminMux.HandleFunc("/stats/memory", vs.guard.WhiteList(statsMemoryHandler)) + adminMux.HandleFunc("/stats/disk", vs.guard.WhiteList(vs.statsDiskHandler)) + adminMux.HandleFunc("/delete", vs.guard.WhiteList(vs.batchDeleteHandler)) + adminMux.HandleFunc("/", vs.privateStoreHandler) + if publicMux != adminMux { + // separated admin and public port + publicMux.HandleFunc("/favicon.ico", vs.faviconHandler) + publicMux.HandleFunc("/", vs.publicReadOnlyHandler) + } + + go func() { + connected := true + + glog.V(0).Infof("Volume server bootstraps with master %s", vs.GetMasterNode()) + vs.store.SetBootstrapMaster(vs.GetMasterNode()) + vs.store.SetDataCenter(vs.dataCenter) + vs.store.SetRack(vs.rack) + for { + glog.V(4).Infof("Volume server sending to master %s", vs.GetMasterNode()) + master, secretKey, err := vs.store.SendHeartbeatToMaster() + if err == nil { + if !connected { + connected = true + vs.SetMasterNode(master) + vs.guard.SecretKey = secretKey + glog.V(0).Infoln("Volume Server Connected with master at", master) + } + } else { + glog.V(1).Infof("Volume Server Failed to talk with master %s: %v", vs.masterNode, err) + if connected { + connected = false + } + } + if connected { + time.Sleep(time.Duration(float32(vs.pulseSeconds*1e3)*(1+rand.Float32())) * time.Millisecond) + } else { + time.Sleep(time.Duration(float32(vs.pulseSeconds*1e3)*0.25) * time.Millisecond) + } + } + }() + + return vs +} + +func (vs *VolumeServer) GetMasterNode() string { + vs.mnLock.RLock() + defer vs.mnLock.RUnlock() + return vs.masterNode +} + +func (vs *VolumeServer) SetMasterNode(masterNode string) { + vs.mnLock.Lock() + defer vs.mnLock.Unlock() + vs.masterNode = masterNode +} + +func (vs *VolumeServer) Shutdown() { + glog.V(0).Infoln("Shutting down volume server...") + vs.store.Close() + glog.V(0).Infoln("Shut down successfully!") +} + +func (vs *VolumeServer) jwt(fileId string) security.EncodedJwt { + return security.GenJwt(vs.guard.SecretKey, fileId) +} diff --git a/weed/server/volume_server_handlers.go b/weed/server/volume_server_handlers.go new file mode 100644 index 000000000..2d6fe7849 --- /dev/null +++ b/weed/server/volume_server_handlers.go @@ -0,0 +1,57 @@ +package weed_server + +import ( + "net/http" + + "github.com/chrislusf/seaweedfs/weed/stats" +) + +/* + +If volume server is started with a separated public port, the public port will +be more "secure". + +Public port currently only supports reads. + +Later writes on public port can have one of the 3 +security settings: +1. not secured +2. secured by white list +3. 
secured by JWT(Json Web Token) + +*/ + +func (vs *VolumeServer) privateStoreHandler(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case "GET": + stats.ReadRequest() + vs.GetOrHeadHandler(w, r) + case "HEAD": + stats.ReadRequest() + vs.GetOrHeadHandler(w, r) + case "DELETE": + stats.DeleteRequest() + vs.guard.WhiteList(vs.DeleteHandler)(w, r) + case "PUT": + stats.WriteRequest() + vs.guard.WhiteList(vs.PostHandler)(w, r) + case "POST": + stats.WriteRequest() + vs.guard.WhiteList(vs.PostHandler)(w, r) + } +} + +func (vs *VolumeServer) publicReadOnlyHandler(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case "GET": + stats.ReadRequest() + vs.GetOrHeadHandler(w, r) + case "HEAD": + stats.ReadRequest() + vs.GetOrHeadHandler(w, r) + } +} + +func (vs *VolumeServer) faviconHandler(w http.ResponseWriter, r *http.Request) { + vs.FaviconHandler(w, r) +} diff --git a/weed/server/volume_server_handlers_admin.go b/weed/server/volume_server_handlers_admin.go new file mode 100644 index 000000000..ae9817ef6 --- /dev/null +++ b/weed/server/volume_server_handlers_admin.go @@ -0,0 +1,50 @@ +package weed_server + +import ( + "net/http" + "path/filepath" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func (vs *VolumeServer) statusHandler(w http.ResponseWriter, r *http.Request) { + m := make(map[string]interface{}) + m["Version"] = util.VERSION + m["Volumes"] = vs.store.Status() + writeJsonQuiet(w, r, http.StatusOK, m) +} + +func (vs *VolumeServer) assignVolumeHandler(w http.ResponseWriter, r *http.Request) { + err := vs.store.AddVolume(r.FormValue("volume"), r.FormValue("collection"), vs.needleMapKind, r.FormValue("replication"), r.FormValue("ttl")) + if err == nil { + writeJsonQuiet(w, r, http.StatusAccepted, map[string]string{"error": ""}) + } else { + writeJsonError(w, r, http.StatusNotAcceptable, err) + } + glog.V(2).Infoln("assign volume =", r.FormValue("volume"), ", collection =", r.FormValue("collection"), ", replication =", r.FormValue("replication"), ", error =", err) +} + +func (vs *VolumeServer) deleteCollectionHandler(w http.ResponseWriter, r *http.Request) { + err := vs.store.DeleteCollection(r.FormValue("collection")) + if err == nil { + writeJsonQuiet(w, r, http.StatusOK, map[string]string{"error": ""}) + } else { + writeJsonError(w, r, http.StatusInternalServerError, err) + } + glog.V(2).Infoln("deleting collection =", r.FormValue("collection"), ", error =", err) +} + +func (vs *VolumeServer) statsDiskHandler(w http.ResponseWriter, r *http.Request) { + m := make(map[string]interface{}) + m["Version"] = util.VERSION + var ds []*stats.DiskStatus + for _, loc := range vs.store.Locations { + if dir, e := filepath.Abs(loc.Directory); e == nil { + ds = append(ds, stats.NewDiskStatus(dir)) + } + } + m["DiskStatuses"] = ds + writeJsonQuiet(w, r, http.StatusOK, m) +} diff --git a/weed/server/volume_server_handlers_helper.go b/weed/server/volume_server_handlers_helper.go new file mode 100644 index 000000000..2bab35e45 --- /dev/null +++ b/weed/server/volume_server_handlers_helper.go @@ -0,0 +1,115 @@ +package weed_server + +import ( + "errors" + "fmt" + "mime/multipart" + "net/textproto" + "strconv" + "strings" +) + +// copied from src/pkg/net/http/fs.go + +// httpRange specifies the byte range to be sent to the client. 
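+//
+// For example, for a 10000-byte file, the header "Range: bytes=0-499" parses
+// to httpRange{start: 0, length: 500}, and the suffix form "Range: bytes=-500"
+// parses to httpRange{start: 9500, length: 500}.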
+type httpRange struct { + start, length int64 +} + +func (r httpRange) contentRange(size int64) string { + return fmt.Sprintf("bytes %d-%d/%d", r.start, r.start+r.length-1, size) +} + +func (r httpRange) mimeHeader(contentType string, size int64) textproto.MIMEHeader { + return textproto.MIMEHeader{ + "Content-Range": {r.contentRange(size)}, + "Content-Type": {contentType}, + } +} + +// parseRange parses a Range header string as per RFC 2616. +func parseRange(s string, size int64) ([]httpRange, error) { + if s == "" { + return nil, nil // header not present + } + const b = "bytes=" + if !strings.HasPrefix(s, b) { + return nil, errors.New("invalid range") + } + var ranges []httpRange + for _, ra := range strings.Split(s[len(b):], ",") { + ra = strings.TrimSpace(ra) + if ra == "" { + continue + } + i := strings.Index(ra, "-") + if i < 0 { + return nil, errors.New("invalid range") + } + start, end := strings.TrimSpace(ra[:i]), strings.TrimSpace(ra[i+1:]) + var r httpRange + if start == "" { + // If no start is specified, end specifies the + // range start relative to the end of the file. + i, err := strconv.ParseInt(end, 10, 64) + if err != nil { + return nil, errors.New("invalid range") + } + if i > size { + i = size + } + r.start = size - i + r.length = size - r.start + } else { + i, err := strconv.ParseInt(start, 10, 64) + if err != nil || i > size || i < 0 { + return nil, errors.New("invalid range") + } + r.start = i + if end == "" { + // If no end is specified, range extends to end of the file. + r.length = size - r.start + } else { + i, err := strconv.ParseInt(end, 10, 64) + if err != nil || r.start > i { + return nil, errors.New("invalid range") + } + if i >= size { + i = size - 1 + } + r.length = i - r.start + 1 + } + } + ranges = append(ranges, r) + } + return ranges, nil +} + +// countingWriter counts how many bytes have been written to it. +type countingWriter int64 + +func (w *countingWriter) Write(p []byte) (n int, err error) { + *w += countingWriter(len(p)) + return len(p), nil +} + +// rangesMIMESize returns the number of bytes it takes to encode the +// provided ranges as a multipart response. 
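+//
+// It does so without buffering any file data: the part headers and multipart
+// boundaries are written to a countingWriter to measure their overhead, and
+// the part bodies are accounted for by summing the range lengths.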
+func rangesMIMESize(ranges []httpRange, contentType string, contentSize int64) (encSize int64) {
+ var w countingWriter
+ mw := multipart.NewWriter(&w)
+ for _, ra := range ranges {
+ mw.CreatePart(ra.mimeHeader(contentType, contentSize))
+ encSize += ra.length
+ }
+ mw.Close()
+ encSize += int64(w)
+ return
+}
+
+func sumRangesSize(ranges []httpRange) (size int64) {
+ for _, ra := range ranges {
+ size += ra.length
+ }
+ return
+}
diff --git a/weed/server/volume_server_handlers_read.go b/weed/server/volume_server_handlers_read.go new file mode 100644 index 000000000..3889afe5c --- /dev/null +++ b/weed/server/volume_server_handlers_read.go @@ -0,0 +1,301 @@
+package weed_server
+
+import (
+ "bytes"
+ "io"
+ "mime"
+ "mime/multipart"
+ "net/http"
+ "path"
+ "strconv"
+ "strings"
+ "time"
+
+ "net/url"
+
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/images"
+ "github.com/chrislusf/seaweedfs/weed/operation"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var fileNameEscaper = strings.NewReplacer("\\", "\\\\", "\"", "\\\"")
+
+func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) {
+ n := new(storage.Needle)
+ vid, fid, filename, ext, _ := parseURLPath(r.URL.Path)
+ volumeId, err := storage.NewVolumeId(vid)
+ if err != nil {
+ glog.V(2).Infoln("parsing error:", err, r.URL.Path)
+ w.WriteHeader(http.StatusBadRequest)
+ return
+ }
+ err = n.ParsePath(fid)
+ if err != nil {
+ glog.V(2).Infoln("parsing fid error:", err, r.URL.Path)
+ w.WriteHeader(http.StatusBadRequest)
+ return
+ }
+
+ glog.V(4).Infoln("volume", volumeId, "reading", n)
+ if !vs.store.HasVolume(volumeId) {
+ if !vs.ReadRedirect {
+ glog.V(2).Infoln("volume is not local:", err, r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ lookupResult, err := operation.Lookup(vs.GetMasterNode(), volumeId.String())
+ glog.V(2).Infoln("volume", volumeId, "found on", lookupResult, "error", err)
+ if err == nil && len(lookupResult.Locations) > 0 {
+ u, _ := url.Parse(util.NormalizeUrl(lookupResult.Locations[0].PublicUrl))
+ u.Path = r.URL.Path
+ arg := url.Values{}
+ if c := r.FormValue("collection"); c != "" {
+ arg.Set("collection", c)
+ }
+ u.RawQuery = arg.Encode()
+ http.Redirect(w, r, u.String(), http.StatusMovedPermanently)
+
+ } else {
+ glog.V(2).Infoln("lookup error:", err, r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ }
+ return
+ }
+ cookie := n.Cookie
+ count, e := vs.store.ReadVolumeNeedle(volumeId, n)
+ glog.V(4).Infoln("read bytes", count, "error", e)
+ if e != nil || count <= 0 {
+ glog.V(0).Infoln("read error:", e, r.URL.Path)
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ defer n.ReleaseMemory()
+ if n.Cookie != cookie {
+ glog.V(0).Infoln("request", r.URL.Path, "with mismatched cookie seen:", cookie, "expected:", n.Cookie, "from", r.RemoteAddr, "agent", r.UserAgent())
+ w.WriteHeader(http.StatusNotFound)
+ return
+ }
+ if n.LastModified != 0 {
+ w.Header().Set("Last-Modified", time.Unix(int64(n.LastModified), 0).UTC().Format(http.TimeFormat))
+ if r.Header.Get("If-Modified-Since") != "" {
+ if t, parseError := time.Parse(http.TimeFormat, r.Header.Get("If-Modified-Since")); parseError == nil {
+ if t.Unix() >= int64(n.LastModified) {
+ w.WriteHeader(http.StatusNotModified)
+ return
+ }
+ }
+ }
+ }
+ etag := n.Etag()
+ if inm := r.Header.Get("If-None-Match"); inm == etag {
+ w.WriteHeader(http.StatusNotModified)
+ return
+ }
+ w.Header().Set("Etag", etag)
+
+ if vs.tryHandleChunkedFile(n,
filename, w, r) { + return + } + + if n.NameSize > 0 && filename == "" { + filename = string(n.Name) + if ext == "" { + ext = path.Ext(filename) + } + } + mtype := "" + if n.MimeSize > 0 { + mt := string(n.Mime) + if !strings.HasPrefix(mt, "application/octet-stream") { + mtype = mt + } + } + + if ext != ".gz" { + if n.IsGzipped() { + if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") { + w.Header().Set("Content-Encoding", "gzip") + } else { + if n.Data, err = operation.UnGzipData(n.Data); err != nil { + glog.V(0).Infoln("ungzip error:", err, r.URL.Path) + } + } + } + } + if ext == ".png" || ext == ".jpg" || ext == ".gif" { + width, height := 0, 0 + if r.FormValue("width") != "" { + width, _ = strconv.Atoi(r.FormValue("width")) + } + if r.FormValue("height") != "" { + height, _ = strconv.Atoi(r.FormValue("height")) + } + n.Data, _, _ = images.Resized(ext, n.Data, width, height) + } + + if e := writeResponseContent(filename, mtype, bytes.NewReader(n.Data), w, r); e != nil { + glog.V(2).Infoln("response write error:", e) + } +} + +func (vs *VolumeServer) FaviconHandler(w http.ResponseWriter, r *http.Request) { + data, err := images.Asset("favicon/favicon.ico") + if err != nil { + glog.V(2).Infoln("favicon read error:", err) + return + } + + if e := writeResponseContent("favicon.ico", "image/x-icon", bytes.NewReader(data), w, r); e != nil { + glog.V(2).Infoln("response write error:", e) + } +} + +func (vs *VolumeServer) tryHandleChunkedFile(n *storage.Needle, fileName string, w http.ResponseWriter, r *http.Request) (processed bool) { + if !n.IsChunkedManifest() { + return false + } + + chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsGzipped()) + if e != nil { + glog.V(0).Infof("load chunked manifest (%s) error: %v", r.URL.Path, e) + return false + } + if fileName == "" && chunkManifest.Name != "" { + fileName = chunkManifest.Name + } + mType := "" + if chunkManifest.Mime != "" { + mt := chunkManifest.Mime + if !strings.HasPrefix(mt, "application/octet-stream") { + mType = mt + } + } + + w.Header().Set("X-File-Store", "chunked") + + chunkedFileReader := &operation.ChunkedFileReader{ + Manifest: chunkManifest, + Master: vs.GetMasterNode(), + } + defer chunkedFileReader.Close() + if e := writeResponseContent(fileName, mType, chunkedFileReader, w, r); e != nil { + glog.V(2).Infoln("response write error:", e) + } + return true +} + +func writeResponseContent(filename, mimeType string, rs io.ReadSeeker, w http.ResponseWriter, r *http.Request) error { + totalSize, e := rs.Seek(0, 2) + if mimeType == "" { + if ext := path.Ext(filename); ext != "" { + mimeType = mime.TypeByExtension(ext) + } + } + if mimeType != "" { + w.Header().Set("Content-Type", mimeType) + } + if filename != "" { + contentDisposition := "inline" + if r.FormValue("dl") != "" { + if dl, _ := strconv.ParseBool(r.FormValue("dl")); dl { + contentDisposition = "attachment" + } + } + w.Header().Set("Content-Disposition", contentDisposition+`; filename="`+fileNameEscaper.Replace(filename)+`"`) + } + w.Header().Set("Accept-Ranges", "bytes") + if r.Method == "HEAD" { + w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) + return nil + } + rangeReq := r.Header.Get("Range") + if rangeReq == "" { + w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) + if _, e = rs.Seek(0, 0); e != nil { + return e + } + _, e = io.Copy(w, rs) + return e + } + + //the rest is dealing with partial content request + //mostly copy from src/pkg/net/http/fs.go + ranges, err := parseRange(rangeReq, totalSize) + if 
err != nil { + http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable) + return nil + } + if sumRangesSize(ranges) > totalSize { + // The total number of bytes in all the ranges + // is larger than the size of the file by + // itself, so this is probably an attack, or a + // dumb client. Ignore the range request. + return nil + } + if len(ranges) == 0 { + return nil + } + if len(ranges) == 1 { + // RFC 2616, Section 14.16: + // "When an HTTP message includes the content of a single + // range (for example, a response to a request for a + // single range, or to a request for a set of ranges + // that overlap without any holes), this content is + // transmitted with a Content-Range header, and a + // Content-Length header showing the number of bytes + // actually transferred. + // ... + // A response to a request for a single range MUST NOT + // be sent using the multipart/byteranges media type." + ra := ranges[0] + w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) + w.Header().Set("Content-Range", ra.contentRange(totalSize)) + w.WriteHeader(http.StatusPartialContent) + if _, e = rs.Seek(ra.start, 0); e != nil { + return e + } + + _, e = io.CopyN(w, rs, ra.length) + return e + } + // process multiple ranges + for _, ra := range ranges { + if ra.start > totalSize { + http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable) + return nil + } + } + sendSize := rangesMIMESize(ranges, mimeType, totalSize) + pr, pw := io.Pipe() + mw := multipart.NewWriter(pw) + w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary()) + sendContent := pr + defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish. + go func() { + for _, ra := range ranges { + part, e := mw.CreatePart(ra.mimeHeader(mimeType, totalSize)) + if e != nil { + pw.CloseWithError(e) + return + } + if _, e = rs.Seek(ra.start, 0); e != nil { + pw.CloseWithError(e) + return + } + if _, e = io.CopyN(part, rs, ra.length); e != nil { + pw.CloseWithError(e) + return + } + } + mw.Close() + pw.Close() + }() + if w.Header().Get("Content-Encoding") == "" { + w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10)) + } + w.WriteHeader(http.StatusPartialContent) + _, e = io.CopyN(w, sendContent, sendSize) + return e +} diff --git a/weed/server/volume_server_handlers_sync.go b/weed/server/volume_server_handlers_sync.go new file mode 100644 index 000000000..8a2e30743 --- /dev/null +++ b/weed/server/volume_server_handlers_sync.go @@ -0,0 +1,87 @@ +package weed_server + +import ( + "fmt" + "net/http" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func (vs *VolumeServer) getVolumeSyncStatusHandler(w http.ResponseWriter, r *http.Request) { + v, err := vs.getVolume("volume", r) + if v == nil { + writeJsonError(w, r, http.StatusBadRequest, err) + return + } + syncStat := v.GetVolumeSyncStatus() + if syncStat.Error != "" { + writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Get Volume %d status error: %s", v.Id, syncStat.Error)) + glog.V(2).Infoln("getVolumeSyncStatusHandler volume =", r.FormValue("volume"), ", error =", err) + } else { + writeJsonQuiet(w, r, http.StatusOK, syncStat) + } +} + +func (vs *VolumeServer) getVolumeIndexContentHandler(w http.ResponseWriter, r *http.Request) { + v, err := vs.getVolume("volume", r) + if v == nil { + writeJsonError(w, r, http.StatusBadRequest, err) + return + } + content, err := v.IndexFileContent() + if err 
!= nil { + writeJsonError(w, r, http.StatusInternalServerError, err) + return + } + w.Write(content) +} + +func (vs *VolumeServer) getVolumeDataContentHandler(w http.ResponseWriter, r *http.Request) { + v, err := vs.getVolume("volume", r) + if v == nil { + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("Not Found volume: %v", err)) + return + } + if int(v.SuperBlock.CompactRevision) != util.ParseInt(r.FormValue("revision"), 0) { + writeJsonError(w, r, http.StatusExpectationFailed, fmt.Errorf("Requested Volume Revision is %s, but current revision is %d", r.FormValue("revision"), v.SuperBlock.CompactRevision)) + return + } + offset := uint32(util.ParseUint64(r.FormValue("offset"), 0)) + size := uint32(util.ParseUint64(r.FormValue("size"), 0)) + content, block, err := storage.ReadNeedleBlob(v.DataFile(), int64(offset)*storage.NeedlePaddingSize, size) + defer storage.ReleaseBytes(block.Bytes) + if err != nil { + writeJsonError(w, r, http.StatusInternalServerError, err) + return + } + + id := util.ParseUint64(r.FormValue("id"), 0) + n := new(storage.Needle) + n.ParseNeedleHeader(content) + if id != n.Id { + writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("Expected file entry id %d, but found %d", id, n.Id)) + return + } + + w.Write(content) +} + +func (vs *VolumeServer) getVolume(volumeParameterName string, r *http.Request) (*storage.Volume, error) { + volumeIdString := r.FormValue(volumeParameterName) + if volumeIdString == "" { + err := fmt.Errorf("Empty Volume Id: Need to pass in %s=the_volume_id.", volumeParameterName) + return nil, err + } + vid, err := storage.NewVolumeId(volumeIdString) + if err != nil { + err = fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString) + return nil, err + } + v := vs.store.GetVolume(vid) + if v == nil { + return nil, fmt.Errorf("Not Found Volume Id %s: %d", volumeIdString, vid) + } + return v, nil +} diff --git a/weed/server/volume_server_handlers_ui.go b/weed/server/volume_server_handlers_ui.go new file mode 100644 index 000000000..7923c95c0 --- /dev/null +++ b/weed/server/volume_server_handlers_ui.go @@ -0,0 +1,38 @@ +package weed_server + +import ( + "net/http" + "path/filepath" + "time" + + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/util" + ui "github.com/chrislusf/seaweedfs/weed/server/volume_server_ui" +) + +func (vs *VolumeServer) uiStatusHandler(w http.ResponseWriter, r *http.Request) { + infos := make(map[string]interface{}) + infos["Up Time"] = time.Now().Sub(startTime).String() + var ds []*stats.DiskStatus + for _, loc := range vs.store.Locations { + if dir, e := filepath.Abs(loc.Directory); e == nil { + ds = append(ds, stats.NewDiskStatus(dir)) + } + } + args := struct { + Version string + Master string + Volumes interface{} + DiskStatuses interface{} + Stats interface{} + Counters *stats.ServerStats + }{ + util.VERSION, + vs.masterNode, + vs.store.Status(), + ds, + infos, + serverStats, + } + ui.StatusTpl.Execute(w, args) +} diff --git a/weed/server/volume_server_handlers_vacuum.go b/weed/server/volume_server_handlers_vacuum.go new file mode 100644 index 000000000..ef348d35c --- /dev/null +++ b/weed/server/volume_server_handlers_vacuum.go @@ -0,0 +1,35 @@ +package weed_server + +import ( + "net/http" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +func (vs *VolumeServer) vacuumVolumeCheckHandler(w http.ResponseWriter, r *http.Request) { + err, ret := vs.store.CheckCompactVolume(r.FormValue("volume"), r.FormValue("garbageThreshold")) + if err == nil { + 
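// ret indicates whether the volume's garbage ratio exceeds garbageThreshold, i.e. whether this volume is worth vacuuming + 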
writeJsonQuiet(w, r, http.StatusOK, map[string]interface{}{"error": "", "result": ret}) + } else { + writeJsonQuiet(w, r, http.StatusInternalServerError, map[string]interface{}{"error": err.Error(), "result": false}) + } + glog.V(2).Infoln("checked compacting volume =", r.FormValue("volume"), "garbageThreshold =", r.FormValue("garbageThreshold"), "vacuum =", ret) +} +func (vs *VolumeServer) vacuumVolumeCompactHandler(w http.ResponseWriter, r *http.Request) { + err := vs.store.CompactVolume(r.FormValue("volume")) + if err == nil { + writeJsonQuiet(w, r, http.StatusOK, map[string]string{"error": ""}) + } else { + writeJsonError(w, r, http.StatusInternalServerError, err) + } + glog.V(2).Infoln("compacted volume =", r.FormValue("volume"), ", error =", err) +} +func (vs *VolumeServer) vacuumVolumeCommitHandler(w http.ResponseWriter, r *http.Request) { + err := vs.store.CommitCompactVolume(r.FormValue("volume")) + if err == nil { + writeJsonQuiet(w, r, http.StatusOK, map[string]string{"error": ""}) + } else { + writeJsonError(w, r, http.StatusInternalServerError, err) + } + glog.V(2).Infoln("commit compact volume =", r.FormValue("volume"), ", error =", err) +} diff --git a/weed/server/volume_server_handlers_write.go b/weed/server/volume_server_handlers_write.go new file mode 100644 index 000000000..e7ca2f8e1 --- /dev/null +++ b/weed/server/volume_server_handlers_write.go @@ -0,0 +1,165 @@ +package weed_server + +import ( + "errors" + "fmt" + "net/http" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/topology" +) + +func (vs *VolumeServer) PostHandler(w http.ResponseWriter, r *http.Request) { + if e := r.ParseForm(); e != nil { + glog.V(0).Infoln("form parse error:", e) + writeJsonError(w, r, http.StatusBadRequest, e) + return + } + vid, _, _, _, _ := parseURLPath(r.URL.Path) + volumeId, ve := storage.NewVolumeId(vid) + if ve != nil { + glog.V(0).Infoln("NewVolumeId error:", ve) + writeJsonError(w, r, http.StatusBadRequest, ve) + return + } + needle, ne := storage.NewNeedle(r, vs.FixJpgOrientation) + if ne != nil { + writeJsonError(w, r, http.StatusBadRequest, ne) + return + } + + ret := operation.UploadResult{} + size, errorStatus := topology.ReplicatedWrite(vs.GetMasterNode(), + vs.store, volumeId, needle, r) + httpStatus := http.StatusCreated + if errorStatus != "" { + httpStatus = http.StatusInternalServerError + ret.Error = errorStatus + } + if needle.HasName() { + ret.Name = string(needle.Name) + } + ret.Size = size + writeJsonQuiet(w, r, httpStatus, ret) +} + +func (vs *VolumeServer) DeleteHandler(w http.ResponseWriter, r *http.Request) { + n := new(storage.Needle) + vid, fid, _, _, _ := parseURLPath(r.URL.Path) + volumeId, _ := storage.NewVolumeId(vid) + n.ParsePath(fid) + + glog.V(2).Infoln("deleting", n) + + cookie := n.Cookie + + _, ok := vs.store.ReadVolumeNeedle(volumeId, n) + if ok != nil { + m := make(map[string]uint32) + m["size"] = 0 + writeJsonQuiet(w, r, http.StatusNotFound, m) + return + } + defer n.ReleaseMemory() + + if n.Cookie != cookie { + glog.V(0).Infoln("delete", r.URL.Path, "with unmaching cookie from ", r.RemoteAddr, "agent", r.UserAgent()) + writeJsonError(w, r, http.StatusBadRequest, errors.New("File Random Cookie does not match.")) + return + } + + count := int64(n.Size) + + if n.IsChunkedManifest() { + chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsGzipped()) + if e != nil { + writeJsonError(w, r, 
http.StatusInternalServerError, fmt.Errorf("Load chunks manifest error: %v", e)) + return + } + // make sure all chunks had deleted before delete manifest + if e := chunkManifest.DeleteChunks(vs.GetMasterNode()); e != nil { + writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Delete chunks error: %v", e)) + return + } + count = chunkManifest.Size + } + + _, err := topology.ReplicatedDelete(vs.GetMasterNode(), vs.store, volumeId, n, r) + + if err == nil { + m := make(map[string]int64) + m["size"] = count + writeJsonQuiet(w, r, http.StatusAccepted, m) + } else { + writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Deletion Failed: %v", err)) + } + +} + +//Experts only: takes multiple fid parameters. This function does not propagate deletes to replicas. +func (vs *VolumeServer) batchDeleteHandler(w http.ResponseWriter, r *http.Request) { + r.ParseForm() + var ret []operation.DeleteResult + for _, fid := range r.Form["fid"] { + vid, id_cookie, err := operation.ParseFileId(fid) + if err != nil { + ret = append(ret, operation.DeleteResult{ + Fid: fid, + Status: http.StatusBadRequest, + Error: err.Error()}) + continue + } + n := new(storage.Needle) + volumeId, _ := storage.NewVolumeId(vid) + n.ParsePath(id_cookie) + glog.V(4).Infoln("batch deleting", n) + cookie := n.Cookie + if _, err := vs.store.ReadVolumeNeedle(volumeId, n); err != nil { + ret = append(ret, operation.DeleteResult{ + Fid: fid, + Status: http.StatusNotFound, + Error: err.Error(), + }) + continue + } + + if n.IsChunkedManifest() { + ret = append(ret, operation.DeleteResult{ + Fid: fid, + Status: http.StatusNotAcceptable, + Error: "ChunkManifest: not allowed in batch delete mode.", + }) + n.ReleaseMemory() + continue + } + + if n.Cookie != cookie { + ret = append(ret, operation.DeleteResult{ + Fid: fid, + Status: http.StatusBadRequest, + Error: "File Random Cookie does not match.", + }) + glog.V(0).Infoln("deleting", fid, "with unmaching cookie from ", r.RemoteAddr, "agent", r.UserAgent()) + n.ReleaseMemory() + return + } + if size, err := vs.store.Delete(volumeId, n); err != nil { + ret = append(ret, operation.DeleteResult{ + Fid: fid, + Status: http.StatusInternalServerError, + Error: err.Error()}, + ) + } else { + ret = append(ret, operation.DeleteResult{ + Fid: fid, + Status: http.StatusAccepted, + Size: int(size)}, + ) + } + n.ReleaseMemory() + } + + writeJsonQuiet(w, r, http.StatusAccepted, ret) +} diff --git a/weed/server/volume_server_ui/templates.go b/weed/server/volume_server_ui/templates.go new file mode 100644 index 000000000..c3db6e92a --- /dev/null +++ b/weed/server/volume_server_ui/templates.go @@ -0,0 +1,135 @@ +package master_ui + +import ( + "html/template" + "strconv" + "strings" +) + +func join(data []int64) string { + var ret []string + for _, d := range data { + ret = append(ret, strconv.Itoa(int(d))) + } + return strings.Join(ret, ",") +} + +var funcMap = template.FuncMap{ + "join": join, +} + +var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOCTYPE html> +<html> + <head> + <title>SeaweedFS {{ .Version }}</title> + <link rel="icon" href="http://7viirv.com1.z0.glb.clouddn.com/seaweed_favicon.png" sizes="32x32" /> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css"> + <script type="text/javascript" src="https://code.jquery.com/jquery-2.1.3.min.js"></script> + <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery-sparklines/2.1.2/jquery.sparkline.min.js"></script> + <script 
type="text/javascript"> + $(function() { + var periods = ['second', 'minute', 'hour', 'day']; + for (i = 0; i < periods.length; i++) { + var period = periods[i]; + $('.inlinesparkline-'+period).sparkline('html', { + type: 'line', + barColor: 'red', + tooltipSuffix:' request per '+period, + }); + } + }); + </script> + <style> + #jqstooltip{ + height: 28px !important; + width: 150px !important; + } + </style> + </head> + <body> + <div class="container"> + <div class="page-header"> + <h1> + <img src="http://7viirv.com1.z0.glb.clouddn.com/seaweed50x50.png"></img> + SeaweedFS <small>{{ .Version }}</small> + </h1> + </div> + + <div class="row"> + <div class="col-sm-6"> + <h2>Disk Stats</h2> + <table class="table table-condensed table-striped"> + {{ range .DiskStatuses }} + <tr> + <th>{{ .Dir }}</th> + <td>{{ .Free }} Bytes Free</td> + </tr> + {{ end }} + </table> + </div> + + <div class="col-sm-6"> + <h2>System Stats</h2> + <table class="table table-condensed table-striped"> + <tr> + <th>Master</th> + <td><a href="http://{{.Master}}/ui/index.html">{{.Master}}</a></td> + </tr> + <tr> + <th>Weekly # ReadRequests</th> + <td><span class="inlinesparkline-day">{{ .Counters.ReadRequests.WeekCounter.ToList | join }}</span></td> + </tr> + <tr> + <th>Daily # ReadRequests</th> + <td><span class="inlinesparkline-hour">{{ .Counters.ReadRequests.DayCounter.ToList | join }}</span></td> + </tr> + <tr> + <th>Hourly # ReadRequests</th> + <td><span class="inlinesparkline-minute">{{ .Counters.ReadRequests.HourCounter.ToList | join }}</span></td> + </tr> + <tr> + <th>Last Minute # ReadRequests</th> + <td><span class="inlinesparkline-second">{{ .Counters.ReadRequests.MinuteCounter.ToList | join }}</span></td> + </tr> + {{ range $key, $val := .Stats }} + <tr> + <th>{{ $key }}</th> + <td>{{ $val }}</td> + </tr> + {{ end }} + </table> + </div> + </div> + + <div class="row"> + <h2>Volumes</h2> + <table class="table table-striped"> + <thead> + <tr> + <th>Id</th> + <th>Collection</th> + <th>Size</th> + <th>Files</th> + <th>Trash</th> + <th>TTL</th> + </tr> + </thead> + <tbody> + {{ range .Volumes }} + <tr> + <td><code>{{ .Id }}</code></td> + <td>{{ .Collection }}</td> + <td>{{ .Size }} Bytes</td> + <td>{{ .FileCount }}</td> + <td>{{ .DeleteCount }} / {{.DeletedByteCount}} Bytes</td> + <td>{{ .Ttl }}</td> + </tr> + {{ end }} + </tbody> + </table> + </div> + + </div> + </body> +</html> +`)) diff --git a/weed/stats/disk.go b/weed/stats/disk.go new file mode 100644 index 000000000..46d8c465e --- /dev/null +++ b/weed/stats/disk.go @@ -0,0 +1,14 @@ +package stats + +type DiskStatus struct { + Dir string + All uint64 + Used uint64 + Free uint64 +} + +func NewDiskStatus(path string) (disk *DiskStatus) { + disk = &DiskStatus{Dir: path} + disk.fillInStatus() + return +} diff --git a/weed/stats/disk_notsupported.go b/weed/stats/disk_notsupported.go new file mode 100644 index 000000000..e380d27ea --- /dev/null +++ b/weed/stats/disk_notsupported.go @@ -0,0 +1,7 @@ +// +build windows openbsd netbsd plan9 solaris + +package stats + +func (disk *DiskStatus) fillInStatus() { + return +} diff --git a/weed/stats/disk_supported.go b/weed/stats/disk_supported.go new file mode 100644 index 000000000..d68f0a32e --- /dev/null +++ b/weed/stats/disk_supported.go @@ -0,0 +1,19 @@ +// +build !windows,!openbsd,!netbsd,!plan9,!solaris + +package stats + +import ( + "syscall" +) + +func (disk *DiskStatus) fillInStatus() { + fs := syscall.Statfs_t{} + err := syscall.Statfs(disk.Dir, &fs) + if err != nil { + return + } + disk.All = fs.Blocks * 
uint64(fs.Bsize) + disk.Free = fs.Bfree * uint64(fs.Bsize) + disk.Used = disk.All - disk.Free + return +} diff --git a/weed/stats/duration_counter.go b/weed/stats/duration_counter.go new file mode 100644 index 000000000..69c8be61d --- /dev/null +++ b/weed/stats/duration_counter.go @@ -0,0 +1,94 @@ +package stats + +import ( + "time" +) + +type TimedValue struct { + t time.Time + val int64 +} + +func NewTimedValue(t time.Time, val int64) *TimedValue { + return &TimedValue{t: t, val: val} +} + +type RoundRobinCounter struct { + LastIndex int + Values []int64 + Counts []int64 +} + +func NewRoundRobinCounter(slots int) *RoundRobinCounter { + return &RoundRobinCounter{LastIndex: -1, Values: make([]int64, slots), Counts: make([]int64, slots)} +} +func (rrc *RoundRobinCounter) Add(index int, val int64) { + if index >= len(rrc.Values) { + return + } + for rrc.LastIndex != index { + rrc.LastIndex = (rrc.LastIndex + 1) % len(rrc.Values) + rrc.Values[rrc.LastIndex] = 0 + rrc.Counts[rrc.LastIndex] = 0 + } + rrc.Values[index] += val + rrc.Counts[index]++ +} +func (rrc *RoundRobinCounter) Max() (max int64) { + for _, val := range rrc.Values { + if max < val { + max = val + } + } + return +} +func (rrc *RoundRobinCounter) Count() (cnt int64) { + for _, c := range rrc.Counts { + cnt += c + } + return +} +func (rrc *RoundRobinCounter) Sum() (sum int64) { + for _, val := range rrc.Values { + sum += val + } + return +} + +func (rrc *RoundRobinCounter) ToList() (ret []int64) { + index := rrc.LastIndex + step := len(rrc.Values) + for step > 0 { + step-- + index++ + if index >= len(rrc.Values) { + index = 0 + } + ret = append(ret, rrc.Values[index]) + } + return +} + +type DurationCounter struct { + MinuteCounter *RoundRobinCounter + HourCounter *RoundRobinCounter + DayCounter *RoundRobinCounter + WeekCounter *RoundRobinCounter +} + +func NewDurationCounter() *DurationCounter { + return &DurationCounter{ + MinuteCounter: NewRoundRobinCounter(60), + HourCounter: NewRoundRobinCounter(60), + DayCounter: NewRoundRobinCounter(24), + WeekCounter: NewRoundRobinCounter(7), + } +} + +// Add is for cumulative counts +func (sc *DurationCounter) Add(tv *TimedValue) { + sc.MinuteCounter.Add(tv.t.Second(), tv.val) + sc.HourCounter.Add(tv.t.Minute(), tv.val) + sc.DayCounter.Add(tv.t.Hour(), tv.val) + sc.WeekCounter.Add(int(tv.t.Weekday()), tv.val) +} diff --git a/weed/stats/duration_counter_test.go b/weed/stats/duration_counter_test.go new file mode 100644 index 000000000..aa9d61c87 --- /dev/null +++ b/weed/stats/duration_counter_test.go @@ -0,0 +1,19 @@ +package stats + +import "testing" + +func TestRobinCounter(t *testing.T) { + rrc := NewRoundRobinCounter(60) + rrc.Add(0, 1) + rrc.Add(50, 2) + if rrc.Count() != 2 { + t.Fatal() + } + if rrc.Sum() != 3 { + t.Fatal() + } + /* + index out of range + */ + rrc.Add(61, 1) +} diff --git a/weed/stats/memory.go b/weed/stats/memory.go new file mode 100644 index 000000000..0700d92de --- /dev/null +++ b/weed/stats/memory.go @@ -0,0 +1,28 @@ +package stats + +import ( + "runtime" +) + +type MemStatus struct { + Goroutines int + All uint64 + Used uint64 + Free uint64 + Self uint64 + Heap uint64 + Stack uint64 +} + +func MemStat() MemStatus { + mem := MemStatus{} + mem.Goroutines = runtime.NumGoroutine() + memStat := new(runtime.MemStats) + runtime.ReadMemStats(memStat) + mem.Self = memStat.Alloc + mem.Heap = memStat.HeapAlloc + mem.Stack = memStat.StackInuse + + mem.fillInStatus() + return mem +} diff --git a/weed/stats/memory_notsupported.go b/weed/stats/memory_notsupported.go new file 
mode 100644 index 000000000..ba8229364 --- /dev/null +++ b/weed/stats/memory_notsupported.go @@ -0,0 +1,7 @@ +// +build !linux + +package stats + +func (mem *MemStatus) fillInStatus() { + return +} diff --git a/weed/stats/memory_supported.go b/weed/stats/memory_supported.go new file mode 100644 index 000000000..fd0c36d72 --- /dev/null +++ b/weed/stats/memory_supported.go @@ -0,0 +1,18 @@ +// +build linux + +package stats + +import ( + "syscall" +) + +func (mem *MemStatus) fillInStatus() { + //system memory usage + sysInfo := new(syscall.Sysinfo_t) + err := syscall.Sysinfo(sysInfo) + if err == nil { + mem.All = uint64(sysInfo.Totalram) //* uint64(syscall.Getpagesize()) + mem.Free = uint64(sysInfo.Freeram) //* uint64(syscall.Getpagesize()) + mem.Used = mem.All - mem.Free + } +} diff --git a/weed/stats/stats.go b/weed/stats/stats.go new file mode 100644 index 000000000..09826152f --- /dev/null +++ b/weed/stats/stats.go @@ -0,0 +1,111 @@ +package stats + +import ( + "time" +) + +type ServerStats struct { + Requests *DurationCounter + Connections *DurationCounter + AssignRequests *DurationCounter + ReadRequests *DurationCounter + WriteRequests *DurationCounter + DeleteRequests *DurationCounter + BytesIn *DurationCounter + BytesOut *DurationCounter +} + +type Channels struct { + Connections chan *TimedValue + Requests chan *TimedValue + AssignRequests chan *TimedValue + ReadRequests chan *TimedValue + WriteRequests chan *TimedValue + DeleteRequests chan *TimedValue + BytesIn chan *TimedValue + BytesOut chan *TimedValue +} + +var ( + Chan *Channels +) + +func init() { + Chan = &Channels{ + Connections: make(chan *TimedValue, 100), + Requests: make(chan *TimedValue, 100), + AssignRequests: make(chan *TimedValue, 100), + ReadRequests: make(chan *TimedValue, 100), + WriteRequests: make(chan *TimedValue, 100), + DeleteRequests: make(chan *TimedValue, 100), + BytesIn: make(chan *TimedValue, 100), + BytesOut: make(chan *TimedValue, 100), + } +} + +func NewServerStats() *ServerStats { + return &ServerStats{ + Requests: NewDurationCounter(), + Connections: NewDurationCounter(), + AssignRequests: NewDurationCounter(), + ReadRequests: NewDurationCounter(), + WriteRequests: NewDurationCounter(), + DeleteRequests: NewDurationCounter(), + BytesIn: NewDurationCounter(), + BytesOut: NewDurationCounter(), + } +} + +func ConnectionOpen() { + Chan.Connections <- NewTimedValue(time.Now(), 1) +} +func ConnectionClose() { + Chan.Connections <- NewTimedValue(time.Now(), -1) +} +func RequestOpen() { + Chan.Requests <- NewTimedValue(time.Now(), 1) +} +func RequestClose() { + Chan.Requests <- NewTimedValue(time.Now(), -1) +} +func AssignRequest() { + Chan.AssignRequests <- NewTimedValue(time.Now(), 1) +} +func ReadRequest() { + Chan.ReadRequests <- NewTimedValue(time.Now(), 1) +} +func WriteRequest() { + Chan.WriteRequests <- NewTimedValue(time.Now(), 1) +} +func DeleteRequest() { + Chan.DeleteRequests <- NewTimedValue(time.Now(), 1) +} +func BytesIn(val int64) { + Chan.BytesIn <- NewTimedValue(time.Now(), val) +} +func BytesOut(val int64) { + Chan.BytesOut <- NewTimedValue(time.Now(), val) +} + +func (ss *ServerStats) Start() { + for { + select { + case tv := <-Chan.Connections: + ss.Connections.Add(tv) + case tv := <-Chan.Requests: + ss.Requests.Add(tv) + case tv := <-Chan.AssignRequests: + ss.AssignRequests.Add(tv) + case tv := <-Chan.ReadRequests: + ss.ReadRequests.Add(tv) + case tv := <-Chan.WriteRequests: + ss.WriteRequests.Add(tv) + case tv := 
<-Chan.DeleteRequests: + ss.DeleteRequests.Add(tv) + case tv := <-Chan.BytesIn: + ss.BytesIn.Add(tv) + case tv := <-Chan.BytesOut: + ss.BytesOut.Add(tv) + } + } +} diff --git a/weed/storage/compact_map.go b/weed/storage/compact_map.go new file mode 100644 index 000000000..d4438d044 --- /dev/null +++ b/weed/storage/compact_map.go @@ -0,0 +1,207 @@ +package storage + +import ( + "strconv" + "sync" +) + +type NeedleValue struct { + Key Key + Offset uint32 `comment:"Volume offset"` //since aligned to 8 bytes, range is 4G*8=32G + Size uint32 `comment:"Size of the data portion"` +} + +const ( + batch = 100000 +) + +type Key uint64 + +func (k Key) String() string { + return strconv.FormatUint(uint64(k), 10) +} + +type CompactSection struct { + sync.RWMutex + values []NeedleValue + overflow map[Key]NeedleValue + start Key + end Key + counter int +} + +func NewCompactSection(start Key) *CompactSection { + return &CompactSection{ + values: make([]NeedleValue, batch), + overflow: make(map[Key]NeedleValue), + start: start, + } +} + +//return old entry size +func (cs *CompactSection) Set(key Key, offset uint32, size uint32) uint32 { + ret := uint32(0) + if key > cs.end { + cs.end = key + } + cs.Lock() + if i := cs.binarySearchValues(key); i >= 0 { + ret = cs.values[i].Size + //println("key", key, "old size", ret) + cs.values[i].Offset, cs.values[i].Size = offset, size + } else { + needOverflow := cs.counter >= batch + needOverflow = needOverflow || cs.counter > 0 && cs.values[cs.counter-1].Key > key + if needOverflow { + //println("start", cs.start, "counter", cs.counter, "key", key) + if oldValue, found := cs.overflow[key]; found { + ret = oldValue.Size + } + cs.overflow[key] = NeedleValue{Key: key, Offset: offset, Size: size} + } else { + p := &cs.values[cs.counter] + p.Key, p.Offset, p.Size = key, offset, size + //println("added index", cs.counter, "key", key, cs.values[cs.counter].Key) + cs.counter++ + } + } + cs.Unlock() + return ret +} + +//return old entry size +func (cs *CompactSection) Delete(key Key) uint32 { + cs.Lock() + ret := uint32(0) + if i := cs.binarySearchValues(key); i >= 0 { + if cs.values[i].Size > 0 { + ret = cs.values[i].Size + cs.values[i].Size = 0 + } + } + if v, found := cs.overflow[key]; found { + delete(cs.overflow, key) + ret = v.Size + } + cs.Unlock() + return ret +} +func (cs *CompactSection) Get(key Key) (*NeedleValue, bool) { + cs.RLock() + if v, ok := cs.overflow[key]; ok { + cs.RUnlock() + return &v, true + } + if i := cs.binarySearchValues(key); i >= 0 { + cs.RUnlock() + return &cs.values[i], true + } + cs.RUnlock() + return nil, false +} +func (cs *CompactSection) binarySearchValues(key Key) int { + l, h := 0, cs.counter-1 + if h >= 0 && cs.values[h].Key < key { + return -2 + } + //println("looking for key", key) + for l <= h { + m := (l + h) / 2 + //println("mid", m, "key", cs.values[m].Key, cs.values[m].Offset, cs.values[m].Size) + if cs.values[m].Key < key { + l = m + 1 + } else if key < cs.values[m].Key { + h = m - 1 + } else { + //println("found", m) + return m + } + } + return -1 +} + +//This map assumes mostly inserting increasing keys +type CompactMap struct { + list []*CompactSection +} + +func NewCompactMap() CompactMap { + return CompactMap{} +} + +func (cm *CompactMap) Set(key Key, offset uint32, size uint32) uint32 { + x := cm.binarySearchCompactSection(key) + if x < 0 { + //println(x, "creating", len(cm.list), "section, starting", key) + cm.list = append(cm.list, NewCompactSection(key)) + x = len(cm.list) - 1 + //keep compact section sorted by start + 
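// the new section was appended at the tail; bubble it left while its start precedes the previous section's start + 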
for x > 0 { + if cm.list[x-1].start > cm.list[x].start { + cm.list[x-1], cm.list[x] = cm.list[x], cm.list[x-1] + x = x - 1 + } else { + break + } + } + } + return cm.list[x].Set(key, offset, size) +} +func (cm *CompactMap) Delete(key Key) uint32 { + x := cm.binarySearchCompactSection(key) + if x < 0 { + return uint32(0) + } + return cm.list[x].Delete(key) +} +func (cm *CompactMap) Get(key Key) (*NeedleValue, bool) { + x := cm.binarySearchCompactSection(key) + if x < 0 { + return nil, false + } + return cm.list[x].Get(key) +} +func (cm *CompactMap) binarySearchCompactSection(key Key) int { + l, h := 0, len(cm.list)-1 + if h < 0 { + return -5 + } + if cm.list[h].start <= key { + if cm.list[h].counter < batch || key <= cm.list[h].end { + return h + } + return -4 + } + for l <= h { + m := (l + h) / 2 + if key < cm.list[m].start { + h = m - 1 + } else { // cm.list[m].start <= key + if cm.list[m+1].start <= key { + l = m + 1 + } else { + return m + } + } + } + return -3 +} + +// Visit visits all entries or stop if any error when visiting +func (cm *CompactMap) Visit(visit func(NeedleValue) error) error { + for _, cs := range cm.list { + for _, v := range cs.overflow { + if err := visit(v); err != nil { + return err + } + } + for _, v := range cs.values { + if _, found := cs.overflow[v.Key]; !found { + if err := visit(v); err != nil { + return err + } + } + } + } + return nil +} diff --git a/weed/storage/compact_map_perf_test.go b/weed/storage/compact_map_perf_test.go new file mode 100644 index 000000000..cc7669139 --- /dev/null +++ b/weed/storage/compact_map_perf_test.go @@ -0,0 +1,45 @@ +package storage + +import ( + "log" + "os" + "testing" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func TestMemoryUsage(t *testing.T) { + + indexFile, ie := os.OpenFile("../../test/sample.idx", os.O_RDWR|os.O_RDONLY, 0644) + if ie != nil { + log.Fatalln(ie) + } + LoadNewNeedleMap(indexFile) + +} + +func LoadNewNeedleMap(file *os.File) CompactMap { + m := NewCompactMap() + bytes := make([]byte, 16*1024) + count, e := file.Read(bytes) + if count > 0 { + fstat, _ := file.Stat() + glog.V(0).Infoln("Loading index file", fstat.Name(), "size", fstat.Size()) + } + for count > 0 && e == nil { + for i := 0; i < count; i += 16 { + key := util.BytesToUint64(bytes[i : i+8]) + offset := util.BytesToUint32(bytes[i+8 : i+12]) + size := util.BytesToUint32(bytes[i+12 : i+16]) + if offset > 0 { + m.Set(Key(key), offset, size) + } else { + //delete(m, key) + } + } + + count, e = file.Read(bytes) + } + return m +} diff --git a/weed/storage/compact_map_test.go b/weed/storage/compact_map_test.go new file mode 100644 index 000000000..1ccb48edb --- /dev/null +++ b/weed/storage/compact_map_test.go @@ -0,0 +1,77 @@ +package storage + +import ( + "testing" +) + +func TestIssue52(t *testing.T) { + m := NewCompactMap() + m.Set(Key(10002), 10002, 10002) + if element, ok := m.Get(Key(10002)); ok { + println("key", 10002, "ok", ok, element.Key, element.Offset, element.Size) + } + m.Set(Key(10001), 10001, 10001) + if element, ok := m.Get(Key(10002)); ok { + println("key", 10002, "ok", ok, element.Key, element.Offset, element.Size) + } else { + t.Fatal("key 10002 missing after setting 10001") + } +} + +func TestXYZ(t *testing.T) { + m := NewCompactMap() + for i := uint32(0); i < 100*batch; i += 2 { + m.Set(Key(i), i, i) + } + + for i := uint32(0); i < 100*batch; i += 37 { + m.Delete(Key(i)) + } + + for i := uint32(0); i < 10*batch; i += 3 { + m.Set(Key(i), i+11, i+5) + } + + // for i := 
uint32(0); i < 100; i++ { + // if v := m.Get(Key(i)); v != nil { + // glog.V(4).Infoln(i, "=", v.Key, v.Offset, v.Size) + // } + // } + + for i := uint32(0); i < 10*batch; i++ { + v, ok := m.Get(Key(i)) + if i%3 == 0 { + if !ok { + t.Fatal("key", i, "missing!") + } + if v.Size != i+5 { + t.Fatal("key", i, "size", v.Size) + } + } else if i%37 == 0 { + if ok && v.Size > 0 { + t.Fatal("key", i, "should have been deleted needle value", v) + } + } else if i%2 == 0 { + if v.Size != i { + t.Fatal("key", i, "size", v.Size) + } + } + } + + for i := uint32(10 * batch); i < 100*batch; i++ { + v, ok := m.Get(Key(i)) + if i%37 == 0 { + if ok && v.Size > 0 { + t.Fatal("key", i, "should have been deleted needle value", v) + } + } else if i%2 == 0 { + if v == nil { + t.Fatal("key", i, "missing") + } + if v.Size != i { + t.Fatal("key", i, "size", v.Size) + } + } + } + +} diff --git a/weed/storage/crc.go b/weed/storage/crc.go new file mode 100644 index 000000000..494937784 --- /dev/null +++ b/weed/storage/crc.go @@ -0,0 +1,30 @@ +package storage + +import ( + "fmt" + "github.com/klauspost/crc32" + + "github.com/chrislusf/seaweedfs/weed/util" +) + +var table = crc32.MakeTable(crc32.Castagnoli) + +type CRC uint32 + +func NewCRC(b []byte) CRC { + return CRC(0).Update(b) +} + +func (c CRC) Update(b []byte) CRC { + return CRC(crc32.Update(uint32(c), table, b)) +} + +func (c CRC) Value() uint32 { + return uint32(c>>15|c<<17) + 0xa282ead8 +} + +func (n *Needle) Etag() string { + bits := make([]byte, 4) + util.Uint32toBytes(bits, uint32(n.Checksum)) + return fmt.Sprintf("\"%x\"", bits) +} diff --git a/weed/storage/disk_location.go b/weed/storage/disk_location.go new file mode 100644 index 000000000..cc3c83b63 --- /dev/null +++ b/weed/storage/disk_location.go @@ -0,0 +1,73 @@ +package storage + +import ( + "io/ioutil" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +type DiskLocation struct { + Directory string + MaxVolumeCount int + volumes map[VolumeId]*Volume +} + +func NewDiskLocation(dir string, maxVolumeCount int) *DiskLocation { + location := &DiskLocation{Directory: dir, MaxVolumeCount: maxVolumeCount} + location.volumes = make(map[VolumeId]*Volume) + return location +} + +func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapType) { + + if dirs, err := ioutil.ReadDir(l.Directory); err == nil { + for _, dir := range dirs { + name := dir.Name() + if !dir.IsDir() && strings.HasSuffix(name, ".dat") { + collection := "" + base := name[:len(name)-len(".dat")] + i := strings.LastIndex(base, "_") + if i > 0 { + collection, base = base[0:i], base[i+1:] + } + if vid, err := NewVolumeId(base); err == nil { + if l.volumes[vid] == nil { + if v, e := NewVolume(l.Directory, collection, vid, needleMapKind, nil, nil); e == nil { + l.volumes[vid] = v + glog.V(0).Infof("data file %s, replicaPlacement=%s v=%d size=%d ttl=%s", l.Directory+"/"+name, v.ReplicaPlacement, v.Version(), v.Size(), v.Ttl.String()) + } else { + glog.V(0).Infof("new volume %s error %s", name, e) + } + } + } + } + } + } + glog.V(0).Infoln("Store started on dir:", l.Directory, "with", len(l.volumes), "volumes", "max", l.MaxVolumeCount) +} + +func (l *DiskLocation) DeleteCollectionFromDiskLocation(collection string) (e error) { + for k, v := range l.volumes { + if v.Collection == collection { + e = l.deleteVolumeById(k) + if e != nil { + return + } + } + } + return +} + +func (l *DiskLocation) deleteVolumeById(vid VolumeId) (e error) { + v, ok := l.volumes[vid] + if !ok { + return + } + e = v.Destroy() + if e != nil { + return + 
} + delete(l.volumes, vid) + return +} diff --git a/weed/storage/file_id.go b/weed/storage/file_id.go new file mode 100644 index 000000000..4cfdb16fa --- /dev/null +++ b/weed/storage/file_id.go @@ -0,0 +1,43 @@ +package storage + +import ( + "encoding/hex" + "errors" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type FileId struct { + VolumeId VolumeId + Key uint64 + Hashcode uint32 +} + +func NewFileIdFromNeedle(VolumeId VolumeId, n *Needle) *FileId { + return &FileId{VolumeId: VolumeId, Key: n.Id, Hashcode: n.Cookie} +} +func NewFileId(VolumeId VolumeId, Key uint64, Hashcode uint32) *FileId { + return &FileId{VolumeId: VolumeId, Key: Key, Hashcode: Hashcode} +} +func ParseFileId(fid string) (*FileId, error) { + a := strings.Split(fid, ",") + if len(a) != 2 { + glog.V(1).Infoln("Invalid fid ", fid, ", split length ", len(a)) + return nil, errors.New("Invalid fid " + fid) + } + vid_string, key_hash_string := a[0], a[1] + volumeId, _ := NewVolumeId(vid_string) + key, hash, e := ParseKeyHash(key_hash_string) + return &FileId{VolumeId: volumeId, Key: key, Hashcode: hash}, e +} +func (n *FileId) String() string { + bytes := make([]byte, 12) + util.Uint64toBytes(bytes[0:8], n.Key) + util.Uint32toBytes(bytes[8:12], n.Hashcode) + nonzero_index := 0 + for ; bytes[nonzero_index] == 0; nonzero_index++ { + } + return n.VolumeId.String() + "," + hex.EncodeToString(bytes[nonzero_index:]) +} diff --git a/weed/storage/needle.go b/weed/storage/needle.go new file mode 100644 index 000000000..29549b323 --- /dev/null +++ b/weed/storage/needle.go @@ -0,0 +1,231 @@ +package storage + +import ( + "fmt" + "io/ioutil" + "mime" + "net/http" + "path" + "strconv" + "strings" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/images" + "github.com/chrislusf/seaweedfs/weed/operation" +) + +const ( + NeedleHeaderSize = 16 //should never change this + NeedlePaddingSize = 8 + NeedleChecksumSize = 4 + MaxPossibleVolumeSize = 4 * 1024 * 1024 * 1024 * 8 +) + +/* +* A Needle means an uploaded and stored file. +* Needle file size is limited to 4GB for now. 
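+* On disk each needle is serialized as a 16-byte header (cookie, id, size), the data section, a CRC32 checksum, and padding to an 8-byte boundary; see the constants above and Append in needle_read_write.go. 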
+ */ +type Needle struct { + Cookie uint32 `comment:"random number to mitigate brute force lookups"` + Id uint64 `comment:"needle id"` + Size uint32 `comment:"sum of DataSize,Data,NameSize,Name,MimeSize,Mime"` + + DataSize uint32 `comment:"Data size"` //version2 + Data []byte `comment:"The actual file data"` + Flags byte `comment:"boolean flags"` //version2 + NameSize uint8 //version2 + Name []byte `comment:"maximum 256 characters"` //version2 + MimeSize uint8 //version2 + Mime []byte `comment:"maximum 256 characters"` //version2 + LastModified uint64 //only store LastModifiedBytesLength bytes, which is 5 bytes to disk + Ttl *TTL + + Checksum CRC `comment:"CRC32 to check integrity"` + Padding []byte `comment:"Aligned to 8 bytes"` + + rawBlock *Block // underlying supporting []byte, fetched and released into a pool +} + +func (n *Needle) String() (str string) { + str = fmt.Sprintf("Cookie:%d, Id:%d, Size:%d, DataSize:%d, Name: %s, Mime: %s", n.Cookie, n.Id, n.Size, n.DataSize, n.Name, n.Mime) + return +} + +func ParseUpload(r *http.Request) ( + fileName string, data []byte, mimeType string, isGzipped bool, + modifiedTime uint64, ttl *TTL, isChunkedFile bool, e error) { + form, fe := r.MultipartReader() + if fe != nil { + glog.V(0).Infoln("MultipartReader [ERROR]", fe) + e = fe + return + } + + //first multi-part item + part, fe := form.NextPart() + if fe != nil { + glog.V(0).Infoln("Reading Multi part [ERROR]", fe) + e = fe + return + } + + fileName = part.FileName() + if fileName != "" { + fileName = path.Base(fileName) + } + + data, e = ioutil.ReadAll(part) + if e != nil { + glog.V(0).Infoln("Reading Content [ERROR]", e) + return + } + + //if the filename is an empty string, search the other multi-part items for one + for fileName == "" { + part2, fe := form.NextPart() + if fe != nil { + break // no more parts, or an error; just safely break + } + + fName := part2.FileName() + + //found the first multi-part item that has a filename + if fName != "" { + data2, fe2 := ioutil.ReadAll(part2) + if fe2 != nil { + glog.V(0).Infoln("Reading Content [ERROR]", fe2) + e = fe2 + return + } + + //update + data = data2 + fileName = path.Base(fName) + break + } + } + + dotIndex := strings.LastIndex(fileName, ".") + ext, mtype := "", "" + if dotIndex > 0 { + ext = strings.ToLower(fileName[dotIndex:]) + mtype = mime.TypeByExtension(ext) + } + contentType := part.Header.Get("Content-Type") + if contentType != "" && mtype != contentType { + mimeType = contentType //only return the mime type if it is not deducible from the file extension + mtype = contentType + } + if part.Header.Get("Content-Encoding") == "gzip" { + isGzipped = true + } else if operation.IsGzippable(ext, mtype) { + if data, e = operation.GzipData(data); e != nil { + return + } + isGzipped = true + } + if ext == ".gz" { + isGzipped = true + } + if strings.HasSuffix(fileName, ".gz") && + !strings.HasSuffix(fileName, ".tar.gz") { + fileName = fileName[:len(fileName)-3] + } + modifiedTime, _ = strconv.ParseUint(r.FormValue("ts"), 10, 64) + ttl, _ = ReadTTL(r.FormValue("ttl")) + isChunkedFile, _ = strconv.ParseBool(r.FormValue("cm")) + return +} +func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) { + fname, mimeType, isGzipped, isChunkedFile := "", "", false, false + n = new(Needle) + fname, n.Data, mimeType, isGzipped, n.LastModified, n.Ttl, isChunkedFile, e = ParseUpload(r) + if e != nil { + return + } + if len(fname) < 256 { + n.Name = []byte(fname) + n.SetHasName() + } + if len(mimeType) < 256 { + n.Mime = []byte(mimeType) + n.SetHasMime() + } + if isGzipped 
{ + n.SetGzipped() + } + if n.LastModified == 0 { + n.LastModified = uint64(time.Now().Unix()) + } + n.SetHasLastModifiedDate() + if n.Ttl != EMPTY_TTL { + n.SetHasTtl() + } + + if isChunkedFile { + n.SetIsChunkManifest() + } + + if fixJpgOrientation { + loweredName := strings.ToLower(fname) + if mimeType == "image/jpeg" || strings.HasSuffix(loweredName, ".jpg") || strings.HasSuffix(loweredName, ".jpeg") { + n.Data = images.FixJpgOrientation(n.Data) + } + } + + n.Checksum = NewCRC(n.Data) + + commaSep := strings.LastIndex(r.URL.Path, ",") + dotSep := strings.LastIndex(r.URL.Path, ".") + fid := r.URL.Path[commaSep+1:] + if dotSep > 0 { + fid = r.URL.Path[commaSep+1 : dotSep] + } + + e = n.ParsePath(fid) + + return +} +func (n *Needle) ParsePath(fid string) (err error) { + length := len(fid) + if length <= 8 { + return fmt.Errorf("Invalid fid: %s", fid) + } + delta := "" + deltaIndex := strings.LastIndex(fid, "_") + if deltaIndex > 0 { + fid, delta = fid[0:deltaIndex], fid[deltaIndex+1:] + } + n.Id, n.Cookie, err = ParseKeyHash(fid) + if err != nil { + return err + } + if delta != "" { + if d, e := strconv.ParseUint(delta, 10, 64); e == nil { + n.Id += d + } else { + return e + } + } + return err +} + +func ParseKeyHash(key_hash_string string) (uint64, uint32, error) { + if len(key_hash_string) <= 8 { + return 0, 0, fmt.Errorf("KeyHash is too short.") + } + if len(key_hash_string) > 24 { + return 0, 0, fmt.Errorf("KeyHash is too long.") + } + split := len(key_hash_string) - 8 + key, err := strconv.ParseUint(key_hash_string[:split], 16, 64) + if err != nil { + return 0, 0, fmt.Errorf("Parse key error: %v", err) + } + hash, err := strconv.ParseUint(key_hash_string[split:], 16, 32) + if err != nil { + return 0, 0, fmt.Errorf("Parse hash error: %v", err) + } + return key, uint32(hash), nil +} diff --git a/weed/storage/needle_byte_cache.go b/weed/storage/needle_byte_cache.go new file mode 100644 index 000000000..ae35a48ba --- /dev/null +++ b/weed/storage/needle_byte_cache.go @@ -0,0 +1,75 @@ +package storage + +import ( + "fmt" + "os" + "sync/atomic" + + "github.com/hashicorp/golang-lru" + + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + bytesCache *lru.Cache + bytesPool *util.BytesPool +) + +/* +There are one level of caching, and one level of pooling. + +In pooling, all []byte are fetched and returned to the pool bytesPool. 
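+A []byte is handed back to the pool only when its reference count drops to zero, so the cache and an in-flight Needle can safely share one backing array. 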
+ +In caching, the string~[]byte mapping is cached +*/ +func init() { + bytesPool = util.NewBytesPool() + bytesCache, _ = lru.NewWithEvict(512, func(key interface{}, value interface{}) { + value.(*Block).decreaseReference() + }) +} + +type Block struct { + Bytes []byte + refCount int32 +} + +func (block *Block) decreaseReference() { + if atomic.AddInt32(&block.refCount, -1) == 0 { + bytesPool.Put(block.Bytes) + } +} +func (block *Block) increaseReference() { + atomic.AddInt32(&block.refCount, 1) +} + +// get bytes from the LRU cache of []byte first, then from the bytes pool +// when []byte in LRU cache is evicted, it will be put back to the bytes pool +func getBytesForFileBlock(r *os.File, offset int64, readSize int) (dataSlice []byte, block *Block, err error) { + // check cache, return if found + cacheKey := fmt.Sprintf("%d:%d:%d", r.Fd(), offset>>3, readSize) + if obj, found := bytesCache.Get(cacheKey); found { + block = obj.(*Block) + block.increaseReference() + dataSlice = block.Bytes[0:readSize] + return dataSlice, block, nil + } + + // get the []byte from pool + b := bytesPool.Get(readSize) + // refCount = 2, one by the bytesCache, one by the actual needle object + block = &Block{Bytes: b, refCount: 2} + dataSlice = block.Bytes[0:readSize] + _, err = r.ReadAt(dataSlice, offset) + bytesCache.Add(cacheKey, block) + return dataSlice, block, err +} + +func (n *Needle) ReleaseMemory() { + if n.rawBlock != nil { + n.rawBlock.decreaseReference() + } +} +func ReleaseBytes(b []byte) { + bytesPool.Put(b) +} diff --git a/weed/storage/needle_map.go b/weed/storage/needle_map.go new file mode 100644 index 000000000..05bc6e86c --- /dev/null +++ b/weed/storage/needle_map.go @@ -0,0 +1,123 @@ +package storage + +import ( + "fmt" + "io/ioutil" + "os" + "sync" + + "github.com/chrislusf/seaweedfs/weed/util" +) + +type NeedleMapType int + +const ( + NeedleMapInMemory NeedleMapType = iota + NeedleMapLevelDb + NeedleMapBoltDb +) + +type NeedleMapper interface { + Put(key uint64, offset uint32, size uint32) error + Get(key uint64) (element *NeedleValue, ok bool) + Delete(key uint64) error + Close() + Destroy() error + ContentSize() uint64 + DeletedSize() uint64 + FileCount() int + DeletedCount() int + MaxFileKey() uint64 + IndexFileSize() uint64 + IndexFileContent() ([]byte, error) + IndexFileName() string +} + +type baseNeedleMapper struct { + indexFile *os.File + indexFileAccessLock sync.Mutex + + mapMetric +} + +func (nm *baseNeedleMapper) IndexFileSize() uint64 { + stat, err := nm.indexFile.Stat() + if err == nil { + return uint64(stat.Size()) + } + return 0 +} + +func (nm *baseNeedleMapper) IndexFileName() string { + return nm.indexFile.Name() +} + +func idxFileEntry(bytes []byte) (key uint64, offset uint32, size uint32) { + key = util.BytesToUint64(bytes[:8]) + offset = util.BytesToUint32(bytes[8:12]) + size = util.BytesToUint32(bytes[12:16]) + return +} +func (nm *baseNeedleMapper) appendToIndexFile(key uint64, offset uint32, size uint32) error { + bytes := make([]byte, 16) + util.Uint64toBytes(bytes[0:8], key) + util.Uint32toBytes(bytes[8:12], offset) + util.Uint32toBytes(bytes[12:16], size) + + nm.indexFileAccessLock.Lock() + defer nm.indexFileAccessLock.Unlock() + if _, err := nm.indexFile.Seek(0, 2); err != nil { + return fmt.Errorf("cannot seek end of indexfile %s: %v", + nm.indexFile.Name(), err) + } + _, err := nm.indexFile.Write(bytes) + return err +} +func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) { + nm.indexFileAccessLock.Lock() + defer nm.indexFileAccessLock.Unlock() + 
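// reading under the same lock used by appendToIndexFile yields a consistent point-in-time snapshot of the index + 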
return ioutil.ReadFile(nm.indexFile.Name()) +} + +type mapMetric struct { + indexFile *os.File + + DeletionCounter int `json:"DeletionCounter"` + FileCounter int `json:"FileCounter"` + DeletionByteCounter uint64 `json:"DeletionByteCounter"` + FileByteCounter uint64 `json:"FileByteCounter"` + MaximumFileKey uint64 `json:"MaxFileKey"` +} + +func (mm *mapMetric) logDelete(deletedByteCount uint32) { + mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(deletedByteCount) + mm.DeletionCounter++ +} + +func (mm *mapMetric) logPut(key uint64, oldSize uint32, newSize uint32) { + if key > mm.MaximumFileKey { + mm.MaximumFileKey = key + } + mm.FileCounter++ + mm.FileByteCounter = mm.FileByteCounter + uint64(newSize) + if oldSize > 0 { + mm.DeletionCounter++ + mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(oldSize) + } +} + +func (mm mapMetric) ContentSize() uint64 { + return mm.FileByteCounter +} +func (mm mapMetric) DeletedSize() uint64 { + return mm.DeletionByteCounter +} +func (mm mapMetric) FileCount() int { + return mm.FileCounter +} +func (mm mapMetric) DeletedCount() int { + return mm.DeletionCounter +} +func (mm mapMetric) MaxFileKey() uint64 { + return mm.MaximumFileKey +} diff --git a/weed/storage/needle_map_boltdb.go b/weed/storage/needle_map_boltdb.go new file mode 100644 index 000000000..bd3edf28d --- /dev/null +++ b/weed/storage/needle_map_boltdb.go @@ -0,0 +1,165 @@ +package storage + +import ( + "fmt" + "os" + + "github.com/boltdb/bolt" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type BoltDbNeedleMap struct { + dbFileName string + db *bolt.DB + baseNeedleMapper +} + +var boltdbBucket = []byte("weed") + +func NewBoltDbNeedleMap(dbFileName string, indexFile *os.File) (m *BoltDbNeedleMap, err error) { + m = &BoltDbNeedleMap{dbFileName: dbFileName} + m.indexFile = indexFile + if !isBoltDbFresh(dbFileName, indexFile) { + glog.V(1).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name()) + generateBoltDbFile(dbFileName, indexFile) + glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) + } + glog.V(1).Infof("Opening %s...", dbFileName) + if m.db, err = bolt.Open(dbFileName, 0644, nil); err != nil { + return + } + glog.V(1).Infof("Loading %s...", indexFile.Name()) + nm, indexLoadError := LoadNeedleMap(indexFile) + if indexLoadError != nil { + return nil, indexLoadError + } + m.mapMetric = nm.mapMetric + return +} + +func isBoltDbFresh(dbFileName string, indexFile *os.File) bool { + // normally we always write to index file first + dbLogFile, err := os.Open(dbFileName) + if err != nil { + return false + } + defer dbLogFile.Close() + dbStat, dbStatErr := dbLogFile.Stat() + indexStat, indexStatErr := indexFile.Stat() + if dbStatErr != nil || indexStatErr != nil { + glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr) + return false + } + + return dbStat.ModTime().After(indexStat.ModTime()) +} + +func generateBoltDbFile(dbFileName string, indexFile *os.File) error { + db, err := bolt.Open(dbFileName, 0644, nil) + if err != nil { + return err + } + defer db.Close() + return WalkIndexFile(indexFile, func(key uint64, offset, size uint32) error { + if offset > 0 { + boltDbWrite(db, key, offset, size) + } else { + boltDbDelete(db, key) + } + return nil + }) +} + +func (m *BoltDbNeedleMap) Get(key uint64) (element *NeedleValue, ok bool) { + bytes := make([]byte, 8) + var data []byte + util.Uint64toBytes(bytes, key) + err := m.db.View(func(tx *bolt.Tx) error { + bucket := 
tx.Bucket(boltdbBucket) + if bucket == nil { + return fmt.Errorf("Bucket %q not found!", boltdbBucket) + } + + data = bucket.Get(bytes) + return nil + }) + + if err != nil || len(data) != 8 { + return nil, false + } + offset := util.BytesToUint32(data[0:4]) + size := util.BytesToUint32(data[4:8]) + return &NeedleValue{Key: Key(key), Offset: offset, Size: size}, true +} + +func (m *BoltDbNeedleMap) Put(key uint64, offset uint32, size uint32) error { + var oldSize uint32 + if oldNeedle, ok := m.Get(key); ok { + oldSize = oldNeedle.Size + } + m.logPut(key, oldSize, size) + // write to index file first + if err := m.appendToIndexFile(key, offset, size); err != nil { + return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err) + } + return boltDbWrite(m.db, key, offset, size) +} + +func boltDbWrite(db *bolt.DB, + key uint64, offset uint32, size uint32) error { + bytes := make([]byte, 16) + util.Uint64toBytes(bytes[0:8], key) + util.Uint32toBytes(bytes[8:12], offset) + util.Uint32toBytes(bytes[12:16], size) + return db.Update(func(tx *bolt.Tx) error { + bucket, err := tx.CreateBucketIfNotExists(boltdbBucket) + if err != nil { + return err + } + + err = bucket.Put(bytes[0:8], bytes[8:16]) + if err != nil { + return err + } + return nil + }) +} +func boltDbDelete(db *bolt.DB, key uint64) error { + bytes := make([]byte, 8) + util.Uint64toBytes(bytes, key) + return db.Update(func(tx *bolt.Tx) error { + bucket, err := tx.CreateBucketIfNotExists(boltdbBucket) + if err != nil { + return err + } + + err = bucket.Delete(bytes) + if err != nil { + return err + } + return nil + }) +} + +func (m *BoltDbNeedleMap) Delete(key uint64) error { + if oldNeedle, ok := m.Get(key); ok { + m.logDelete(oldNeedle.Size) + } + // write to index file first + if err := m.appendToIndexFile(key, 0, 0); err != nil { + return err + } + return boltDbDelete(m.db, key) +} + +func (m *BoltDbNeedleMap) Close() { + m.db.Close() +} + +func (m *BoltDbNeedleMap) Destroy() error { + m.Close() + os.Remove(m.indexFile.Name()) + return os.Remove(m.dbFileName) +} diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go new file mode 100644 index 000000000..1789dbb12 --- /dev/null +++ b/weed/storage/needle_map_leveldb.go @@ -0,0 +1,134 @@ +package storage + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/syndtr/goleveldb/leveldb" +) + +type LevelDbNeedleMap struct { + dbFileName string + db *leveldb.DB + baseNeedleMapper +} + +func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File) (m *LevelDbNeedleMap, err error) { + m = &LevelDbNeedleMap{dbFileName: dbFileName} + m.indexFile = indexFile + if !isLevelDbFresh(dbFileName, indexFile) { + glog.V(1).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name()) + generateLevelDbFile(dbFileName, indexFile) + glog.V(1).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) + } + glog.V(1).Infof("Opening %s...", dbFileName) + if m.db, err = leveldb.OpenFile(dbFileName, nil); err != nil { + return + } + glog.V(1).Infof("Loading %s...", indexFile.Name()) + nm, indexLoadError := LoadNeedleMap(indexFile) + if indexLoadError != nil { + return nil, indexLoadError + } + m.mapMetric = nm.mapMetric + return +} + +func isLevelDbFresh(dbFileName string, indexFile *os.File) bool { + // normally we always write to index file first + dbLogFile, err := os.Open(filepath.Join(dbFileName, "LOG")) + if err != nil { + return false + } + defer 
dbLogFile.Close() + dbStat, dbStatErr := dbLogFile.Stat() + indexStat, indexStatErr := indexFile.Stat() + if dbStatErr != nil || indexStatErr != nil { + glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr) + return false + } + + return dbStat.ModTime().After(indexStat.ModTime()) +} + +func generateLevelDbFile(dbFileName string, indexFile *os.File) error { + db, err := leveldb.OpenFile(dbFileName, nil) + if err != nil { + return err + } + defer db.Close() + return WalkIndexFile(indexFile, func(key uint64, offset, size uint32) error { + if offset > 0 { + levelDbWrite(db, key, offset, size) + } else { + levelDbDelete(db, key) + } + return nil + }) +} + +func (m *LevelDbNeedleMap) Get(key uint64) (element *NeedleValue, ok bool) { + bytes := make([]byte, 8) + util.Uint64toBytes(bytes, key) + data, err := m.db.Get(bytes, nil) + if err != nil || len(data) != 8 { + return nil, false + } + offset := util.BytesToUint32(data[0:4]) + size := util.BytesToUint32(data[4:8]) + return &NeedleValue{Key: Key(key), Offset: offset, Size: size}, true +} + +func (m *LevelDbNeedleMap) Put(key uint64, offset uint32, size uint32) error { + var oldSize uint32 + if oldNeedle, ok := m.Get(key); ok { + oldSize = oldNeedle.Size + } + m.logPut(key, oldSize, size) + // write to index file first + if err := m.appendToIndexFile(key, offset, size); err != nil { + return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err) + } + return levelDbWrite(m.db, key, offset, size) +} + +func levelDbWrite(db *leveldb.DB, + key uint64, offset uint32, size uint32) error { + bytes := make([]byte, 16) + util.Uint64toBytes(bytes[0:8], key) + util.Uint32toBytes(bytes[8:12], offset) + util.Uint32toBytes(bytes[12:16], size) + if err := db.Put(bytes[0:8], bytes[8:16], nil); err != nil { + return fmt.Errorf("failed to write leveldb: %v", err) + } + return nil +} +func levelDbDelete(db *leveldb.DB, key uint64) error { + bytes := make([]byte, 8) + util.Uint64toBytes(bytes, key) + return db.Delete(bytes, nil) +} + +func (m *LevelDbNeedleMap) Delete(key uint64) error { + if oldNeedle, ok := m.Get(key); ok { + m.logDelete(oldNeedle.Size) + } + // write to index file first + if err := m.appendToIndexFile(key, 0, 0); err != nil { + return err + } + return levelDbDelete(m.db, key) +} + +func (m *LevelDbNeedleMap) Close() { + m.db.Close() +} + +func (m *LevelDbNeedleMap) Destroy() error { + m.Close() + os.Remove(m.indexFile.Name()) + return os.Remove(m.dbFileName) +} diff --git a/weed/storage/needle_map_memory.go b/weed/storage/needle_map_memory.go new file mode 100644 index 000000000..f2f4835df --- /dev/null +++ b/weed/storage/needle_map_memory.go @@ -0,0 +1,106 @@ +package storage + +import ( + "io" + "os" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +type NeedleMap struct { + m CompactMap + + baseNeedleMapper +} + +func NewNeedleMap(file *os.File) *NeedleMap { + nm := &NeedleMap{ + m: NewCompactMap(), + } + nm.indexFile = file + return nm +} + +const ( + RowsToRead = 1024 +) + +func LoadNeedleMap(file *os.File) (*NeedleMap, error) { + nm := NewNeedleMap(file) + e := WalkIndexFile(file, func(key uint64, offset, size uint32) error { + if key > nm.MaximumFileKey { + nm.MaximumFileKey = key + } + nm.FileCounter++ + nm.FileByteCounter = nm.FileByteCounter + uint64(size) + if offset > 0 { + oldSize := nm.m.Set(Key(key), offset, size) + glog.V(3).Infoln("reading key", key, "offset", offset*NeedlePaddingSize, "size", size, "oldSize", oldSize) + if oldSize > 0 { + nm.DeletionCounter++ + nm.DeletionByteCounter = 
nm.DeletionByteCounter + uint64(oldSize) + } + } else { + oldSize := nm.m.Delete(Key(key)) + glog.V(3).Infoln("removing key", key, "offset", offset*NeedlePaddingSize, "size", size, "oldSize", oldSize) + nm.DeletionCounter++ + nm.DeletionByteCounter = nm.DeletionByteCounter + uint64(oldSize) + } + return nil + }) + glog.V(1).Infoln("max file key:", nm.MaximumFileKey) + return nm, e +} + +// walks through the index file, calls fn function with each key, offset, size +// stops with the error returned by the fn function +func WalkIndexFile(r *os.File, fn func(key uint64, offset, size uint32) error) error { + var readerOffset int64 + bytes := make([]byte, 16*RowsToRead) + count, e := r.ReadAt(bytes, readerOffset) + glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e) + readerOffset += int64(count) + var ( + key uint64 + offset, size uint32 + i int + ) + + for count > 0 && e == nil || e == io.EOF { + for i = 0; i+16 <= count; i += 16 { + key, offset, size = idxFileEntry(bytes[i : i+16]) + if e = fn(key, offset, size); e != nil { + return e + } + } + if e == io.EOF { + return nil + } + count, e = r.ReadAt(bytes, readerOffset) + glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e) + readerOffset += int64(count) + } + return e +} + +func (nm *NeedleMap) Put(key uint64, offset uint32, size uint32) error { + oldSize := nm.m.Set(Key(key), offset, size) + nm.logPut(key, oldSize, size) + return nm.appendToIndexFile(key, offset, size) +} +func (nm *NeedleMap) Get(key uint64) (element *NeedleValue, ok bool) { + element, ok = nm.m.Get(Key(key)) + return +} +func (nm *NeedleMap) Delete(key uint64) error { + deletedBytes := nm.m.Delete(Key(key)) + nm.logDelete(deletedBytes) + return nm.appendToIndexFile(key, 0, 0) +} +func (nm *NeedleMap) Close() { + _ = nm.indexFile.Close() +} +func (nm *NeedleMap) Destroy() error { + nm.Close() + return os.Remove(nm.indexFile.Name()) +} diff --git a/weed/storage/needle_read_write.go b/weed/storage/needle_read_write.go new file mode 100644 index 000000000..2f26147d6 --- /dev/null +++ b/weed/storage/needle_read_write.go @@ -0,0 +1,291 @@ +package storage + +import ( + "errors" + "fmt" + "io" + "os" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + FlagGzip = 0x01 + FlagHasName = 0x02 + FlagHasMime = 0x04 + FlagHasLastModifiedDate = 0x08 + FlagHasTtl = 0x10 + FlagIsChunkManifest = 0x80 + LastModifiedBytesLength = 5 + TtlBytesLength = 2 +) + +func (n *Needle) DiskSize() int64 { + padding := NeedlePaddingSize - ((NeedleHeaderSize + int64(n.Size) + NeedleChecksumSize) % NeedlePaddingSize) + return NeedleHeaderSize + int64(n.Size) + padding + NeedleChecksumSize +} +func (n *Needle) Append(w io.Writer, version Version) (size uint32, err error) { + if s, ok := w.(io.Seeker); ok { + if end, e := s.Seek(0, 1); e == nil { + defer func(s io.Seeker, off int64) { + if err != nil { + if _, e = s.Seek(off, 0); e != nil { + glog.V(0).Infof("Failed to seek %s back to %d with error: %v", w, off, e) + } + } + }(s, end) + } else { + err = fmt.Errorf("Cannot Read Current Volume Position: %v", e) + return + } + } + switch version { + case Version1: + header := make([]byte, NeedleHeaderSize) + util.Uint32toBytes(header[0:4], n.Cookie) + util.Uint64toBytes(header[4:12], n.Id) + n.Size = uint32(len(n.Data)) + size = n.Size + util.Uint32toBytes(header[12:16], n.Size) + if _, err = w.Write(header); err != nil { + return + } + if _, err = w.Write(n.Data); err != nil { + 
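// A short write here leaves a partial entry behind; the deferred seek
// installed at the top of Append (when the writer is seekable) moves the
// write position back to the saved end offset, so the next append will
// overwrite the truncated bytes.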
return + } + padding := NeedlePaddingSize - ((NeedleHeaderSize + n.Size + NeedleChecksumSize) % NeedlePaddingSize) + util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value()) + _, err = w.Write(header[0 : NeedleChecksumSize+padding]) + return + case Version2: + header := make([]byte, NeedleHeaderSize) + util.Uint32toBytes(header[0:4], n.Cookie) + util.Uint64toBytes(header[4:12], n.Id) + n.DataSize, n.NameSize, n.MimeSize = uint32(len(n.Data)), uint8(len(n.Name)), uint8(len(n.Mime)) + if n.DataSize > 0 { + n.Size = 4 + n.DataSize + 1 + if n.HasName() { + n.Size = n.Size + 1 + uint32(n.NameSize) + } + if n.HasMime() { + n.Size = n.Size + 1 + uint32(n.MimeSize) + } + if n.HasLastModifiedDate() { + n.Size = n.Size + LastModifiedBytesLength + } + if n.HasTtl() { + n.Size = n.Size + TtlBytesLength + } + } else { + n.Size = 0 + } + size = n.DataSize + util.Uint32toBytes(header[12:16], n.Size) + if _, err = w.Write(header); err != nil { + return + } + if n.DataSize > 0 { + util.Uint32toBytes(header[0:4], n.DataSize) + if _, err = w.Write(header[0:4]); err != nil { + return + } + if _, err = w.Write(n.Data); err != nil { + return + } + util.Uint8toBytes(header[0:1], n.Flags) + if _, err = w.Write(header[0:1]); err != nil { + return + } + if n.HasName() { + util.Uint8toBytes(header[0:1], n.NameSize) + if _, err = w.Write(header[0:1]); err != nil { + return + } + if _, err = w.Write(n.Name); err != nil { + return + } + } + if n.HasMime() { + util.Uint8toBytes(header[0:1], n.MimeSize) + if _, err = w.Write(header[0:1]); err != nil { + return + } + if _, err = w.Write(n.Mime); err != nil { + return + } + } + if n.HasLastModifiedDate() { + util.Uint64toBytes(header[0:8], n.LastModified) + if _, err = w.Write(header[8-LastModifiedBytesLength : 8]); err != nil { + return + } + } + if n.HasTtl() && n.Ttl != nil { + n.Ttl.ToBytes(header[0:TtlBytesLength]) + if _, err = w.Write(header[0:TtlBytesLength]); err != nil { + return + } + } + } + padding := NeedlePaddingSize - ((NeedleHeaderSize + n.Size + NeedleChecksumSize) % NeedlePaddingSize) + util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value()) + _, err = w.Write(header[0 : NeedleChecksumSize+padding]) + return n.DataSize, err + } + return 0, fmt.Errorf("Unsupported Version! (%d)", version) +} + +func ReadNeedleBlob(r *os.File, offset int64, size uint32) (dataSlice []byte, block *Block, err error) { + padding := NeedlePaddingSize - ((NeedleHeaderSize + size + NeedleChecksumSize) % NeedlePaddingSize) + readSize := NeedleHeaderSize + size + NeedleChecksumSize + padding + return getBytesForFileBlock(r, offset, int(readSize)) +} + +func (n *Needle) ReadData(r *os.File, offset int64, size uint32, version Version) (err error) { + bytes, block, err := ReadNeedleBlob(r, offset, size) + if err != nil { + return err + } + n.rawBlock = block + n.ParseNeedleHeader(bytes) + if n.Size != size { + return fmt.Errorf("File Entry Not Found. Needle %d Memory %d", n.Size, size) + } + switch version { + case Version1: + n.Data = bytes[NeedleHeaderSize : NeedleHeaderSize+size] + case Version2: + n.readNeedleDataVersion2(bytes[NeedleHeaderSize : NeedleHeaderSize+int(n.Size)]) + } + checksum := util.BytesToUint32(bytes[NeedleHeaderSize+size : NeedleHeaderSize+size+NeedleChecksumSize]) + newChecksum := NewCRC(n.Data) + if checksum != newChecksum.Value() { + return errors.New("CRC error! 
Data On Disk Corrupted") + } + n.Checksum = newChecksum + return nil +} +func (n *Needle) ParseNeedleHeader(bytes []byte) { + n.Cookie = util.BytesToUint32(bytes[0:4]) + n.Id = util.BytesToUint64(bytes[4:12]) + n.Size = util.BytesToUint32(bytes[12:NeedleHeaderSize]) +} +func (n *Needle) readNeedleDataVersion2(bytes []byte) { + index, lenBytes := 0, len(bytes) + if index < lenBytes { + n.DataSize = util.BytesToUint32(bytes[index : index+4]) + index = index + 4 + if int(n.DataSize)+index > lenBytes { + // this if clause is due to bug #87 and #93, fixed in v0.69 + // remove this clause later + return + } + n.Data = bytes[index : index+int(n.DataSize)] + index = index + int(n.DataSize) + n.Flags = bytes[index] + index = index + 1 + } + if index < lenBytes && n.HasName() { + n.NameSize = uint8(bytes[index]) + index = index + 1 + n.Name = bytes[index : index+int(n.NameSize)] + index = index + int(n.NameSize) + } + if index < lenBytes && n.HasMime() { + n.MimeSize = uint8(bytes[index]) + index = index + 1 + n.Mime = bytes[index : index+int(n.MimeSize)] + index = index + int(n.MimeSize) + } + if index < lenBytes && n.HasLastModifiedDate() { + n.LastModified = util.BytesToUint64(bytes[index : index+LastModifiedBytesLength]) + index = index + LastModifiedBytesLength + } + if index < lenBytes && n.HasTtl() { + n.Ttl = LoadTTLFromBytes(bytes[index : index+TtlBytesLength]) + index = index + TtlBytesLength + } +} + +func ReadNeedleHeader(r *os.File, version Version, offset int64) (n *Needle, bodyLength uint32, err error) { + n = new(Needle) + if version == Version1 || version == Version2 { + bytes := make([]byte, NeedleHeaderSize) + var count int + count, err = r.ReadAt(bytes, offset) + if count <= 0 || err != nil { + return nil, 0, err + } + n.ParseNeedleHeader(bytes) + padding := NeedlePaddingSize - ((n.Size + NeedleHeaderSize + NeedleChecksumSize) % NeedlePaddingSize) + bodyLength = n.Size + NeedleChecksumSize + padding + } + return +} + +//n should be a needle already read the header +//the input stream will read until next file entry +func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength uint32) (err error) { + if bodyLength <= 0 { + return nil + } + switch version { + case Version1: + bytes := make([]byte, bodyLength) + if _, err = r.ReadAt(bytes, offset); err != nil { + return + } + n.Data = bytes[:n.Size] + n.Checksum = NewCRC(n.Data) + case Version2: + bytes := make([]byte, bodyLength) + if _, err = r.ReadAt(bytes, offset); err != nil { + return + } + n.readNeedleDataVersion2(bytes[0:n.Size]) + n.Checksum = NewCRC(n.Data) + default: + err = fmt.Errorf("Unsupported Version! 
(%d)", version) + } + return +} + +func (n *Needle) IsGzipped() bool { + return n.Flags&FlagGzip > 0 +} +func (n *Needle) SetGzipped() { + n.Flags = n.Flags | FlagGzip +} +func (n *Needle) HasName() bool { + return n.Flags&FlagHasName > 0 +} +func (n *Needle) SetHasName() { + n.Flags = n.Flags | FlagHasName +} +func (n *Needle) HasMime() bool { + return n.Flags&FlagHasMime > 0 +} +func (n *Needle) SetHasMime() { + n.Flags = n.Flags | FlagHasMime +} +func (n *Needle) HasLastModifiedDate() bool { + return n.Flags&FlagHasLastModifiedDate > 0 +} +func (n *Needle) SetHasLastModifiedDate() { + n.Flags = n.Flags | FlagHasLastModifiedDate +} +func (n *Needle) HasTtl() bool { + return n.Flags&FlagHasTtl > 0 +} +func (n *Needle) SetHasTtl() { + n.Flags = n.Flags | FlagHasTtl +} + +func (n *Needle) IsChunkedManifest() bool { + return n.Flags&FlagIsChunkManifest > 0 +} + +func (n *Needle) SetIsChunkManifest() { + n.Flags = n.Flags | FlagIsChunkManifest +} diff --git a/weed/storage/needle_test.go b/weed/storage/needle_test.go new file mode 100644 index 000000000..c05afda2f --- /dev/null +++ b/weed/storage/needle_test.go @@ -0,0 +1,45 @@ +package storage + +import "testing" + +func TestParseKeyHash(t *testing.T) { + testcases := []struct { + KeyHash string + ID uint64 + Cookie uint32 + Err bool + }{ + // normal + {"4ed4c8116e41", 0x4ed4, 0xc8116e41, false}, + // cookie with leading zeros + {"4ed401116e41", 0x4ed4, 0x01116e41, false}, + // odd length + {"ed400116e41", 0xed4, 0x00116e41, false}, + // uint + {"fed4c8114ed4c811f0116e41", 0xfed4c8114ed4c811, 0xf0116e41, false}, + // err: too short + {"4ed4c811", 0, 0, true}, + // err: too long + {"4ed4c8114ed4c8114ed4c8111", 0, 0, true}, + // err: invalid character + {"helloworld", 0, 0, true}, + } + + for _, tc := range testcases { + if id, cookie, err := ParseKeyHash(tc.KeyHash); err != nil && !tc.Err { + t.Fatalf("Parse %s error: %v", tc.KeyHash, err) + } else if err == nil && tc.Err { + t.Fatalf("Parse %s expected error got nil", tc.KeyHash) + } else if id != tc.ID || cookie != tc.Cookie { + t.Fatalf("Parse %s wrong result. 
Expected: (%d, %d) got: (%d, %d)", tc.KeyHash, tc.ID, tc.Cookie, id, cookie) + } + } +} + +func BenchmarkParseKeyHash(b *testing.B) { + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + ParseKeyHash("4ed44ed44ed44ed4c8116e41") + } +} diff --git a/weed/storage/replica_placement.go b/weed/storage/replica_placement.go new file mode 100644 index 000000000..c1aca52eb --- /dev/null +++ b/weed/storage/replica_placement.go @@ -0,0 +1,53 @@ +package storage + +import ( + "errors" + "fmt" +) + +type ReplicaPlacement struct { + SameRackCount int + DiffRackCount int + DiffDataCenterCount int +} + +func NewReplicaPlacementFromString(t string) (*ReplicaPlacement, error) { + rp := &ReplicaPlacement{} + for i, c := range t { + count := int(c - '0') + if 0 <= count && count <= 2 { + switch i { + case 0: + rp.DiffDataCenterCount = count + case 1: + rp.DiffRackCount = count + case 2: + rp.SameRackCount = count + } + } else { + return rp, errors.New("Unknown Replication Type:" + t) + } + } + return rp, nil +} + +func NewReplicaPlacementFromByte(b byte) (*ReplicaPlacement, error) { + return NewReplicaPlacementFromString(fmt.Sprintf("%03d", b)) +} + +func (rp *ReplicaPlacement) Byte() byte { + ret := rp.DiffDataCenterCount*100 + rp.DiffRackCount*10 + rp.SameRackCount + return byte(ret) +} + +func (rp *ReplicaPlacement) String() string { + b := make([]byte, 3) + b[0] = byte(rp.DiffDataCenterCount + '0') + b[1] = byte(rp.DiffRackCount + '0') + b[2] = byte(rp.SameRackCount + '0') + return string(b) +} + +func (rp *ReplicaPlacement) GetCopyCount() int { + return rp.DiffDataCenterCount + rp.DiffRackCount + rp.SameRackCount + 1 +} diff --git a/weed/storage/replica_placement_test.go b/weed/storage/replica_placement_test.go new file mode 100644 index 000000000..9c2161e94 --- /dev/null +++ b/weed/storage/replica_placement_test.go @@ -0,0 +1,14 @@ +package storage + +import ( + "testing" +) + +func TestReplicaPlacemnetSerialDeserial(t *testing.T) { + rp, _ := NewReplicaPlacementFromString("001") + new_rp, _ := NewReplicaPlacementFromByte(rp.Byte()) + if rp.String() != new_rp.String() { + println("expected:", rp.String(), "actual:", new_rp.String()) + t.Fail() + } +} diff --git a/weed/storage/store.go b/weed/storage/store.go new file mode 100644 index 000000000..d44d6a863 --- /dev/null +++ b/weed/storage/store.go @@ -0,0 +1,340 @@ +package storage + +import ( + "encoding/json" + "errors" + "fmt" + "math/rand" + "strconv" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/proto" +) + +const ( + MAX_TTL_VOLUME_REMOVAL_DELAY = 10 // 10 minutes +) + +type MasterNodes struct { + nodes []string + lastNode int +} + +func (mn *MasterNodes) String() string { + return fmt.Sprintf("nodes:%v, lastNode:%d", mn.nodes, mn.lastNode) +} + +func NewMasterNodes(bootstrapNode string) (mn *MasterNodes) { + mn = &MasterNodes{nodes: []string{bootstrapNode}, lastNode: -1} + return +} +func (mn *MasterNodes) reset() { + glog.V(4).Infof("Resetting master nodes: %v", mn) + if len(mn.nodes) > 1 && mn.lastNode >= 0 { + glog.V(0).Infof("Reset master %s from: %v", mn.nodes[mn.lastNode], mn.nodes) + mn.lastNode = -mn.lastNode - 1 + } +} +func (mn *MasterNodes) findMaster() (string, error) { + if len(mn.nodes) == 0 { + return "", errors.New("No master node found!") + } + if mn.lastNode < 0 { + for _, m := range mn.nodes { + glog.V(4).Infof("Listing masters on %s", m) + if 
masters, e := operation.ListMasters(m); e == nil { + if len(masters) == 0 { + continue + } + mn.nodes = append(masters, m) + mn.lastNode = rand.Intn(len(mn.nodes)) + glog.V(2).Infof("current master nodes is %v", mn) + break + } else { + glog.V(4).Infof("Failed listing masters on %s: %v", m, e) + } + } + } + if mn.lastNode < 0 { + return "", errors.New("No master node available!") + } + return mn.nodes[mn.lastNode], nil +} + +/* + * A VolumeServer contains one Store + */ +type Store struct { + Ip string + Port int + PublicUrl string + Locations []*DiskLocation + dataCenter string //optional informaton, overwriting master setting if exists + rack string //optional information, overwriting master setting if exists + connected bool + volumeSizeLimit uint64 //read from the master + masterNodes *MasterNodes +} + +func (s *Store) String() (str string) { + str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d, masterNodes:%s", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.volumeSizeLimit, s.masterNodes) + return +} + +func NewStore(port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, needleMapKind NeedleMapType) (s *Store) { + s = &Store{Port: port, Ip: ip, PublicUrl: publicUrl} + s.Locations = make([]*DiskLocation, 0) + for i := 0; i < len(dirnames); i++ { + location := NewDiskLocation(dirnames[i], maxVolumeCounts[i]) + location.loadExistingVolumes(needleMapKind) + s.Locations = append(s.Locations, location) + } + return +} +func (s *Store) AddVolume(volumeListString string, collection string, needleMapKind NeedleMapType, replicaPlacement string, ttlString string) error { + rt, e := NewReplicaPlacementFromString(replicaPlacement) + if e != nil { + return e + } + ttl, e := ReadTTL(ttlString) + if e != nil { + return e + } + for _, range_string := range strings.Split(volumeListString, ",") { + if strings.Index(range_string, "-") < 0 { + id_string := range_string + id, err := NewVolumeId(id_string) + if err != nil { + return fmt.Errorf("Volume Id %s is not a valid unsigned integer!", id_string) + } + e = s.addVolume(VolumeId(id), collection, needleMapKind, rt, ttl) + } else { + pair := strings.Split(range_string, "-") + start, start_err := strconv.ParseUint(pair[0], 10, 64) + if start_err != nil { + return fmt.Errorf("Volume Start Id %s is not a valid unsigned integer!", pair[0]) + } + end, end_err := strconv.ParseUint(pair[1], 10, 64) + if end_err != nil { + return fmt.Errorf("Volume End Id %s is not a valid unsigned integer!", pair[1]) + } + for id := start; id <= end; id++ { + if err := s.addVolume(VolumeId(id), collection, needleMapKind, rt, ttl); err != nil { + e = err + } + } + } + } + return e +} +func (s *Store) DeleteCollection(collection string) (e error) { + for _, location := range s.Locations { + e = location.DeleteCollectionFromDiskLocation(collection) + if e != nil { + return + } + } + return +} + +func (s *Store) findVolume(vid VolumeId) *Volume { + for _, location := range s.Locations { + if v, found := location.volumes[vid]; found { + return v + } + } + return nil +} +func (s *Store) findFreeLocation() (ret *DiskLocation) { + max := 0 + for _, location := range s.Locations { + currentFreeCount := location.MaxVolumeCount - len(location.volumes) + if currentFreeCount > max { + max = currentFreeCount + ret = location + } + } + return ret +} +func (s *Store) addVolume(vid VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL) error { + if 
s.findVolume(vid) != nil { + return fmt.Errorf("Volume Id %d already exists!", vid) + } + if location := s.findFreeLocation(); location != nil { + glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v", + location.Directory, vid, collection, replicaPlacement, ttl) + if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, replicaPlacement, ttl); err == nil { + location.volumes[vid] = volume + return nil + } else { + return err + } + } + return fmt.Errorf("No more free space left") +} + +func (s *Store) Status() []*VolumeInfo { + var stats []*VolumeInfo + for _, location := range s.Locations { + for k, v := range location.volumes { + s := &VolumeInfo{ + Id: VolumeId(k), + Size: v.ContentSize(), + Collection: v.Collection, + ReplicaPlacement: v.ReplicaPlacement, + Version: v.Version(), + FileCount: v.nm.FileCount(), + DeleteCount: v.nm.DeletedCount(), + DeletedByteCount: v.nm.DeletedSize(), + ReadOnly: v.readOnly, + Ttl: v.Ttl} + stats = append(stats, s) + } + } + sortVolumeInfos(stats) + return stats +} + +func (s *Store) SetDataCenter(dataCenter string) { + s.dataCenter = dataCenter +} +func (s *Store) SetRack(rack string) { + s.rack = rack +} + +func (s *Store) SetBootstrapMaster(bootstrapMaster string) { + s.masterNodes = NewMasterNodes(bootstrapMaster) +} +func (s *Store) SendHeartbeatToMaster() (masterNode string, secretKey security.Secret, e error) { + masterNode, e = s.masterNodes.findMaster() + if e != nil { + return + } + var volumeMessages []*operation.VolumeInformationMessage + maxVolumeCount := 0 + var maxFileKey uint64 + for _, location := range s.Locations { + maxVolumeCount = maxVolumeCount + location.MaxVolumeCount + for k, v := range location.volumes { + if maxFileKey < v.nm.MaxFileKey() { + maxFileKey = v.nm.MaxFileKey() + } + if !v.expired(s.volumeSizeLimit) { + volumeMessage := &operation.VolumeInformationMessage{ + Id: proto.Uint32(uint32(k)), + Size: proto.Uint64(uint64(v.Size())), + Collection: proto.String(v.Collection), + FileCount: proto.Uint64(uint64(v.nm.FileCount())), + DeleteCount: proto.Uint64(uint64(v.nm.DeletedCount())), + DeletedByteCount: proto.Uint64(v.nm.DeletedSize()), + ReadOnly: proto.Bool(v.readOnly), + ReplicaPlacement: proto.Uint32(uint32(v.ReplicaPlacement.Byte())), + Version: proto.Uint32(uint32(v.Version())), + Ttl: proto.Uint32(v.Ttl.ToUint32()), + } + volumeMessages = append(volumeMessages, volumeMessage) + } else { + if v.exiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) { + location.deleteVolumeById(v.Id) + glog.V(0).Infoln("volume", v.Id, "is deleted.") + } else { + glog.V(0).Infoln("volume", v.Id, "is expired.") + } + } + } + } + + joinMessage := &operation.JoinMessage{ + IsInit: proto.Bool(!s.connected), + Ip: proto.String(s.Ip), + Port: proto.Uint32(uint32(s.Port)), + PublicUrl: proto.String(s.PublicUrl), + MaxVolumeCount: proto.Uint32(uint32(maxVolumeCount)), + MaxFileKey: proto.Uint64(maxFileKey), + DataCenter: proto.String(s.dataCenter), + Rack: proto.String(s.rack), + Volumes: volumeMessages, + } + + data, err := proto.Marshal(joinMessage) + if err != nil { + return "", "", err + } + + joinUrl := "http://" + masterNode + "/dir/join" + glog.V(4).Infof("Connecting to %s ...", joinUrl) + + jsonBlob, err := util.PostBytes(joinUrl, data) + if err != nil { + s.masterNodes.reset() + return "", "", err + } + var ret operation.JoinResult + if err := json.Unmarshal(jsonBlob, &ret); err != nil { + glog.V(0).Infof("Failed to join %s with response: %s", joinUrl, string(jsonBlob)) + 
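// An unparsable response usually means the node we contacted is stale or
// not an actual master; resetting makes the next heartbeat walk the node
// list again and pick a fresh master.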
s.masterNodes.reset() + return masterNode, "", err + } + if ret.Error != "" { + s.masterNodes.reset() + return masterNode, "", errors.New(ret.Error) + } + s.volumeSizeLimit = ret.VolumeSizeLimit + secretKey = security.Secret(ret.SecretKey) + s.connected = true + return +} +func (s *Store) Close() { + for _, location := range s.Locations { + for _, v := range location.volumes { + v.Close() + } + } +} +func (s *Store) Write(i VolumeId, n *Needle) (size uint32, err error) { + if v := s.findVolume(i); v != nil { + if v.readOnly { + err = fmt.Errorf("Volume %d is read only", i) + return + } + if MaxPossibleVolumeSize >= v.ContentSize()+uint64(size) { + size, err = v.write(n) + } else { + err = fmt.Errorf("Volume Size Limit %d Exceeded! Current size is %d", s.volumeSizeLimit, v.ContentSize()) + } + if s.volumeSizeLimit < v.ContentSize()+3*uint64(size) { + glog.V(0).Infoln("volume", i, "size", v.ContentSize(), "will exceed limit", s.volumeSizeLimit) + if _, _, e := s.SendHeartbeatToMaster(); e != nil { + glog.V(0).Infoln("error when reporting size:", e) + } + } + return + } + glog.V(0).Infoln("volume", i, "not found!") + err = fmt.Errorf("Volume %d not found!", i) + return +} +func (s *Store) Delete(i VolumeId, n *Needle) (uint32, error) { + if v := s.findVolume(i); v != nil && !v.readOnly { + return v.delete(n) + } + return 0, nil +} +func (s *Store) ReadVolumeNeedle(i VolumeId, n *Needle) (int, error) { + if v := s.findVolume(i); v != nil { + return v.readNeedle(n) + } + return 0, fmt.Errorf("Volume %v not found!", i) +} +func (s *Store) GetVolume(i VolumeId) *Volume { + return s.findVolume(i) +} + +func (s *Store) HasVolume(i VolumeId) bool { + v := s.findVolume(i) + return v != nil +} diff --git a/weed/storage/store_vacuum.go b/weed/storage/store_vacuum.go new file mode 100644 index 000000000..03825c159 --- /dev/null +++ b/weed/storage/store_vacuum.go @@ -0,0 +1,44 @@ +package storage + +import ( + "fmt" + "strconv" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +func (s *Store) CheckCompactVolume(volumeIdString string, garbageThresholdString string) (error, bool) { + vid, err := NewVolumeId(volumeIdString) + if err != nil { + return fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString), false + } + garbageThreshold, e := strconv.ParseFloat(garbageThresholdString, 32) + if e != nil { + return fmt.Errorf("garbageThreshold %s is not a valid float number", garbageThresholdString), false + } + if v := s.findVolume(vid); v != nil { + glog.V(3).Infoln(vid, "garbage level is", v.garbageLevel()) + return nil, garbageThreshold < v.garbageLevel() + } + return fmt.Errorf("volume id %d is not found during check compact", vid), false +} +func (s *Store) CompactVolume(volumeIdString string) error { + vid, err := NewVolumeId(volumeIdString) + if err != nil { + return fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString) + } + if v := s.findVolume(vid); v != nil { + return v.Compact() + } + return fmt.Errorf("volume id %d is not found during compact", vid) +} +func (s *Store) CommitCompactVolume(volumeIdString string) error { + vid, err := NewVolumeId(volumeIdString) + if err != nil { + return fmt.Errorf("Volume Id %s is not a valid unsigned integer", volumeIdString) + } + if v := s.findVolume(vid); v != nil { + return v.commitCompact() + } + return fmt.Errorf("volume id %d is not found during commit compact", vid) +} diff --git a/weed/storage/volume.go b/weed/storage/volume.go new file mode 100644 index 000000000..d40bdc565 --- /dev/null +++ 
b/weed/storage/volume.go @@ -0,0 +1,430 @@ +package storage + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + "path" + "sync" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +type Volume struct { + Id VolumeId + dir string + Collection string + dataFile *os.File + nm NeedleMapper + needleMapKind NeedleMapType + readOnly bool + + SuperBlock + + dataFileAccessLock sync.Mutex + lastModifiedTime uint64 //unix time in seconds +} + +func NewVolume(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL) (v *Volume, e error) { + v = &Volume{dir: dirname, Collection: collection, Id: id} + v.SuperBlock = SuperBlock{ReplicaPlacement: replicaPlacement, Ttl: ttl} + v.needleMapKind = needleMapKind + e = v.load(true, true, needleMapKind) + return +} +func (v *Volume) String() string { + return fmt.Sprintf("Id:%v, dir:%s, Collection:%s, dataFile:%v, nm:%v, readOnly:%v", v.Id, v.dir, v.Collection, v.dataFile, v.nm, v.readOnly) +} + +func loadVolumeWithoutIndex(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType) (v *Volume, e error) { + v = &Volume{dir: dirname, Collection: collection, Id: id} + v.SuperBlock = SuperBlock{} + v.needleMapKind = needleMapKind + e = v.load(false, false, needleMapKind) + return +} +func (v *Volume) FileName() (fileName string) { + if v.Collection == "" { + fileName = path.Join(v.dir, v.Id.String()) + } else { + fileName = path.Join(v.dir, v.Collection+"_"+v.Id.String()) + } + return +} +func (v *Volume) DataFile() *os.File { + return v.dataFile +} +func (v *Volume) load(alsoLoadIndex bool, createDatIfMissing bool, needleMapKind NeedleMapType) error { + var e error + fileName := v.FileName() + + if exists, canRead, canWrite, modifiedTime := checkFile(fileName + ".dat"); exists { + if !canRead { + return fmt.Errorf("cannot read Volume Data file %s.dat", fileName) + } + if canWrite { + v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644) + v.lastModifiedTime = uint64(modifiedTime.Unix()) + } else { + glog.V(0).Infoln("opening " + fileName + ".dat in READONLY mode") + v.dataFile, e = os.Open(fileName + ".dat") + v.readOnly = true + } + } else { + if createDatIfMissing { + v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644) + } else { + return fmt.Errorf("Volume Data file %s.dat does not exist.", fileName) + } + } + + if e != nil { + if !os.IsPermission(e) { + return fmt.Errorf("cannot load Volume Data %s.dat: %v", fileName, e) + } + } + + if v.ReplicaPlacement == nil { + e = v.readSuperBlock() + } else { + e = v.maybeWriteSuperBlock() + } + if e == nil && alsoLoadIndex { + var indexFile *os.File + if v.readOnly { + glog.V(1).Infoln("open to read file", fileName+".idx") + if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDONLY, 0644); e != nil { + return fmt.Errorf("cannot read Volume Index %s.idx: %v", fileName, e) + } + } else { + glog.V(1).Infoln("open to write file", fileName+".idx") + if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDWR|os.O_CREATE, 0644); e != nil { + return fmt.Errorf("cannot write Volume Index %s.idx: %v", fileName, e) + } + } + switch needleMapKind { + case NeedleMapInMemory: + glog.V(0).Infoln("loading index file", fileName+".idx", "readonly", v.readOnly) + if v.nm, e = LoadNeedleMap(indexFile); e != nil { + glog.V(0).Infof("loading index %s error: %v", fileName+".idx", e) + } + case NeedleMapLevelDb: + glog.V(0).Infoln("loading leveldb file", fileName+".ldb") + if v.nm, e = 
NewLevelDbNeedleMap(fileName+".ldb", indexFile); e != nil { + glog.V(0).Infof("loading leveldb %s error: %v", fileName+".ldb", e) + } + case NeedleMapBoltDb: + glog.V(0).Infoln("loading boltdb file", fileName+".bdb") + if v.nm, e = NewBoltDbNeedleMap(fileName+".bdb", indexFile); e != nil { + glog.V(0).Infof("loading boltdb %s error: %v", fileName+".bdb", e) + } + } + } + return e +} +func (v *Volume) Version() Version { + return v.SuperBlock.Version() +} +func (v *Volume) Size() int64 { + stat, e := v.dataFile.Stat() + if e == nil { + return stat.Size() + } + glog.V(0).Infof("Failed to read file size %s %v", v.dataFile.Name(), e) + return -1 +} + +// Close cleanly shuts down this volume +func (v *Volume) Close() { + v.dataFileAccessLock.Lock() + defer v.dataFileAccessLock.Unlock() + v.nm.Close() + _ = v.dataFile.Close() +} + +func (v *Volume) NeedToReplicate() bool { + return v.ReplicaPlacement.GetCopyCount() > 1 +} + +// isFileUnchanged checks whether this needle to write is same as last one. +// It requires serialized access in the same volume. +func (v *Volume) isFileUnchanged(n *Needle) bool { + if v.Ttl.String() != "" { + return false + } + nv, ok := v.nm.Get(n.Id) + if ok && nv.Offset > 0 { + oldNeedle := new(Needle) + err := oldNeedle.ReadData(v.dataFile, int64(nv.Offset)*NeedlePaddingSize, nv.Size, v.Version()) + if err != nil { + glog.V(0).Infof("Failed to check updated file %v", err) + return false + } + defer oldNeedle.ReleaseMemory() + if oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) { + n.DataSize = oldNeedle.DataSize + return true + } + } + return false +} + +// Destroy removes everything related to this volume +func (v *Volume) Destroy() (err error) { + if v.readOnly { + err = fmt.Errorf("%s is read-only", v.dataFile.Name()) + return + } + v.Close() + err = os.Remove(v.dataFile.Name()) + if err != nil { + return + } + err = v.nm.Destroy() + return +} + +// AppendBlob append a blob to end of the data file, used in replication +func (v *Volume) AppendBlob(b []byte) (offset int64, err error) { + if v.readOnly { + err = fmt.Errorf("%s is read-only", v.dataFile.Name()) + return + } + v.dataFileAccessLock.Lock() + defer v.dataFileAccessLock.Unlock() + if offset, err = v.dataFile.Seek(0, 2); err != nil { + glog.V(0).Infof("failed to seek the end of file: %v", err) + return + } + //ensure file writing starting from aligned positions + if offset%NeedlePaddingSize != 0 { + offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize) + if offset, err = v.dataFile.Seek(offset, 0); err != nil { + glog.V(0).Infof("failed to align in datafile %s: %v", v.dataFile.Name(), err) + return + } + } + v.dataFile.Write(b) + return +} + +func (v *Volume) write(n *Needle) (size uint32, err error) { + glog.V(4).Infof("writing needle %s", NewFileIdFromNeedle(v.Id, n).String()) + if v.readOnly { + err = fmt.Errorf("%s is read-only", v.dataFile.Name()) + return + } + v.dataFileAccessLock.Lock() + defer v.dataFileAccessLock.Unlock() + if v.isFileUnchanged(n) { + size = n.DataSize + glog.V(4).Infof("needle is unchanged!") + return + } + var offset int64 + if offset, err = v.dataFile.Seek(0, 2); err != nil { + glog.V(0).Infof("failed to seek the end of file: %v", err) + return + } + + //ensure file writing starting from aligned positions + if offset%NeedlePaddingSize != 0 { + offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize) + if offset, err = v.dataFile.Seek(offset, 0); err != nil { + glog.V(0).Infof("failed to align in datafile %s: %v", v.dataFile.Name(), err) 
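// Needles must begin at NeedlePaddingSize-aligned offsets: the needle map
// stores offset/NeedlePaddingSize in a 4-byte field, so alignment is what
// lets a 32-bit offset address data beyond the 4GiB mark.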
+ return + } + } + + if size, err = n.Append(v.dataFile, v.Version()); err != nil { + if e := v.dataFile.Truncate(offset); e != nil { + err = fmt.Errorf("%s\ncannot truncate %s: %v", err, v.dataFile.Name(), e) + } + return + } + nv, ok := v.nm.Get(n.Id) + if !ok || int64(nv.Offset)*NeedlePaddingSize < offset { + if err = v.nm.Put(n.Id, uint32(offset/NeedlePaddingSize), n.Size); err != nil { + glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err) + } + } + if v.lastModifiedTime < n.LastModified { + v.lastModifiedTime = n.LastModified + } + return +} + +func (v *Volume) delete(n *Needle) (uint32, error) { + glog.V(4).Infof("delete needle %s", NewFileIdFromNeedle(v.Id, n).String()) + if v.readOnly { + return 0, fmt.Errorf("%s is read-only", v.dataFile.Name()) + } + v.dataFileAccessLock.Lock() + defer v.dataFileAccessLock.Unlock() + nv, ok := v.nm.Get(n.Id) + //fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size) + if ok { + size := nv.Size + if err := v.nm.Delete(n.Id); err != nil { + return size, err + } + if _, err := v.dataFile.Seek(0, 2); err != nil { + return size, err + } + n.Data = nil + _, err := n.Append(v.dataFile, v.Version()) + return size, err + } + return 0, nil +} + +// read fills in Needle content by looking up n.Id from NeedleMapper +func (v *Volume) readNeedle(n *Needle) (int, error) { + nv, ok := v.nm.Get(n.Id) + if !ok || nv.Offset == 0 { + return -1, errors.New("Not Found") + } + err := n.ReadData(v.dataFile, int64(nv.Offset)*NeedlePaddingSize, nv.Size, v.Version()) + if err != nil { + return 0, err + } + bytesRead := len(n.Data) + if !n.HasTtl() { + return bytesRead, nil + } + ttlMinutes := n.Ttl.Minutes() + if ttlMinutes == 0 { + return bytesRead, nil + } + if !n.HasLastModifiedDate() { + return bytesRead, nil + } + if uint64(time.Now().Unix()) < n.LastModified+uint64(ttlMinutes*60) { + return bytesRead, nil + } + n.ReleaseMemory() + return -1, errors.New("Not Found") +} + +func ScanVolumeFile(dirname string, collection string, id VolumeId, + needleMapKind NeedleMapType, + visitSuperBlock func(SuperBlock) error, + readNeedleBody bool, + visitNeedle func(n *Needle, offset int64) error) (err error) { + var v *Volume + if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil { + return fmt.Errorf("Failed to load volume %d: %v", id, err) + } + if err = visitSuperBlock(v.SuperBlock); err != nil { + return fmt.Errorf("Failed to process volume %d super block: %v", id, err) + } + + version := v.Version() + + offset := int64(SuperBlockSize) + n, rest, e := ReadNeedleHeader(v.dataFile, version, offset) + if e != nil { + err = fmt.Errorf("cannot read needle header: %v", e) + return + } + for n != nil { + if readNeedleBody { + if err = n.ReadNeedleBody(v.dataFile, version, offset+int64(NeedleHeaderSize), rest); err != nil { + glog.V(0).Infof("cannot read needle body: %v", err) + //err = fmt.Errorf("cannot read needle body: %v", err) + //return + } + if n.DataSize >= n.Size { + // this should come from a bug reported on #87 and #93 + // fixed in v0.69 + // remove this whole "if" clause later, long after 0.69 + oldRest, oldSize := rest, n.Size + padding := NeedlePaddingSize - ((n.Size + NeedleHeaderSize + NeedleChecksumSize) % NeedlePaddingSize) + n.Size = 0 + rest = n.Size + NeedleChecksumSize + padding + if rest%NeedlePaddingSize != 0 { + rest += (NeedlePaddingSize - rest%NeedlePaddingSize) + } + glog.V(4).Infof("Adjusting n.Size %d=>0 rest:%d=>%d %+v", oldSize, oldRest, rest, n) + } + } + if err 
= visitNeedle(n, offset); err != nil { + glog.V(0).Infof("visit needle error: %v", err) + } + offset += int64(NeedleHeaderSize) + int64(rest) + glog.V(4).Infof("==> new entry offset %d", offset) + if n, rest, err = ReadNeedleHeader(v.dataFile, version, offset); err != nil { + if err == io.EOF { + return nil + } + return fmt.Errorf("cannot read needle header: %v", err) + } + glog.V(4).Infof("new entry needle size:%d rest:%d", n.Size, rest) + } + + return +} + +func (v *Volume) ContentSize() uint64 { + return v.nm.ContentSize() +} + +func checkFile(filename string) (exists, canRead, canWrite bool, modTime time.Time) { + exists = true + fi, err := os.Stat(filename) + if os.IsNotExist(err) { + exists = false + return + } + if fi.Mode()&0400 != 0 { + canRead = true + } + if fi.Mode()&0200 != 0 { + canWrite = true + } + modTime = fi.ModTime() + return +} + +// volume is expired if modified time + volume ttl < now +// except when volume is empty +// or when the volume does not have a ttl +// or when volumeSizeLimit is 0 when server just starts +func (v *Volume) expired(volumeSizeLimit uint64) bool { + if volumeSizeLimit == 0 { + //skip if we don't know size limit + return false + } + if v.ContentSize() == 0 { + return false + } + if v.Ttl == nil || v.Ttl.Minutes() == 0 { + return false + } + glog.V(0).Infof("now:%v lastModified:%v", time.Now().Unix(), v.lastModifiedTime) + livedMinutes := (time.Now().Unix() - int64(v.lastModifiedTime)) / 60 + glog.V(0).Infof("ttl:%v lived:%v", v.Ttl, livedMinutes) + if int64(v.Ttl.Minutes()) < livedMinutes { + return true + } + return false +} + +// wait either maxDelayMinutes or 10% of ttl minutes +func (v *Volume) exiredLongEnough(maxDelayMinutes uint32) bool { + if v.Ttl == nil || v.Ttl.Minutes() == 0 { + return false + } + removalDelay := v.Ttl.Minutes() / 10 + if removalDelay > maxDelayMinutes { + removalDelay = maxDelayMinutes + } + + if uint64(v.Ttl.Minutes()+removalDelay)*60+v.lastModifiedTime < uint64(time.Now().Unix()) { + return true + } + return false +} diff --git a/weed/storage/volume_id.go b/weed/storage/volume_id.go new file mode 100644 index 000000000..0333c6cf0 --- /dev/null +++ b/weed/storage/volume_id.go @@ -0,0 +1,18 @@ +package storage + +import ( + "strconv" +) + +type VolumeId uint32 + +func NewVolumeId(vid string) (VolumeId, error) { + volumeId, err := strconv.ParseUint(vid, 10, 64) + return VolumeId(volumeId), err +} +func (vid *VolumeId) String() string { + return strconv.FormatUint(uint64(*vid), 10) +} +func (vid *VolumeId) Next() VolumeId { + return VolumeId(uint32(*vid) + 1) +} diff --git a/weed/storage/volume_info.go b/weed/storage/volume_info.go new file mode 100644 index 000000000..b3068eec3 --- /dev/null +++ b/weed/storage/volume_info.go @@ -0,0 +1,65 @@ +package storage + +import ( + "fmt" + "github.com/chrislusf/seaweedfs/weed/operation" + "sort" +) + +type VolumeInfo struct { + Id VolumeId + Size uint64 + ReplicaPlacement *ReplicaPlacement + Ttl *TTL + Collection string + Version Version + FileCount int + DeleteCount int + DeletedByteCount uint64 + ReadOnly bool +} + +func NewVolumeInfo(m *operation.VolumeInformationMessage) (vi VolumeInfo, err error) { + vi = VolumeInfo{ + Id: VolumeId(*m.Id), + Size: *m.Size, + Collection: *m.Collection, + FileCount: int(*m.FileCount), + DeleteCount: int(*m.DeleteCount), + DeletedByteCount: *m.DeletedByteCount, + ReadOnly: *m.ReadOnly, + Version: Version(*m.Version), + } + rp, e := NewReplicaPlacementFromByte(byte(*m.ReplicaPlacement)) + if e != nil { + return vi, e + } + vi.ReplicaPlacement 
= rp + vi.Ttl = LoadTTLFromUint32(*m.Ttl) + return vi, nil +} + +func (vi VolumeInfo) String() string { + return fmt.Sprintf("Id:%d, Size:%d, ReplicaPlacement:%s, Collection:%s, Version:%v, FileCount:%d, DeleteCount:%d, DeletedByteCount:%d, ReadOnly:%v", + vi.Id, vi.Size, vi.ReplicaPlacement, vi.Collection, vi.Version, vi.FileCount, vi.DeleteCount, vi.DeletedByteCount, vi.ReadOnly) +} + +/*VolumesInfo sorting*/ + +type volumeInfos []*VolumeInfo + +func (vis volumeInfos) Len() int { + return len(vis) +} + +func (vis volumeInfos) Less(i, j int) bool { + return vis[i].Id < vis[j].Id +} + +func (vis volumeInfos) Swap(i, j int) { + vis[i], vis[j] = vis[j], vis[i] +} + +func sortVolumeInfos(vis volumeInfos) { + sort.Sort(vis) +} diff --git a/weed/storage/volume_info_test.go b/weed/storage/volume_info_test.go new file mode 100644 index 000000000..9a9c43ad2 --- /dev/null +++ b/weed/storage/volume_info_test.go @@ -0,0 +1,23 @@ +package storage + +import "testing" + +func TestSortVolumeInfos(t *testing.T) { + vis := []*VolumeInfo{ + &VolumeInfo{ + Id: 2, + }, + &VolumeInfo{ + Id: 1, + }, + &VolumeInfo{ + Id: 3, + }, + } + sortVolumeInfos(vis) + for i := 0; i < len(vis); i++ { + if vis[i].Id != VolumeId(i+1) { + t.Fatal() + } + } +} diff --git a/weed/storage/volume_super_block.go b/weed/storage/volume_super_block.go new file mode 100644 index 000000000..fc773273d --- /dev/null +++ b/weed/storage/volume_super_block.go @@ -0,0 +1,81 @@ +package storage + +import ( + "fmt" + "os" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + SuperBlockSize = 8 +) + +/* +* Super block currently has 8 bytes allocated for each volume. +* Byte 0: version, 1 or 2 +* Byte 1: Replica Placement strategy, 000, 001, 002, 010, etc +* Byte 2 and byte 3: Time to live. See TTL for definition +* Byte 4 and byte 5: The number of times the volume has been compacted. +* Rest bytes: Reserved + */ +type SuperBlock struct { + version Version + ReplicaPlacement *ReplicaPlacement + Ttl *TTL + CompactRevision uint16 +} + +func (s *SuperBlock) Version() Version { + return s.version +} +func (s *SuperBlock) Bytes() []byte { + header := make([]byte, SuperBlockSize) + header[0] = byte(s.version) + header[1] = s.ReplicaPlacement.Byte() + s.Ttl.ToBytes(header[2:4]) + util.Uint16toBytes(header[4:6], s.CompactRevision) + return header +} + +func (v *Volume) maybeWriteSuperBlock() error { + stat, e := v.dataFile.Stat() + if e != nil { + glog.V(0).Infof("failed to stat datafile %s: %v", v.dataFile, e) + return e + } + if stat.Size() == 0 { + v.SuperBlock.version = CurrentVersion + _, e = v.dataFile.Write(v.SuperBlock.Bytes()) + if e != nil && os.IsPermission(e) { + //read-only, but zero length - recreate it! 
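// os.Create truncates the zero-length file and reopens it read-write; if
// the super block write then succeeds, the volume leaves read-only mode.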
+ if v.dataFile, e = os.Create(v.dataFile.Name()); e == nil { + if _, e = v.dataFile.Write(v.SuperBlock.Bytes()); e == nil { + v.readOnly = false + } + } + } + } + return e +} +func (v *Volume) readSuperBlock() (err error) { + if _, err = v.dataFile.Seek(0, 0); err != nil { + return fmt.Errorf("cannot seek to the beginning of %s: %v", v.dataFile.Name(), err) + } + header := make([]byte, SuperBlockSize) + if _, e := v.dataFile.Read(header); e != nil { + return fmt.Errorf("cannot read volume %d super block: %v", v.Id, e) + } + v.SuperBlock, err = ParseSuperBlock(header) + return err +} +func ParseSuperBlock(header []byte) (superBlock SuperBlock, err error) { + superBlock.version = Version(header[0]) + if superBlock.ReplicaPlacement, err = NewReplicaPlacementFromByte(header[1]); err != nil { + err = fmt.Errorf("cannot read replica type: %s", err.Error()) + } + superBlock.Ttl = LoadTTLFromBytes(header[2:4]) + superBlock.CompactRevision = util.BytesToUint16(header[4:6]) + return +} diff --git a/weed/storage/volume_super_block_test.go b/weed/storage/volume_super_block_test.go new file mode 100644 index 000000000..13db4b194 --- /dev/null +++ b/weed/storage/volume_super_block_test.go @@ -0,0 +1,23 @@ +package storage + +import ( + "testing" +) + +func TestSuperBlockReadWrite(t *testing.T) { + rp, _ := NewReplicaPlacementFromByte(byte(001)) + ttl, _ := ReadTTL("15d") + s := &SuperBlock{ + version: CurrentVersion, + ReplicaPlacement: rp, + Ttl: ttl, + } + + bytes := s.Bytes() + + if !(bytes[2] == 15 && bytes[3] == Day) { + println("byte[2]:", bytes[2], "byte[3]:", bytes[3]) + t.Fail() + } + +} diff --git a/weed/storage/volume_sync.go b/weed/storage/volume_sync.go new file mode 100644 index 000000000..231ff31c2 --- /dev/null +++ b/weed/storage/volume_sync.go @@ -0,0 +1,213 @@ +package storage + +import ( + "fmt" + "io" + "io/ioutil" + "net/url" + "os" + "sort" + "strconv" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/util" +) + +// The volume sync with a master volume via 2 steps: +// 1. The slave checks master side to find subscription checkpoint +// to setup the replication. +// 2. The slave receives the updates from master + +/* +Assume the slave volume needs to follow the master volume. + +The master volume could be compacted, and could be many files ahead of +slave volume. + +Step 1: +The slave volume will ask the master volume for a snapshot +of (existing file entries, last offset, number of compacted times). + +For each entry x in master existing file entries: + if x does not exist locally: + add x locally + +For each entry y in local slave existing file entries: + if y does not exist on master: + delete y locally + +Step 2: +After this, use the last offset and number of compacted times to request +the master volume to send a new file, and keep looping. If the number of +compacted times is changed, go back to step 1 (very likely this can be +optimized more later). 
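For example, if the master's entries are {A, B, C} and the slave holds
{B, D}, step 1 fetches A and C from the master and deletes D locally;
step 2 then tails whatever the master appends after its reported last
offset.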
+ +*/ + +func (v *Volume) Synchronize(volumeServer string) (err error) { + var lastCompactRevision uint16 = 0 + var compactRevision uint16 = 0 + var masterMap CompactMap + for i := 0; i < 3; i++ { + if masterMap, _, compactRevision, err = fetchVolumeFileEntries(volumeServer, v.Id); err != nil { + return fmt.Errorf("Failed to sync volume %d entries with %s: %v", v.Id, volumeServer, err) + } + if lastCompactRevision != compactRevision && lastCompactRevision != 0 { + if err = v.Compact(); err != nil { + return fmt.Errorf("Compact Volume before synchronizing %v", err) + } + if err = v.commitCompact(); err != nil { + return fmt.Errorf("Commit Compact before synchronizing %v", err) + } + } + lastCompactRevision = compactRevision + if err = v.trySynchronizing(volumeServer, masterMap, compactRevision); err == nil { + return + } + } + return +} + +type ByOffset []NeedleValue + +func (a ByOffset) Len() int { return len(a) } +func (a ByOffset) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a ByOffset) Less(i, j int) bool { return a[i].Offset < a[j].Offset } + +// trySynchronizing sync with remote volume server incrementally by +// make up the local and remote delta. +func (v *Volume) trySynchronizing(volumeServer string, masterMap CompactMap, compactRevision uint16) error { + slaveIdxFile, err := os.Open(v.nm.IndexFileName()) + if err != nil { + return fmt.Errorf("Open volume %d index file: %v", v.Id, err) + } + defer slaveIdxFile.Close() + slaveMap, err := LoadNeedleMap(slaveIdxFile) + if err != nil { + return fmt.Errorf("Load volume %d index file: %v", v.Id, err) + } + var delta []NeedleValue + if err := masterMap.Visit(func(needleValue NeedleValue) error { + if needleValue.Key == 0 { + return nil + } + if _, ok := slaveMap.Get(uint64(needleValue.Key)); ok { + return nil // skip intersection + } + delta = append(delta, needleValue) + return nil + }); err != nil { + return fmt.Errorf("Add master entry: %v", err) + } + if err := slaveMap.m.Visit(func(needleValue NeedleValue) error { + if needleValue.Key == 0 { + return nil + } + if _, ok := masterMap.Get(needleValue.Key); ok { + return nil // skip intersection + } + needleValue.Size = 0 + delta = append(delta, needleValue) + return nil + }); err != nil { + return fmt.Errorf("Remove local entry: %v", err) + } + + // simulate to same ordering of remote .dat file needle entries + sort.Sort(ByOffset(delta)) + + // make up the delta + fetchCount := 0 + volumeDataContentHandlerUrl := "http://" + volumeServer + "/admin/sync/data" + for _, needleValue := range delta { + if needleValue.Size == 0 { + // remove file entry from local + v.removeNeedle(needleValue.Key) + continue + } + // add master file entry to local data file + if err := v.fetchNeedle(volumeDataContentHandlerUrl, needleValue, compactRevision); err != nil { + glog.V(0).Infof("Fetch needle %v from %s: %v", needleValue, volumeServer, err) + return err + } + fetchCount++ + } + glog.V(1).Infof("Fetched %d needles from %s", fetchCount, volumeServer) + return nil +} + +func fetchVolumeFileEntries(volumeServer string, vid VolumeId) (m CompactMap, lastOffset uint64, compactRevision uint16, err error) { + m = NewCompactMap() + + syncStatus, err := operation.GetVolumeSyncStatus(volumeServer, vid.String()) + if err != nil { + return m, 0, 0, err + } + + total := 0 + err = operation.GetVolumeIdxEntries(volumeServer, vid.String(), func(key uint64, offset, size uint32) { + // println("remote key", key, "offset", offset*NeedlePaddingSize, "size", size) + if offset != 0 && size != 0 { + m.Set(Key(key), 
offset, size) + } else { + m.Delete(Key(key)) + } + total++ + }) + + glog.V(2).Infof("server %s volume %d, entries %d, last offset %d, revision %d", volumeServer, vid, total, syncStatus.TailOffset, syncStatus.CompactRevision) + return m, syncStatus.TailOffset, syncStatus.CompactRevision, err + +} + +func (v *Volume) GetVolumeSyncStatus() operation.SyncVolumeResponse { + var syncStatus = operation.SyncVolumeResponse{} + if stat, err := v.dataFile.Stat(); err == nil { + syncStatus.TailOffset = uint64(stat.Size()) + } + syncStatus.IdxFileSize = v.nm.IndexFileSize() + syncStatus.CompactRevision = v.SuperBlock.CompactRevision + syncStatus.Ttl = v.SuperBlock.Ttl.String() + syncStatus.Replication = v.SuperBlock.ReplicaPlacement.String() + return syncStatus +} + +func (v *Volume) IndexFileContent() ([]byte, error) { + return v.nm.IndexFileContent() +} + +// removeNeedle removes one needle by needle key +func (v *Volume) removeNeedle(key Key) { + n := new(Needle) + n.Id = uint64(key) + v.delete(n) +} + +// fetchNeedle fetches a remote volume needle by vid, id, offset +// The compact revision is checked first in case the remote volume +// is compacted and the offset is invalid any more. +func (v *Volume) fetchNeedle(volumeDataContentHandlerUrl string, + needleValue NeedleValue, compactRevision uint16) error { + // add master file entry to local data file + values := make(url.Values) + values.Add("revision", strconv.Itoa(int(compactRevision))) + values.Add("volume", v.Id.String()) + values.Add("id", needleValue.Key.String()) + values.Add("offset", strconv.FormatUint(uint64(needleValue.Offset), 10)) + values.Add("size", strconv.FormatUint(uint64(needleValue.Size), 10)) + glog.V(4).Infof("Fetch %+v", needleValue) + return util.GetUrlStream(volumeDataContentHandlerUrl, values, func(r io.Reader) error { + b, err := ioutil.ReadAll(r) + if err != nil { + return fmt.Errorf("Reading from %s error: %v", volumeDataContentHandlerUrl, err) + } + offset, err := v.AppendBlob(b) + if err != nil { + return fmt.Errorf("Appending volume %d error: %v", v.Id, err) + } + // println("add key", needleValue.Key, "offset", offset, "size", needleValue.Size) + v.nm.Put(uint64(needleValue.Key), uint32(offset/NeedlePaddingSize), needleValue.Size) + return nil + }) +} diff --git a/weed/storage/volume_ttl.go b/weed/storage/volume_ttl.go new file mode 100644 index 000000000..4318bb048 --- /dev/null +++ b/weed/storage/volume_ttl.go @@ -0,0 +1,135 @@ +package storage + +import ( + "strconv" +) + +const ( + //stored unit types + Empty byte = iota + Minute + Hour + Day + Week + Month + Year +) + +type TTL struct { + count byte + unit byte +} + +var EMPTY_TTL = &TTL{} + +// translate a readable ttl to internal ttl +// Supports format example: +// 3m: 3 minutes +// 4h: 4 hours +// 5d: 5 days +// 6w: 6 weeks +// 7M: 7 months +// 8y: 8 years +func ReadTTL(ttlString string) (*TTL, error) { + if ttlString == "" { + return EMPTY_TTL, nil + } + ttlBytes := []byte(ttlString) + unitByte := ttlBytes[len(ttlBytes)-1] + countBytes := ttlBytes[0 : len(ttlBytes)-1] + if '0' <= unitByte && unitByte <= '9' { + countBytes = ttlBytes + unitByte = 'm' + } + count, err := strconv.Atoi(string(countBytes)) + unit := toStoredByte(unitByte) + return &TTL{count: byte(count), unit: unit}, err +} + +// read stored bytes to a ttl +func LoadTTLFromBytes(input []byte) (t *TTL) { + return &TTL{count: input[0], unit: input[1]} +} + +// read stored bytes to a ttl +func LoadTTLFromUint32(ttl uint32) (t *TTL) { + input := make([]byte, 2) + input[1] = byte(ttl) + input[0] 
= byte(ttl >> 8) + return LoadTTLFromBytes(input) +} + +// save stored bytes to an output with 2 bytes +func (t *TTL) ToBytes(output []byte) { + output[0] = t.count + output[1] = t.unit +} + +func (t *TTL) ToUint32() (output uint32) { + output = uint32(t.count) << 8 + output += uint32(t.unit) + return output +} + +func (t *TTL) String() string { + if t == nil || t.count == 0 { + return "" + } + if t.unit == Empty { + return "" + } + countString := strconv.Itoa(int(t.count)) + switch t.unit { + case Minute: + return countString + "m" + case Hour: + return countString + "h" + case Day: + return countString + "d" + case Week: + return countString + "w" + case Month: + return countString + "M" + case Year: + return countString + "y" + } + return "" +} + +func toStoredByte(readableUnitByte byte) byte { + switch readableUnitByte { + case 'm': + return Minute + case 'h': + return Hour + case 'd': + return Day + case 'w': + return Week + case 'M': + return Month + case 'y': + return Year + } + return 0 +} + +func (t TTL) Minutes() uint32 { + switch t.unit { + case Empty: + return 0 + case Minute: + return uint32(t.count) + case Hour: + return uint32(t.count) * 60 + case Day: + return uint32(t.count) * 60 * 24 + case Week: + return uint32(t.count) * 60 * 24 * 7 + case Month: + return uint32(t.count) * 60 * 24 * 31 + case Year: + return uint32(t.count) * 60 * 24 * 365 + } + return 0 +} diff --git a/weed/storage/volume_ttl_test.go b/weed/storage/volume_ttl_test.go new file mode 100644 index 000000000..216469a4c --- /dev/null +++ b/weed/storage/volume_ttl_test.go @@ -0,0 +1,60 @@ +package storage + +import ( + "testing" +) + +func TestTTLReadWrite(t *testing.T) { + ttl, _ := ReadTTL("") + if ttl.Minutes() != 0 { + t.Errorf("empty ttl:%v", ttl) + } + + ttl, _ = ReadTTL("9") + if ttl.Minutes() != 9 { + t.Errorf("9 ttl:%v", ttl) + } + + ttl, _ = ReadTTL("8m") + if ttl.Minutes() != 8 { + t.Errorf("8m ttl:%v", ttl) + } + + ttl, _ = ReadTTL("5h") + if ttl.Minutes() != 300 { + t.Errorf("5h ttl:%v", ttl) + } + + ttl, _ = ReadTTL("5d") + if ttl.Minutes() != 5*24*60 { + t.Errorf("5d ttl:%v", ttl) + } + + ttl, _ = ReadTTL("5w") + if ttl.Minutes() != 5*7*24*60 { + t.Errorf("5w ttl:%v", ttl) + } + + ttl, _ = ReadTTL("5M") + if ttl.Minutes() != 5*31*24*60 { + t.Errorf("5M ttl:%v", ttl) + } + + ttl, _ = ReadTTL("5y") + if ttl.Minutes() != 5*365*24*60 { + t.Errorf("5y ttl:%v", ttl) + } + + output := make([]byte, 2) + ttl.ToBytes(output) + ttl2 := LoadTTLFromBytes(output) + if ttl.Minutes() != ttl2.Minutes() { + t.Errorf("ttl:%v ttl2:%v", ttl, ttl2) + } + + ttl3 := LoadTTLFromUint32(ttl.ToUint32()) + if ttl.Minutes() != ttl3.Minutes() { + t.Errorf("ttl:%v ttl3:%v", ttl, ttl3) + } + +} diff --git a/weed/storage/volume_vacuum.go b/weed/storage/volume_vacuum.go new file mode 100644 index 000000000..9b9a27816 --- /dev/null +++ b/weed/storage/volume_vacuum.go @@ -0,0 +1,93 @@ +package storage + +import ( + "fmt" + "os" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +func (v *Volume) garbageLevel() float64 { + return float64(v.nm.DeletedSize()) / float64(v.ContentSize()) +} + +func (v *Volume) Compact() error { + glog.V(3).Infof("Compacting ...") + //no need to lock for copy on write + //v.accessLock.Lock() + //defer v.accessLock.Unlock() + //glog.V(3).Infof("Got Compaction lock...") + + filePath := v.FileName() + glog.V(3).Infof("creating copies for volume %d ...", v.Id) + return v.copyDataAndGenerateIndexFile(filePath+".cpd", filePath+".cpx") +} +func (v *Volume) commitCompact() error { + 
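// Swap the .cpd/.cpx files produced by Compact into place as the live
// .dat/.idx pair under the data file access lock, then reload the volume
// so the needle map points into the compacted data file.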
glog.V(3).Infof("Committing vacuuming...") + v.dataFileAccessLock.Lock() + defer v.dataFileAccessLock.Unlock() + glog.V(3).Infof("Got Committing lock...") + v.nm.Close() + _ = v.dataFile.Close() + var e error + if e = os.Rename(v.FileName()+".cpd", v.FileName()+".dat"); e != nil { + return e + } + if e = os.Rename(v.FileName()+".cpx", v.FileName()+".idx"); e != nil { + return e + } + //glog.V(3).Infof("Pretending to be vacuuming...") + //time.Sleep(20 * time.Second) + glog.V(3).Infof("Loading Commit file...") + if e = v.load(true, false, v.needleMapKind); e != nil { + return e + } + return nil +} + +func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string) (err error) { + var ( + dst, idx *os.File + ) + if dst, err = os.OpenFile(dstName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil { + return + } + defer dst.Close() + + if idx, err = os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil { + return + } + defer idx.Close() + + nm := NewNeedleMap(idx) + new_offset := int64(SuperBlockSize) + + now := uint64(time.Now().Unix()) + + err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind, + func(superBlock SuperBlock) error { + superBlock.CompactRevision++ + _, err = dst.Write(superBlock.Bytes()) + return err + }, true, func(n *Needle, offset int64) error { + if n.HasTtl() && now >= n.LastModified+uint64(v.Ttl.Minutes()*60) { + return nil + } + nv, ok := v.nm.Get(n.Id) + glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) + if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 { + if err = nm.Put(n.Id, uint32(new_offset/NeedlePaddingSize), n.Size); err != nil { + return fmt.Errorf("cannot put needle: %s", err) + } + if _, err = n.Append(dst, v.Version()); err != nil { + return fmt.Errorf("cannot append needle: %s", err) + } + new_offset += n.DiskSize() + glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size) + } + return nil + }) + + return +} diff --git a/weed/storage/volume_version.go b/weed/storage/volume_version.go new file mode 100644 index 000000000..2e9f58aa2 --- /dev/null +++ b/weed/storage/volume_version.go @@ -0,0 +1,9 @@ +package storage + +type Version uint8 + +const ( + Version1 = Version(1) + Version2 = Version(2) + CurrentVersion = Version2 +) diff --git a/weed/tools/read_index.go b/weed/tools/read_index.go new file mode 100644 index 000000000..642ff786b --- /dev/null +++ b/weed/tools/read_index.go @@ -0,0 +1,28 @@ +package main + +import ( + "flag" + "fmt" + "log" + "os" + + "github.com/chrislusf/seaweedfs/weed/storage" +) + +var ( + indexFileName = flag.String("file", "", ".idx file to analyze") +) + +func main() { + flag.Parse() + indexFile, err := os.OpenFile(*indexFileName, os.O_RDONLY, 0644) + if err != nil { + log.Fatalf("Create Volume Index [ERROR] %s\n", err) + } + defer indexFile.Close() + + storage.WalkIndexFile(indexFile, func(key uint64, offset, size uint32) error { + fmt.Printf("key %d, offset %d, size %d, nextOffset %d\n", key, offset*8, size, offset*8+size) + return nil + }) +} diff --git a/weed/topology/allocate_volume.go b/weed/topology/allocate_volume.go new file mode 100644 index 000000000..7b267a805 --- /dev/null +++ b/weed/topology/allocate_volume.go @@ -0,0 +1,35 @@ +package topology + +import ( + "encoding/json" + "errors" + "fmt" + "net/url" + + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type AllocateVolumeResult struct { + Error string +} + +func AllocateVolume(dn 
*DataNode, vid storage.VolumeId, option *VolumeGrowOption) error { + values := make(url.Values) + values.Add("volume", vid.String()) + values.Add("collection", option.Collection) + values.Add("replication", option.ReplicaPlacement.String()) + values.Add("ttl", option.Ttl.String()) + jsonBlob, err := util.Post("http://"+dn.Url()+"/admin/assign_volume", values) + if err != nil { + return err + } + var ret AllocateVolumeResult + if err := json.Unmarshal(jsonBlob, &ret); err != nil { + return fmt.Errorf("Invalid JSON result for %s: %s", "/admin/assign_volum", string(jsonBlob)) + } + if ret.Error != "" { + return errors.New(ret.Error) + } + return nil +} diff --git a/weed/topology/cluster_commands.go b/weed/topology/cluster_commands.go new file mode 100644 index 000000000..53f45ec4d --- /dev/null +++ b/weed/topology/cluster_commands.go @@ -0,0 +1,31 @@ +package topology + +import ( + "github.com/chrislusf/raft" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +type MaxVolumeIdCommand struct { + MaxVolumeId storage.VolumeId `json:"maxVolumeId"` +} + +func NewMaxVolumeIdCommand(value storage.VolumeId) *MaxVolumeIdCommand { + return &MaxVolumeIdCommand{ + MaxVolumeId: value, + } +} + +func (c *MaxVolumeIdCommand) CommandName() string { + return "MaxVolumeId" +} + +func (c *MaxVolumeIdCommand) Apply(server raft.Server) (interface{}, error) { + topo := server.Context().(*Topology) + before := topo.GetMaxVolumeId() + topo.UpAdjustMaxVolumeId(c.MaxVolumeId) + + glog.V(4).Infoln("max volume id", before, "==>", topo.GetMaxVolumeId()) + + return nil, nil +} diff --git a/weed/topology/collection.go b/weed/topology/collection.go new file mode 100644 index 000000000..a17f0c961 --- /dev/null +++ b/weed/topology/collection.go @@ -0,0 +1,57 @@ +package topology + +import ( + "fmt" + + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type Collection struct { + Name string + volumeSizeLimit uint64 + storageType2VolumeLayout *util.ConcurrentReadMap +} + +func NewCollection(name string, volumeSizeLimit uint64) *Collection { + c := &Collection{Name: name, volumeSizeLimit: volumeSizeLimit} + c.storageType2VolumeLayout = util.NewConcurrentReadMap() + return c +} + +func (c *Collection) String() string { + return fmt.Sprintf("Name:%s, volumeSizeLimit:%d, storageType2VolumeLayout:%v", c.Name, c.volumeSizeLimit, c.storageType2VolumeLayout) +} + +func (c *Collection) GetOrCreateVolumeLayout(rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout { + keyString := rp.String() + if ttl != nil { + keyString += ttl.String() + } + vl := c.storageType2VolumeLayout.Get(keyString, func() interface{} { + return NewVolumeLayout(rp, ttl, c.volumeSizeLimit) + }) + return vl.(*VolumeLayout) +} + +func (c *Collection) Lookup(vid storage.VolumeId) []*DataNode { + for _, vl := range c.storageType2VolumeLayout.Items() { + if vl != nil { + if list := vl.(*VolumeLayout).Lookup(vid); list != nil { + return list + } + } + } + return nil +} + +func (c *Collection) ListVolumeServers() (nodes []*DataNode) { + for _, vl := range c.storageType2VolumeLayout.Items() { + if vl != nil { + if list := vl.(*VolumeLayout).ListVolumeServers(); list != nil { + nodes = append(nodes, list...) 
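// Scans every replication/TTL layout in the collection; a data node that
// hosts volumes in more than one layout will appear in the result repeatedly.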
+ } + } + } + return +} diff --git a/weed/topology/configuration.go b/weed/topology/configuration.go new file mode 100644 index 000000000..ffcebb59c --- /dev/null +++ b/weed/topology/configuration.go @@ -0,0 +1,65 @@ +package topology + +import ( + "encoding/xml" +) + +type loc struct { + dcName string + rackName string +} +type rack struct { + Name string `xml:"name,attr"` + Ips []string `xml:"Ip"` +} +type dataCenter struct { + Name string `xml:"name,attr"` + Racks []rack `xml:"Rack"` +} +type topology struct { + DataCenters []dataCenter `xml:"DataCenter"` +} +type Configuration struct { + XMLName xml.Name `xml:"Configuration"` + Topo topology `xml:"Topology"` + ip2location map[string]loc +} + +func NewConfiguration(b []byte) (*Configuration, error) { + c := &Configuration{} + err := xml.Unmarshal(b, c) + c.ip2location = make(map[string]loc) + for _, dc := range c.Topo.DataCenters { + for _, rack := range dc.Racks { + for _, ip := range rack.Ips { + c.ip2location[ip] = loc{dcName: dc.Name, rackName: rack.Name} + } + } + } + return c, err +} + +func (c *Configuration) String() string { + if b, e := xml.MarshalIndent(c, " ", " "); e == nil { + return string(b) + } + return "" +} + +func (c *Configuration) Locate(ip string, dcName string, rackName string) (dc string, rack string) { + if c != nil && c.ip2location != nil { + if loc, ok := c.ip2location[ip]; ok { + return loc.dcName, loc.rackName + } + } + + if dcName == "" { + dcName = "DefaultDataCenter" + } + + if rackName == "" { + rackName = "DefaultRack" + } + + return dcName, rackName +} diff --git a/weed/topology/configuration_test.go b/weed/topology/configuration_test.go new file mode 100644 index 000000000..0a353d16e --- /dev/null +++ b/weed/topology/configuration_test.go @@ -0,0 +1,42 @@ +package topology + +import ( + "fmt" + "testing" +) + +func TestLoadConfiguration(t *testing.T) { + + confContent := ` + +<?xml version="1.0" encoding="UTF-8" ?> +<Configuration> + <Topology> + <DataCenter name="dc1"> + <Rack name="rack1"> + <Ip>192.168.1.1</Ip> + </Rack> + </DataCenter> + <DataCenter name="dc2"> + <Rack name="rack1"> + <Ip>192.168.1.2</Ip> + </Rack> + <Rack name="rack2"> + <Ip>192.168.1.3</Ip> + <Ip>192.168.1.4</Ip> + </Rack> + </DataCenter> + </Topology> +</Configuration> +` + c, err := NewConfiguration([]byte(confContent)) + + fmt.Printf("%s\n", c) + if err != nil { + t.Fatalf("unmarshal error:%v", err) + } + + if len(c.Topo.DataCenters) <= 0 || c.Topo.DataCenters[0].Name != "dc1" { + t.Fatalf("unmarshal error:%s", c) + } +} diff --git a/weed/topology/data_center.go b/weed/topology/data_center.go new file mode 100644 index 000000000..bcf2dfd31 --- /dev/null +++ b/weed/topology/data_center.go @@ -0,0 +1,40 @@ +package topology + +type DataCenter struct { + NodeImpl +} + +func NewDataCenter(id string) *DataCenter { + dc := &DataCenter{} + dc.id = NodeId(id) + dc.nodeType = "DataCenter" + dc.children = make(map[NodeId]Node) + dc.NodeImpl.value = dc + return dc +} + +func (dc *DataCenter) GetOrCreateRack(rackName string) *Rack { + for _, c := range dc.Children() { + rack := c.(*Rack) + if string(rack.Id()) == rackName { + return rack + } + } + rack := NewRack(rackName) + dc.LinkChildNode(rack) + return rack +} + +func (dc *DataCenter) ToMap() interface{} { + m := make(map[string]interface{}) + m["Id"] = dc.Id() + m["Max"] = dc.GetMaxVolumeCount() + m["Free"] = dc.FreeSpace() + var racks []interface{} + for _, c := range dc.Children() { + rack := c.(*Rack) + racks = append(racks, rack.ToMap()) + } + m["Racks"] = racks + return m +} 
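The ToMap methods on DataCenter above, and on Rack and DataNode below, nest into the map that the master serializes when reporting the topology. A minimal sketch of how they compose, assuming only the constructors introduced in this patch (illustrative, not part of the change itself):

	package main

	import (
		"encoding/json"
		"fmt"

		"github.com/chrislusf/seaweedfs/weed/topology"
	)

	func main() {
		// Build a one-data-center, one-rack tree, then render it the way a
		// status handler would: ToMap() followed by JSON encoding.
		dc := topology.NewDataCenter("dc1")
		dc.GetOrCreateRack("rack1")
		b, _ := json.MarshalIndent(dc.ToMap(), "", "  ")
		fmt.Println(string(b))
		// Prints {"Free":0,"Id":"dc1","Max":0,"Racks":[...]} for this empty tree.
	}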
diff --git a/weed/topology/data_node.go b/weed/topology/data_node.go new file mode 100644 index 000000000..1404d4aa8 --- /dev/null +++ b/weed/topology/data_node.go @@ -0,0 +1,115 @@ +package topology + +import ( + "fmt" + "strconv" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +type DataNode struct { + NodeImpl + volumes map[storage.VolumeId]storage.VolumeInfo + Ip string + Port int + PublicUrl string + LastSeen int64 // unix time in seconds + Dead bool +} + +func NewDataNode(id string) *DataNode { + s := &DataNode{} + s.id = NodeId(id) + s.nodeType = "DataNode" + s.volumes = make(map[storage.VolumeId]storage.VolumeInfo) + s.NodeImpl.value = s + return s +} + +func (dn *DataNode) String() string { + dn.RLock() + defer dn.RUnlock() + return fmt.Sprintf("Node:%s, volumes:%v, Ip:%s, Port:%d, PublicUrl:%s, Dead:%v", dn.NodeImpl.String(), dn.volumes, dn.Ip, dn.Port, dn.PublicUrl, dn.Dead) +} + +func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) { + dn.Lock() + defer dn.Unlock() + if _, ok := dn.volumes[v.Id]; !ok { + dn.volumes[v.Id] = v + dn.UpAdjustVolumeCountDelta(1) + if !v.ReadOnly { + dn.UpAdjustActiveVolumeCountDelta(1) + } + dn.UpAdjustMaxVolumeId(v.Id) + } else { + dn.volumes[v.Id] = v + } +} + +func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (deletedVolumes []storage.VolumeInfo) { + actualVolumeMap := make(map[storage.VolumeId]storage.VolumeInfo) + for _, v := range actualVolumes { + actualVolumeMap[v.Id] = v + } + dn.RLock() + for vid, v := range dn.volumes { + if _, ok := actualVolumeMap[vid]; !ok { + glog.V(0).Infoln("Deleting volume id:", vid) + delete(dn.volumes, vid) + deletedVolumes = append(deletedVolumes, v) + dn.UpAdjustVolumeCountDelta(-1) + dn.UpAdjustActiveVolumeCountDelta(-1) + } + } //TODO: adjust max volume id, if need to reclaim volume ids + dn.RUnlock() + for _, v := range actualVolumes { + dn.AddOrUpdateVolume(v) + } + return +} + +func (dn *DataNode) GetVolumes() (ret []storage.VolumeInfo) { + dn.RLock() + for _, v := range dn.volumes { + ret = append(ret, v) + } + dn.RUnlock() + return ret +} + +func (dn *DataNode) GetDataCenter() *DataCenter { + return dn.Parent().Parent().(*NodeImpl).value.(*DataCenter) +} + +func (dn *DataNode) GetRack() *Rack { + return dn.Parent().(*NodeImpl).value.(*Rack) +} + +func (dn *DataNode) GetTopology() *Topology { + p := dn.Parent() + for p.Parent() != nil { + p = p.Parent() + } + t := p.(*Topology) + return t +} + +func (dn *DataNode) MatchLocation(ip string, port int) bool { + return dn.Ip == ip && dn.Port == port +} + +func (dn *DataNode) Url() string { + return dn.Ip + ":" + strconv.Itoa(dn.Port) +} + +func (dn *DataNode) ToMap() interface{} { + ret := make(map[string]interface{}) + ret["Url"] = dn.Url() + ret["Volumes"] = dn.GetVolumeCount() + ret["Max"] = dn.GetMaxVolumeCount() + ret["Free"] = dn.FreeSpace() + ret["PublicUrl"] = dn.PublicUrl + return ret +} diff --git a/weed/topology/node.go b/weed/topology/node.go new file mode 100644 index 000000000..4ce35f4b0 --- /dev/null +++ b/weed/topology/node.go @@ -0,0 +1,272 @@ +package topology + +import ( + "errors" + "math/rand" + "strings" + "sync" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +type NodeId string +type Node interface { + Id() NodeId + String() string + FreeSpace() int + ReserveOneVolume(r int) (*DataNode, error) + UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int) + UpAdjustVolumeCountDelta(volumeCountDelta int) + 
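// The UpAdjust* methods propagate a delta from a DataNode up through its
// Rack, DataCenter, and Topology ancestors, so each level maintains its
// aggregate counts without rescanning children (see NodeImpl below).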
UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int) + UpAdjustMaxVolumeId(vid storage.VolumeId) + + GetVolumeCount() int + GetActiveVolumeCount() int + GetMaxVolumeCount() int + GetMaxVolumeId() storage.VolumeId + SetParent(Node) + LinkChildNode(node Node) + UnlinkChildNode(nodeId NodeId) + CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64) + + IsDataNode() bool + IsRack() bool + IsDataCenter() bool + Children() []Node + Parent() Node + + GetValue() interface{} //get reference to the topology,dc,rack,datanode +} +type NodeImpl struct { + id NodeId + volumeCount int + activeVolumeCount int + maxVolumeCount int + parent Node + sync.RWMutex // lock children + children map[NodeId]Node + maxVolumeId storage.VolumeId + + //for rack, data center, topology + nodeType string + value interface{} +} + +// the first node must satisfy filterFirstNodeFn(), the rest nodes must have one free slot +func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) { + candidates := make([]Node, 0, len(n.children)) + var errs []string + n.RLock() + for _, node := range n.children { + if err := filterFirstNodeFn(node); err == nil { + candidates = append(candidates, node) + } else { + errs = append(errs, string(node.Id())+":"+err.Error()) + } + } + n.RUnlock() + if len(candidates) == 0 { + return nil, nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n")) + } + firstNode = candidates[rand.Intn(len(candidates))] + glog.V(2).Infoln(n.Id(), "picked main node:", firstNode.Id()) + + restNodes = make([]Node, numberOfNodes-1) + candidates = candidates[:0] + n.RLock() + for _, node := range n.children { + if node.Id() == firstNode.Id() { + continue + } + if node.FreeSpace() <= 0 { + continue + } + glog.V(2).Infoln("select rest node candidate:", node.Id()) + candidates = append(candidates, node) + } + n.RUnlock() + glog.V(2).Infoln(n.Id(), "picking", numberOfNodes-1, "from rest", len(candidates), "node candidates") + ret := len(restNodes) == 0 + for k, node := range candidates { + if k < len(restNodes) { + restNodes[k] = node + if k == len(restNodes)-1 { + ret = true + } + } else { + r := rand.Intn(k + 1) + if r < len(restNodes) { + restNodes[r] = node + } + } + } + if !ret { + glog.V(2).Infoln(n.Id(), "failed to pick", numberOfNodes-1, "from rest", len(candidates), "node candidates") + err = errors.New("Not enough data node found!") + } + return +} + +func (n *NodeImpl) IsDataNode() bool { + return n.nodeType == "DataNode" +} +func (n *NodeImpl) IsRack() bool { + return n.nodeType == "Rack" +} +func (n *NodeImpl) IsDataCenter() bool { + return n.nodeType == "DataCenter" +} +func (n *NodeImpl) String() string { + if n.parent != nil { + return n.parent.String() + ":" + string(n.id) + } + return string(n.id) +} +func (n *NodeImpl) Id() NodeId { + return n.id +} +func (n *NodeImpl) FreeSpace() int { + return n.maxVolumeCount - n.volumeCount +} +func (n *NodeImpl) SetParent(node Node) { + n.parent = node +} +func (n *NodeImpl) Children() (ret []Node) { + n.RLock() + defer n.RUnlock() + for _, c := range n.children { + ret = append(ret, c) + } + return ret +} +func (n *NodeImpl) Parent() Node { + return n.parent +} +func (n *NodeImpl) GetValue() interface{} { + return n.value +} +func (n *NodeImpl) ReserveOneVolume(r int) (assignedNode *DataNode, err error) { + n.RLock() + defer n.RUnlock() + for _, node := range n.children { + freeSpace := node.FreeSpace() + // fmt.Println("r =", r, ", node =", 
node, ", freeSpace =", freeSpace) + if freeSpace <= 0 { + continue + } + if r >= freeSpace { + r -= freeSpace + } else { + if node.IsDataNode() && node.FreeSpace() > 0 { + // fmt.Println("vid =", vid, " assigned to node =", node, ", freeSpace =", node.FreeSpace()) + return node.(*DataNode), nil + } + assignedNode, err = node.ReserveOneVolume(r) + if err != nil { + return + } + } + } + return +} + +func (n *NodeImpl) UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int) { //can be negative + n.maxVolumeCount += maxVolumeCountDelta + if n.parent != nil { + n.parent.UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta) + } +} +func (n *NodeImpl) UpAdjustVolumeCountDelta(volumeCountDelta int) { //can be negative + n.volumeCount += volumeCountDelta + if n.parent != nil { + n.parent.UpAdjustVolumeCountDelta(volumeCountDelta) + } +} +func (n *NodeImpl) UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int) { //can be negative + n.activeVolumeCount += activeVolumeCountDelta + if n.parent != nil { + n.parent.UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta) + } +} +func (n *NodeImpl) UpAdjustMaxVolumeId(vid storage.VolumeId) { //can be negative + if n.maxVolumeId < vid { + n.maxVolumeId = vid + if n.parent != nil { + n.parent.UpAdjustMaxVolumeId(vid) + } + } +} +func (n *NodeImpl) GetMaxVolumeId() storage.VolumeId { + return n.maxVolumeId +} +func (n *NodeImpl) GetVolumeCount() int { + return n.volumeCount +} +func (n *NodeImpl) GetActiveVolumeCount() int { + return n.activeVolumeCount +} +func (n *NodeImpl) GetMaxVolumeCount() int { + return n.maxVolumeCount +} + +func (n *NodeImpl) LinkChildNode(node Node) { + n.Lock() + defer n.Unlock() + if n.children[node.Id()] == nil { + n.children[node.Id()] = node + n.UpAdjustMaxVolumeCountDelta(node.GetMaxVolumeCount()) + n.UpAdjustMaxVolumeId(node.GetMaxVolumeId()) + n.UpAdjustVolumeCountDelta(node.GetVolumeCount()) + n.UpAdjustActiveVolumeCountDelta(node.GetActiveVolumeCount()) + node.SetParent(n) + glog.V(0).Infoln(n, "adds child", node.Id()) + } +} + +func (n *NodeImpl) UnlinkChildNode(nodeId NodeId) { + n.Lock() + defer n.Unlock() + node := n.children[nodeId] + if node != nil { + node.SetParent(nil) + delete(n.children, node.Id()) + n.UpAdjustVolumeCountDelta(-node.GetVolumeCount()) + n.UpAdjustActiveVolumeCountDelta(-node.GetActiveVolumeCount()) + n.UpAdjustMaxVolumeCountDelta(-node.GetMaxVolumeCount()) + glog.V(0).Infoln(n, "removes", node, "volumeCount =", n.activeVolumeCount) + } +} + +func (n *NodeImpl) CollectDeadNodeAndFullVolumes(freshThreshHold int64, volumeSizeLimit uint64) { + if n.IsRack() { + for _, c := range n.Children() { + dn := c.(*DataNode) //can not cast n to DataNode + if dn.LastSeen < freshThreshHold { + if !dn.Dead { + dn.Dead = true + n.GetTopology().chanDeadDataNodes <- dn + } + } + for _, v := range dn.GetVolumes() { + if uint64(v.Size) >= volumeSizeLimit { + //fmt.Println("volume",v.Id,"size",v.Size,">",volumeSizeLimit) + n.GetTopology().chanFullVolumes <- v + } + } + } + } else { + for _, c := range n.Children() { + c.CollectDeadNodeAndFullVolumes(freshThreshHold, volumeSizeLimit) + } + } +} + +func (n *NodeImpl) GetTopology() *Topology { + var p Node + p = n + for p.Parent() != nil { + p = p.Parent() + } + return p.GetValue().(*Topology) +} diff --git a/weed/topology/rack.go b/weed/topology/rack.go new file mode 100644 index 000000000..1ca2f8de8 --- /dev/null +++ b/weed/topology/rack.go @@ -0,0 +1,65 @@ +package topology + +import ( + "strconv" + "time" +) + +type Rack struct { + NodeImpl +} + +func NewRack(id string) 
*Rack { + r := &Rack{} + r.id = NodeId(id) + r.nodeType = "Rack" + r.children = make(map[NodeId]Node) + r.NodeImpl.value = r + return r +} + +func (r *Rack) FindDataNode(ip string, port int) *DataNode { + for _, c := range r.Children() { + dn := c.(*DataNode) + if dn.MatchLocation(ip, port) { + return dn + } + } + return nil +} +func (r *Rack) GetOrCreateDataNode(ip string, port int, publicUrl string, maxVolumeCount int) *DataNode { + for _, c := range r.Children() { + dn := c.(*DataNode) + if dn.MatchLocation(ip, port) { + dn.LastSeen = time.Now().Unix() + if dn.Dead { + dn.Dead = false + r.GetTopology().chanRecoveredDataNodes <- dn + dn.UpAdjustMaxVolumeCountDelta(maxVolumeCount - dn.maxVolumeCount) + } + return dn + } + } + dn := NewDataNode(ip + ":" + strconv.Itoa(port)) + dn.Ip = ip + dn.Port = port + dn.PublicUrl = publicUrl + dn.maxVolumeCount = maxVolumeCount + dn.LastSeen = time.Now().Unix() + r.LinkChildNode(dn) + return dn +} + +func (r *Rack) ToMap() interface{} { + m := make(map[string]interface{}) + m["Id"] = r.Id() + m["Max"] = r.GetMaxVolumeCount() + m["Free"] = r.FreeSpace() + var dns []interface{} + for _, c := range r.Children() { + dn := c.(*DataNode) + dns = append(dns, dn.ToMap()) + } + m["DataNodes"] = dns + return m +} diff --git a/weed/topology/store_replicate.go b/weed/topology/store_replicate.go new file mode 100644 index 000000000..be5777167 --- /dev/null +++ b/weed/topology/store_replicate.go @@ -0,0 +1,150 @@ +package topology + +import ( + "bytes" + "errors" + "fmt" + "net/http" + "strconv" + "strings" + + "net/url" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func ReplicatedWrite(masterNode string, s *storage.Store, + volumeId storage.VolumeId, needle *storage.Needle, + r *http.Request) (size uint32, errorStatus string) { + + //check JWT + jwt := security.GetJwt(r) + + ret, err := s.Write(volumeId, needle) + needToReplicate := !s.HasVolume(volumeId) + if err != nil { + errorStatus = "Failed to write to local disk (" + err.Error() + ")" + } else if ret > 0 { + needToReplicate = needToReplicate || s.GetVolume(volumeId).NeedToReplicate() + } else { + errorStatus = "Failed to write to local disk" + } + if !needToReplicate && ret > 0 { + needToReplicate = s.GetVolume(volumeId).NeedToReplicate() + } + if needToReplicate { //send to other replica locations + if r.FormValue("type") != "replicate" { + + if err = distributedOperation(masterNode, s, volumeId, func(location operation.Location) error { + u := url.URL{ + Scheme: "http", + Host: location.Url, + Path: r.URL.Path, + } + q := url.Values{ + "type": {"replicate"}, + } + if needle.LastModified > 0 { + q.Set("ts", strconv.FormatUint(needle.LastModified, 10)) + } + if needle.IsChunkedManifest() { + q.Set("cm", "true") + } + u.RawQuery = q.Encode() + _, err := operation.Upload(u.String(), + string(needle.Name), bytes.NewReader(needle.Data), needle.IsGzipped(), string(needle.Mime), + jwt) + return err + }); err != nil { + ret = 0 + errorStatus = fmt.Sprintf("Failed to write to replicas for volume %d: %v", volumeId, err) + } + } + } + size = ret + return +} + +func ReplicatedDelete(masterNode string, store *storage.Store, + volumeId storage.VolumeId, n *storage.Needle, + r *http.Request) (uint32, error) { + + //check JWT + jwt := security.GetJwt(r) + + ret, err := store.Delete(volumeId, n) + if err != nil { + 
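// A failed local delete is returned immediately; replication to the other
// copies is only attempted once the local store has succeeded.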
glog.V(0).Infoln("delete error:", err) + return ret, err + } + + needToReplicate := !store.HasVolume(volumeId) + if !needToReplicate && ret > 0 { + needToReplicate = store.GetVolume(volumeId).NeedToReplicate() + } + if needToReplicate { //send to other replica locations + if r.FormValue("type") != "replicate" { + if err = distributedOperation(masterNode, store, volumeId, func(location operation.Location) error { + return util.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", jwt) + }); err != nil { + ret = 0 + } + } + } + return ret, err +} + +type DistributedOperationResult map[string]error + +func (dr DistributedOperationResult) Error() error { + var errs []string + for k, v := range dr { + if v != nil { + errs = append(errs, fmt.Sprintf("[%s]: %v", k, v)) + } + } + if len(errs) == 0 { + return nil + } + return errors.New(strings.Join(errs, "\n")) +} + +type RemoteResult struct { + Host string + Error error +} + +func distributedOperation(masterNode string, store *storage.Store, volumeId storage.VolumeId, op func(location operation.Location) error) error { + if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil { + length := 0 + selfUrl := (store.Ip + ":" + strconv.Itoa(store.Port)) + results := make(chan RemoteResult) + for _, location := range lookupResult.Locations { + if location.Url != selfUrl { + length++ + go func(location operation.Location, results chan RemoteResult) { + results <- RemoteResult{location.Url, op(location)} + }(location, results) + } + } + ret := DistributedOperationResult(make(map[string]error)) + for i := 0; i < length; i++ { + result := <-results + ret[result.Host] = result.Error + } + if volume := store.GetVolume(volumeId); volume != nil { + if length+1 < volume.ReplicaPlacement.GetCopyCount() { + return fmt.Errorf("replicating opetations [%d] is less than volume's replication copy count [%d]", length+1, volume.ReplicaPlacement.GetCopyCount()) + } + } + return ret.Error() + } else { + glog.V(0).Infoln() + return fmt.Errorf("Failed to lookup for %d: %v", volumeId, lookupErr) + } + return nil +} diff --git a/weed/topology/topo_test.go b/weed/topology/topo_test.go new file mode 100644 index 000000000..9a0dbc6b8 --- /dev/null +++ b/weed/topology/topo_test.go @@ -0,0 +1,17 @@ +package topology + +import ( + "testing" +) + +func TestRemoveDataCenter(t *testing.T) { + topo := setup(topologyLayout) + topo.UnlinkChildNode(NodeId("dc2")) + if topo.GetActiveVolumeCount() != 15 { + t.Fail() + } + topo.UnlinkChildNode(NodeId("dc3")) + if topo.GetActiveVolumeCount() != 12 { + t.Fail() + } +} diff --git a/weed/topology/topology.go b/weed/topology/topology.go new file mode 100644 index 000000000..04b500053 --- /dev/null +++ b/weed/topology/topology.go @@ -0,0 +1,189 @@ +package topology + +import ( + "errors" + "io/ioutil" + "math/rand" + + "github.com/chrislusf/raft" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/sequence" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type Topology struct { + NodeImpl + + collectionMap *util.ConcurrentReadMap + + pulse int64 + + volumeSizeLimit uint64 + + Sequence sequence.Sequencer + + chanDeadDataNodes chan *DataNode + chanRecoveredDataNodes chan *DataNode + chanFullVolumes chan storage.VolumeInfo + + configuration *Configuration + + RaftServer raft.Server +} + +func NewTopology(id string, confFile string, seq sequence.Sequencer, volumeSizeLimit 
uint64, pulse int) (*Topology, error) { + t := &Topology{} + t.id = NodeId(id) + t.nodeType = "Topology" + t.NodeImpl.value = t + t.children = make(map[NodeId]Node) + t.collectionMap = util.NewConcurrentReadMap() + t.pulse = int64(pulse) + t.volumeSizeLimit = volumeSizeLimit + + t.Sequence = seq + + t.chanDeadDataNodes = make(chan *DataNode) + t.chanRecoveredDataNodes = make(chan *DataNode) + t.chanFullVolumes = make(chan storage.VolumeInfo) + + err := t.loadConfiguration(confFile) + + return t, err +} + +func (t *Topology) IsLeader() bool { + if leader, e := t.Leader(); e == nil { + return leader == t.RaftServer.Name() + } + return false +} + +func (t *Topology) Leader() (string, error) { + l := "" + if t.RaftServer != nil { + l = t.RaftServer.Leader() + } else { + return "", errors.New("Raft Server not ready yet!") + } + + if l == "" { + // We are a single node cluster, we are the leader + return t.RaftServer.Name(), errors.New("Raft Server not initialized!") + } + + return l, nil +} + +func (t *Topology) loadConfiguration(configurationFile string) error { + b, e := ioutil.ReadFile(configurationFile) + if e == nil { + t.configuration, e = NewConfiguration(b) + return e + } + glog.V(0).Infoln("Using default configurations.") + return nil +} + +func (t *Topology) Lookup(collection string, vid storage.VolumeId) []*DataNode { + //maybe an issue if lots of collections? + if collection == "" { + for _, c := range t.collectionMap.Items() { + if list := c.(*Collection).Lookup(vid); list != nil { + return list + } + } + } else { + if c, ok := t.collectionMap.Find(collection); ok { + return c.(*Collection).Lookup(vid) + } + } + return nil +} + +func (t *Topology) NextVolumeId() storage.VolumeId { + vid := t.GetMaxVolumeId() + next := vid.Next() + go t.RaftServer.Do(NewMaxVolumeIdCommand(next)) + return next +} + +func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool { + vl := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl) + return vl.GetActiveVolumeCount(option) > 0 +} + +func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *DataNode, error) { + vid, count, datanodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl).PickForWrite(count, option) + if err != nil || datanodes.Length() == 0 { + return "", 0, nil, errors.New("No writable volumes available!") + } + fileId, count := t.Sequence.NextFileId(count) + return storage.NewFileId(*vid, fileId, rand.Uint32()).String(), count, datanodes.Head(), nil +} + +func (t *Topology) GetVolumeLayout(collectionName string, rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout { + return t.collectionMap.Get(collectionName, func() interface{} { + return NewCollection(collectionName, t.volumeSizeLimit) + }).(*Collection).GetOrCreateVolumeLayout(rp, ttl) +} + +func (t *Topology) FindCollection(collectionName string) (*Collection, bool) { + c, hasCollection := t.collectionMap.Find(collectionName) + return c.(*Collection), hasCollection +} + +func (t *Topology) DeleteCollection(collectionName string) { + t.collectionMap.Delete(collectionName) +} + +func (t *Topology) RegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) { + t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl).RegisterVolume(&v, dn) +} +func (t *Topology) UnRegisterVolumeLayout(v storage.VolumeInfo, dn *DataNode) { + glog.Infof("removing volume info:%+v", v) + t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl).UnRegisterVolume(&v, dn) +} + +func (t *Topology) 
ProcessJoinMessage(joinMessage *operation.JoinMessage) { + t.Sequence.SetMax(*joinMessage.MaxFileKey) + dcName, rackName := t.configuration.Locate(*joinMessage.Ip, *joinMessage.DataCenter, *joinMessage.Rack) + dc := t.GetOrCreateDataCenter(dcName) + rack := dc.GetOrCreateRack(rackName) + dn := rack.FindDataNode(*joinMessage.Ip, int(*joinMessage.Port)) + if *joinMessage.IsInit && dn != nil { + t.UnRegisterDataNode(dn) + } + dn = rack.GetOrCreateDataNode(*joinMessage.Ip, + int(*joinMessage.Port), *joinMessage.PublicUrl, + int(*joinMessage.MaxVolumeCount)) + var volumeInfos []storage.VolumeInfo + for _, v := range joinMessage.Volumes { + if vi, err := storage.NewVolumeInfo(v); err == nil { + volumeInfos = append(volumeInfos, vi) + } else { + glog.V(0).Infoln("Fail to convert joined volume information:", err.Error()) + } + } + deletedVolumes := dn.UpdateVolumes(volumeInfos) + for _, v := range volumeInfos { + t.RegisterVolumeLayout(v, dn) + } + for _, v := range deletedVolumes { + t.UnRegisterVolumeLayout(v, dn) + } +} + +func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter { + for _, c := range t.Children() { + dc := c.(*DataCenter) + if string(dc.Id()) == dcName { + return dc + } + } + dc := NewDataCenter(dcName) + t.LinkChildNode(dc) + return dc +} diff --git a/weed/topology/topology_event_handling.go b/weed/topology/topology_event_handling.go new file mode 100644 index 000000000..737b94482 --- /dev/null +++ b/weed/topology/topology_event_handling.go @@ -0,0 +1,74 @@ +package topology + +import ( + "math/rand" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +func (t *Topology) StartRefreshWritableVolumes(garbageThreshold string) { + go func() { + for { + if t.IsLeader() { + freshThreshHold := time.Now().Unix() - 3*t.pulse //3 times of sleep interval + t.CollectDeadNodeAndFullVolumes(freshThreshHold, t.volumeSizeLimit) + } + time.Sleep(time.Duration(float32(t.pulse*1e3)*(1+rand.Float32())) * time.Millisecond) + } + }() + go func(garbageThreshold string) { + c := time.Tick(15 * time.Minute) + for _ = range c { + if t.IsLeader() { + t.Vacuum(garbageThreshold) + } + } + }(garbageThreshold) + go func() { + for { + select { + case v := <-t.chanFullVolumes: + t.SetVolumeCapacityFull(v) + case dn := <-t.chanRecoveredDataNodes: + t.RegisterRecoveredDataNode(dn) + glog.V(0).Infoln("DataNode", dn, "is back alive!") + case dn := <-t.chanDeadDataNodes: + t.UnRegisterDataNode(dn) + glog.V(0).Infoln("DataNode", dn, "is dead!") + } + } + }() +} +func (t *Topology) SetVolumeCapacityFull(volumeInfo storage.VolumeInfo) bool { + vl := t.GetVolumeLayout(volumeInfo.Collection, volumeInfo.ReplicaPlacement, volumeInfo.Ttl) + if !vl.SetVolumeCapacityFull(volumeInfo.Id) { + return false + } + for _, dn := range vl.vid2location[volumeInfo.Id].list { + if !volumeInfo.ReadOnly { + dn.UpAdjustActiveVolumeCountDelta(-1) + } + } + return true +} +func (t *Topology) UnRegisterDataNode(dn *DataNode) { + for _, v := range dn.GetVolumes() { + glog.V(0).Infoln("Removing Volume", v.Id, "from the dead volume server", dn) + vl := t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl) + vl.SetVolumeUnavailable(dn, v.Id) + } + dn.UpAdjustVolumeCountDelta(-dn.GetVolumeCount()) + dn.UpAdjustActiveVolumeCountDelta(-dn.GetActiveVolumeCount()) + dn.UpAdjustMaxVolumeCountDelta(-dn.GetMaxVolumeCount()) + dn.Parent().UnlinkChildNode(dn.Id()) +} +func (t *Topology) RegisterRecoveredDataNode(dn *DataNode) { + for _, v := range dn.GetVolumes() { + vl := 
t.GetVolumeLayout(v.Collection, v.ReplicaPlacement, v.Ttl) + if vl.isWritable(&v) { + vl.SetVolumeAvailable(dn, v.Id) + } + } +} diff --git a/weed/topology/topology_map.go b/weed/topology/topology_map.go new file mode 100644 index 000000000..ce8e9e663 --- /dev/null +++ b/weed/topology/topology_map.go @@ -0,0 +1,53 @@ +package topology + +func (t *Topology) ToMap() interface{} { + m := make(map[string]interface{}) + m["Max"] = t.GetMaxVolumeCount() + m["Free"] = t.FreeSpace() + var dcs []interface{} + for _, c := range t.Children() { + dc := c.(*DataCenter) + dcs = append(dcs, dc.ToMap()) + } + m["DataCenters"] = dcs + var layouts []interface{} + for _, col := range t.collectionMap.Items() { + c := col.(*Collection) + for _, layout := range c.storageType2VolumeLayout.Items() { + if layout != nil { + tmp := layout.(*VolumeLayout).ToMap() + tmp["collection"] = c.Name + layouts = append(layouts, tmp) + } + } + } + m["layouts"] = layouts + return m +} + +func (t *Topology) ToVolumeMap() interface{} { + m := make(map[string]interface{}) + m["Max"] = t.GetMaxVolumeCount() + m["Free"] = t.FreeSpace() + dcs := make(map[NodeId]interface{}) + for _, c := range t.Children() { + dc := c.(*DataCenter) + racks := make(map[NodeId]interface{}) + for _, r := range dc.Children() { + rack := r.(*Rack) + dataNodes := make(map[NodeId]interface{}) + for _, d := range rack.Children() { + dn := d.(*DataNode) + var volumes []interface{} + for _, v := range dn.GetVolumes() { + volumes = append(volumes, v) + } + dataNodes[d.Id()] = volumes + } + racks[r.Id()] = dataNodes + } + dcs[dc.Id()] = racks + } + m["DataCenters"] = dcs + return m +} diff --git a/weed/topology/topology_vacuum.go b/weed/topology/topology_vacuum.go new file mode 100644 index 000000000..8cf8dfbeb --- /dev/null +++ b/weed/topology/topology_vacuum.go @@ -0,0 +1,158 @@ +package topology + +import ( + "encoding/json" + "errors" + "net/url" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func batchVacuumVolumeCheck(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList, garbageThreshold string) bool { + ch := make(chan bool, locationlist.Length()) + for index, dn := range locationlist.list { + go func(index int, url string, vid storage.VolumeId) { + //glog.V(0).Infoln(index, "Check vacuuming", vid, "on", dn.Url()) + if e, ret := vacuumVolume_Check(url, vid, garbageThreshold); e != nil { + //glog.V(0).Infoln(index, "Error when checking vacuuming", vid, "on", url, e) + ch <- false + } else { + //glog.V(0).Infoln(index, "Checked vacuuming", vid, "on", url, "needVacuum", ret) + ch <- ret + } + }(index, dn.Url(), vid) + } + isCheckSuccess := true + for _ = range locationlist.list { + select { + case canVacuum := <-ch: + isCheckSuccess = isCheckSuccess && canVacuum + case <-time.After(30 * time.Minute): + isCheckSuccess = false + break + } + } + return isCheckSuccess +} +func batchVacuumVolumeCompact(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList) bool { + vl.removeFromWritable(vid) + ch := make(chan bool, locationlist.Length()) + for index, dn := range locationlist.list { + go func(index int, url string, vid storage.VolumeId) { + glog.V(0).Infoln(index, "Start vacuuming", vid, "on", url) + if e := vacuumVolume_Compact(url, vid); e != nil { + glog.V(0).Infoln(index, "Error when vacuuming", vid, "on", url, e) + ch <- false + } else { + glog.V(0).Infoln(index, "Complete vacuuming", vid, "on", url) + ch <- true + 
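// Each replica reports completion on ch; the collector below drains one
// reply per location and gives up after 30 minutes per reply.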
} + }(index, dn.Url(), vid) + } + isVacuumSuccess := true + for _ = range locationlist.list { + select { + case _ = <-ch: + case <-time.After(30 * time.Minute): + isVacuumSuccess = false + break + } + } + return isVacuumSuccess +} +func batchVacuumVolumeCommit(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList) bool { + isCommitSuccess := true + for _, dn := range locationlist.list { + glog.V(0).Infoln("Start Commiting vacuum", vid, "on", dn.Url()) + if e := vacuumVolume_Commit(dn.Url(), vid); e != nil { + glog.V(0).Infoln("Error when committing vacuum", vid, "on", dn.Url(), e) + isCommitSuccess = false + } else { + glog.V(0).Infoln("Complete Commiting vacuum", vid, "on", dn.Url()) + } + if isCommitSuccess { + vl.SetVolumeAvailable(dn, vid) + } + } + return isCommitSuccess +} +func (t *Topology) Vacuum(garbageThreshold string) int { + glog.V(0).Infoln("Start vacuum on demand") + for _, col := range t.collectionMap.Items() { + c := col.(*Collection) + glog.V(0).Infoln("vacuum on collection:", c.Name) + for _, vl := range c.storageType2VolumeLayout.Items() { + if vl != nil { + volumeLayout := vl.(*VolumeLayout) + for vid, locationlist := range volumeLayout.vid2location { + glog.V(0).Infoln("vacuum on collection:", c.Name, "volume", vid) + if batchVacuumVolumeCheck(volumeLayout, vid, locationlist, garbageThreshold) { + if batchVacuumVolumeCompact(volumeLayout, vid, locationlist) { + batchVacuumVolumeCommit(volumeLayout, vid, locationlist) + } + } + } + } + } + } + return 0 +} + +type VacuumVolumeResult struct { + Result bool + Error string +} + +func vacuumVolume_Check(urlLocation string, vid storage.VolumeId, garbageThreshold string) (error, bool) { + values := make(url.Values) + values.Add("volume", vid.String()) + values.Add("garbageThreshold", garbageThreshold) + jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/check", values) + if err != nil { + glog.V(0).Infoln("parameters:", values) + return err, false + } + var ret VacuumVolumeResult + if err := json.Unmarshal(jsonBlob, &ret); err != nil { + return err, false + } + if ret.Error != "" { + return errors.New(ret.Error), false + } + return nil, ret.Result +} +func vacuumVolume_Compact(urlLocation string, vid storage.VolumeId) error { + values := make(url.Values) + values.Add("volume", vid.String()) + jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/compact", values) + if err != nil { + return err + } + var ret VacuumVolumeResult + if err := json.Unmarshal(jsonBlob, &ret); err != nil { + return err + } + if ret.Error != "" { + return errors.New(ret.Error) + } + return nil +} +func vacuumVolume_Commit(urlLocation string, vid storage.VolumeId) error { + values := make(url.Values) + values.Add("volume", vid.String()) + jsonBlob, err := util.Post("http://"+urlLocation+"/admin/vacuum/commit", values) + if err != nil { + return err + } + var ret VacuumVolumeResult + if err := json.Unmarshal(jsonBlob, &ret); err != nil { + return err + } + if ret.Error != "" { + return errors.New(ret.Error) + } + return nil +} diff --git a/weed/topology/volume_growth.go b/weed/topology/volume_growth.go new file mode 100644 index 000000000..3a1c9c567 --- /dev/null +++ b/weed/topology/volume_growth.go @@ -0,0 +1,211 @@ +package topology + +import ( + "fmt" + "math/rand" + "sync" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +/* +This package is created to resolve these replica placement issues: +1. 
growth factor for each replica level, e.g., add 10 volumes for 1 copy, 20 volumes for 2 copies, 30 volumes for 3 copies
+2. in times of tight storage, how to reduce the replica level
+3. optimizing for hot data on faster disk, cold data on cheaper storage
+4. volume allocation for each bucket
+*/
+
+type VolumeGrowOption struct {
+	Collection       string
+	ReplicaPlacement *storage.ReplicaPlacement
+	Ttl              *storage.TTL
+	DataCenter       string
+	Rack             string
+	DataNode         string
+}
+
+type VolumeGrowth struct {
+	accessLock sync.Mutex
+}
+
+func (o *VolumeGrowOption) String() string {
+	return fmt.Sprintf("Collection:%s, ReplicaPlacement:%v, Ttl:%v, DataCenter:%s, Rack:%s, DataNode:%s", o.Collection, o.ReplicaPlacement, o.Ttl, o.DataCenter, o.Rack, o.DataNode)
+}
+
+func NewDefaultVolumeGrowth() *VolumeGrowth {
+	return &VolumeGrowth{}
+}
+
+// one replication type may need rp.GetCopyCount() actual volumes;
+// given copyCount, decide how many logical volumes to create
+func (vg *VolumeGrowth) findVolumeCount(copyCount int) (count int) {
+	switch copyCount {
+	case 1:
+		count = 7
+	case 2:
+		count = 6
+	case 3:
+		count = 3
+	default:
+		count = 1
+	}
+	return
+}
+
+func (vg *VolumeGrowth) AutomaticGrowByType(option *VolumeGrowOption, topo *Topology) (count int, err error) {
+	count, err = vg.GrowByCountAndType(vg.findVolumeCount(option.ReplicaPlacement.GetCopyCount()), option, topo)
+	if count > 0 && count%option.ReplicaPlacement.GetCopyCount() == 0 {
+		return count, nil
+	}
+	return count, err
+}
+func (vg *VolumeGrowth) GrowByCountAndType(targetCount int, option *VolumeGrowOption, topo *Topology) (counter int, err error) {
+	vg.accessLock.Lock()
+	defer vg.accessLock.Unlock()
+
+	for i := 0; i < targetCount; i++ {
+		if c, e := vg.findAndGrow(topo, option); e == nil {
+			counter += c
+		} else {
+			return counter, e
+		}
+	}
+	return
+}
+
+func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (int, error) {
+	servers, e := vg.findEmptySlotsForOneVolume(topo, option)
+	if e != nil {
+		return 0, e
+	}
+	vid := topo.NextVolumeId()
+	err := vg.grow(topo, vid, option, servers...)
+	return len(servers), err
+}
+
+// 1. find the main data node
+// 1.1 collect all data nodes that have one free slot
+// 1.2 collect all racks that have at least rp.SameRackCount+1 free data nodes
+// 1.3 collect all data centers that have at least rp.DiffRackCount+rp.SameRackCount+1 free slots
+// 2.
find rest data nodes +func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption) (servers []*DataNode, err error) { + //find main datacenter and other data centers + rp := option.ReplicaPlacement + mainDataCenter, otherDataCenters, dc_err := topo.RandomlyPickNodes(rp.DiffDataCenterCount+1, func(node Node) error { + if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) { + return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter) + } + if len(node.Children()) < rp.DiffRackCount+1 { + return fmt.Errorf("Only has %d racks, not enough for %d.", len(node.Children()), rp.DiffRackCount+1) + } + if node.FreeSpace() < rp.DiffRackCount+rp.SameRackCount+1 { + return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.DiffRackCount+rp.SameRackCount+1) + } + possibleRacksCount := 0 + for _, rack := range node.Children() { + possibleDataNodesCount := 0 + for _, n := range rack.Children() { + if n.FreeSpace() >= 1 { + possibleDataNodesCount++ + } + } + if possibleDataNodesCount >= rp.SameRackCount+1 { + possibleRacksCount++ + } + } + if possibleRacksCount < rp.DiffRackCount+1 { + return fmt.Errorf("Only has %d racks with more than %d free data nodes, not enough for %d.", possibleRacksCount, rp.SameRackCount+1, rp.DiffRackCount+1) + } + return nil + }) + if dc_err != nil { + return nil, dc_err + } + + //find main rack and other racks + mainRack, otherRacks, rack_err := mainDataCenter.(*DataCenter).RandomlyPickNodes(rp.DiffRackCount+1, func(node Node) error { + if option.Rack != "" && node.IsRack() && node.Id() != NodeId(option.Rack) { + return fmt.Errorf("Not matching preferred rack:%s", option.Rack) + } + if node.FreeSpace() < rp.SameRackCount+1 { + return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.SameRackCount+1) + } + if len(node.Children()) < rp.SameRackCount+1 { + // a bit faster way to test free racks + return fmt.Errorf("Only has %d data nodes, not enough for %d.", len(node.Children()), rp.SameRackCount+1) + } + possibleDataNodesCount := 0 + for _, n := range node.Children() { + if n.FreeSpace() >= 1 { + possibleDataNodesCount++ + } + } + if possibleDataNodesCount < rp.SameRackCount+1 { + return fmt.Errorf("Only has %d data nodes with a slot, not enough for %d.", possibleDataNodesCount, rp.SameRackCount+1) + } + return nil + }) + if rack_err != nil { + return nil, rack_err + } + + //find main rack and other racks + mainServer, otherServers, server_err := mainRack.(*Rack).RandomlyPickNodes(rp.SameRackCount+1, func(node Node) error { + if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) { + return fmt.Errorf("Not matching preferred data node:%s", option.DataNode) + } + if node.FreeSpace() < 1 { + return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), 1) + } + return nil + }) + if server_err != nil { + return nil, server_err + } + + servers = append(servers, mainServer.(*DataNode)) + for _, server := range otherServers { + servers = append(servers, server.(*DataNode)) + } + for _, rack := range otherRacks { + r := rand.Intn(rack.FreeSpace()) + if server, e := rack.ReserveOneVolume(r); e == nil { + servers = append(servers, server) + } else { + return servers, e + } + } + for _, datacenter := range otherDataCenters { + r := rand.Intn(datacenter.FreeSpace()) + if server, e := datacenter.ReserveOneVolume(r); e == nil { + servers = append(servers, server) + } else { + return servers, e + } + } + return +} + +func (vg *VolumeGrowth) grow(topo *Topology, vid 
storage.VolumeId, option *VolumeGrowOption, servers ...*DataNode) error { + for _, server := range servers { + if err := AllocateVolume(server, vid, option); err == nil { + vi := storage.VolumeInfo{ + Id: vid, + Size: 0, + Collection: option.Collection, + ReplicaPlacement: option.ReplicaPlacement, + Ttl: option.Ttl, + Version: storage.CurrentVersion, + } + server.AddOrUpdateVolume(vi) + topo.RegisterVolumeLayout(vi, server) + glog.V(0).Infoln("Created Volume", vid, "on", server.NodeImpl.String()) + } else { + glog.V(0).Infoln("Failed to assign volume", vid, "to", servers, "error", err) + return fmt.Errorf("Failed to assign %d: %v", vid, err) + } + } + return nil +} diff --git a/weed/topology/volume_growth_test.go b/weed/topology/volume_growth_test.go new file mode 100644 index 000000000..e5716674a --- /dev/null +++ b/weed/topology/volume_growth_test.go @@ -0,0 +1,135 @@ +package topology + +import ( + "encoding/json" + "fmt" + "testing" + + "github.com/chrislusf/seaweedfs/weed/sequence" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +var topologyLayout = ` +{ + "dc1":{ + "rack1":{ + "server111":{ + "volumes":[ + {"id":1, "size":12312}, + {"id":2, "size":12312}, + {"id":3, "size":12312} + ], + "limit":3 + }, + "server112":{ + "volumes":[ + {"id":4, "size":12312}, + {"id":5, "size":12312}, + {"id":6, "size":12312} + ], + "limit":10 + } + }, + "rack2":{ + "server121":{ + "volumes":[ + {"id":4, "size":12312}, + {"id":5, "size":12312}, + {"id":6, "size":12312} + ], + "limit":4 + }, + "server122":{ + "volumes":[], + "limit":4 + }, + "server123":{ + "volumes":[ + {"id":2, "size":12312}, + {"id":3, "size":12312}, + {"id":4, "size":12312} + ], + "limit":5 + } + } + }, + "dc2":{ + }, + "dc3":{ + "rack2":{ + "server321":{ + "volumes":[ + {"id":1, "size":12312}, + {"id":3, "size":12312}, + {"id":5, "size":12312} + ], + "limit":4 + } + } + } +} +` + +func setup(topologyLayout string) *Topology { + var data interface{} + err := json.Unmarshal([]byte(topologyLayout), &data) + if err != nil { + fmt.Println("error:", err) + } + fmt.Println("data:", data) + + //need to connect all nodes first before server adding volumes + topo, err := NewTopology("weedfs", "/etc/weedfs/weedfs.conf", + sequence.NewMemorySequencer(), 32*1024, 5) + if err != nil { + panic("error: " + err.Error()) + } + mTopology := data.(map[string]interface{}) + for dcKey, dcValue := range mTopology { + dc := NewDataCenter(dcKey) + dcMap := dcValue.(map[string]interface{}) + topo.LinkChildNode(dc) + for rackKey, rackValue := range dcMap { + rack := NewRack(rackKey) + rackMap := rackValue.(map[string]interface{}) + dc.LinkChildNode(rack) + for serverKey, serverValue := range rackMap { + server := NewDataNode(serverKey) + serverMap := serverValue.(map[string]interface{}) + rack.LinkChildNode(server) + for _, v := range serverMap["volumes"].([]interface{}) { + m := v.(map[string]interface{}) + vi := storage.VolumeInfo{ + Id: storage.VolumeId(int64(m["id"].(float64))), + Size: uint64(m["size"].(float64)), + Version: storage.CurrentVersion} + server.AddOrUpdateVolume(vi) + } + server.UpAdjustMaxVolumeCountDelta(int(serverMap["limit"].(float64))) + } + } + } + + return topo +} + +func TestFindEmptySlotsForOneVolume(t *testing.T) { + topo := setup(topologyLayout) + vg := NewDefaultVolumeGrowth() + rp, _ := storage.NewReplicaPlacementFromString("002") + volumeGrowOption := &VolumeGrowOption{ + Collection: "", + ReplicaPlacement: rp, + DataCenter: "dc1", + Rack: "", + DataNode: "", + } + servers, err := vg.findEmptySlotsForOneVolume(topo, 
volumeGrowOption) + if err != nil { + fmt.Println("finding empty slots error :", err) + t.Fail() + } + for _, server := range servers { + fmt.Println("assigned node :", server.Id()) + } +} diff --git a/weed/topology/volume_layout.go b/weed/topology/volume_layout.go new file mode 100644 index 000000000..e500de583 --- /dev/null +++ b/weed/topology/volume_layout.go @@ -0,0 +1,226 @@ +package topology + +import ( + "errors" + "fmt" + "math/rand" + "sync" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" +) + +// mapping from volume to its locations, inverted from server to volume +type VolumeLayout struct { + rp *storage.ReplicaPlacement + ttl *storage.TTL + vid2location map[storage.VolumeId]*VolumeLocationList + writables []storage.VolumeId // transient array of writable volume id + volumeSizeLimit uint64 + accessLock sync.RWMutex +} + +func NewVolumeLayout(rp *storage.ReplicaPlacement, ttl *storage.TTL, volumeSizeLimit uint64) *VolumeLayout { + return &VolumeLayout{ + rp: rp, + ttl: ttl, + vid2location: make(map[storage.VolumeId]*VolumeLocationList), + writables: *new([]storage.VolumeId), + volumeSizeLimit: volumeSizeLimit, + } +} + +func (vl *VolumeLayout) String() string { + return fmt.Sprintf("rp:%v, ttl:%v, vid2location:%v, writables:%v, volumeSizeLimit:%v", vl.rp, vl.ttl, vl.vid2location, vl.writables, vl.volumeSizeLimit) +} + +func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) { + vl.accessLock.Lock() + defer vl.accessLock.Unlock() + + if _, ok := vl.vid2location[v.Id]; !ok { + vl.vid2location[v.Id] = NewVolumeLocationList() + } + vl.vid2location[v.Id].Set(dn) + glog.V(4).Infoln("volume", v.Id, "added to dn", dn.Id(), "len", vl.vid2location[v.Id].Length(), "copy", v.ReplicaPlacement.GetCopyCount()) + if vl.vid2location[v.Id].Length() == vl.rp.GetCopyCount() && vl.isWritable(v) { + vl.addToWritable(v.Id) + } else { + vl.removeFromWritable(v.Id) + } +} + +func (vl *VolumeLayout) UnRegisterVolume(v *storage.VolumeInfo, dn *DataNode) { + vl.accessLock.Lock() + defer vl.accessLock.Unlock() + + vl.removeFromWritable(v.Id) + delete(vl.vid2location, v.Id) +} + +func (vl *VolumeLayout) addToWritable(vid storage.VolumeId) { + for _, id := range vl.writables { + if vid == id { + return + } + } + vl.writables = append(vl.writables, vid) +} + +func (vl *VolumeLayout) isWritable(v *storage.VolumeInfo) bool { + return uint64(v.Size) < vl.volumeSizeLimit && + v.Version == storage.CurrentVersion && + !v.ReadOnly +} + +func (vl *VolumeLayout) Lookup(vid storage.VolumeId) []*DataNode { + vl.accessLock.RLock() + defer vl.accessLock.RUnlock() + + if location := vl.vid2location[vid]; location != nil { + return location.list + } + return nil +} + +func (vl *VolumeLayout) ListVolumeServers() (nodes []*DataNode) { + vl.accessLock.RLock() + defer vl.accessLock.RUnlock() + + for _, location := range vl.vid2location { + nodes = append(nodes, location.list...) 
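// A data node hosting several volumes in this layout appears once per
// volume; callers needing distinct servers must de-duplicate.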
+ } + return +} + +func (vl *VolumeLayout) PickForWrite(count uint64, option *VolumeGrowOption) (*storage.VolumeId, uint64, *VolumeLocationList, error) { + vl.accessLock.RLock() + defer vl.accessLock.RUnlock() + + len_writers := len(vl.writables) + if len_writers <= 0 { + glog.V(0).Infoln("No more writable volumes!") + return nil, 0, nil, errors.New("No more writable volumes!") + } + if option.DataCenter == "" { + vid := vl.writables[rand.Intn(len_writers)] + locationList := vl.vid2location[vid] + if locationList != nil { + return &vid, count, locationList, nil + } + return nil, 0, nil, errors.New("Strangely vid " + vid.String() + " is on no machine!") + } + var vid storage.VolumeId + var locationList *VolumeLocationList + counter := 0 + for _, v := range vl.writables { + volumeLocationList := vl.vid2location[v] + for _, dn := range volumeLocationList.list { + if dn.GetDataCenter().Id() == NodeId(option.DataCenter) { + if option.Rack != "" && dn.GetRack().Id() != NodeId(option.Rack) { + continue + } + if option.DataNode != "" && dn.Id() != NodeId(option.DataNode) { + continue + } + counter++ + if rand.Intn(counter) < 1 { + vid, locationList = v, volumeLocationList + } + } + } + } + return &vid, count, locationList, nil +} + +func (vl *VolumeLayout) GetActiveVolumeCount(option *VolumeGrowOption) int { + vl.accessLock.RLock() + defer vl.accessLock.RUnlock() + + if option.DataCenter == "" { + return len(vl.writables) + } + counter := 0 + for _, v := range vl.writables { + for _, dn := range vl.vid2location[v].list { + if dn.GetDataCenter().Id() == NodeId(option.DataCenter) { + if option.Rack != "" && dn.GetRack().Id() != NodeId(option.Rack) { + continue + } + if option.DataNode != "" && dn.Id() != NodeId(option.DataNode) { + continue + } + counter++ + } + } + } + return counter +} + +func (vl *VolumeLayout) removeFromWritable(vid storage.VolumeId) bool { + toDeleteIndex := -1 + for k, id := range vl.writables { + if id == vid { + toDeleteIndex = k + break + } + } + if toDeleteIndex >= 0 { + glog.V(0).Infoln("Volume", vid, "becomes unwritable") + vl.writables = append(vl.writables[0:toDeleteIndex], vl.writables[toDeleteIndex+1:]...) 
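// Order-preserving delete: re-slice around toDeleteIndex so the remaining
// writable volume ids keep their relative order.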
+ return true + } + return false +} +func (vl *VolumeLayout) setVolumeWritable(vid storage.VolumeId) bool { + for _, v := range vl.writables { + if v == vid { + return false + } + } + glog.V(0).Infoln("Volume", vid, "becomes writable") + vl.writables = append(vl.writables, vid) + return true +} + +func (vl *VolumeLayout) SetVolumeUnavailable(dn *DataNode, vid storage.VolumeId) bool { + vl.accessLock.Lock() + defer vl.accessLock.Unlock() + + if location, ok := vl.vid2location[vid]; ok { + if location.Remove(dn) { + if location.Length() < vl.rp.GetCopyCount() { + glog.V(0).Infoln("Volume", vid, "has", location.Length(), "replica, less than required", vl.rp.GetCopyCount()) + return vl.removeFromWritable(vid) + } + } + } + return false +} +func (vl *VolumeLayout) SetVolumeAvailable(dn *DataNode, vid storage.VolumeId) bool { + vl.accessLock.Lock() + defer vl.accessLock.Unlock() + + vl.vid2location[vid].Set(dn) + if vl.vid2location[vid].Length() >= vl.rp.GetCopyCount() { + return vl.setVolumeWritable(vid) + } + return false +} + +func (vl *VolumeLayout) SetVolumeCapacityFull(vid storage.VolumeId) bool { + vl.accessLock.Lock() + defer vl.accessLock.Unlock() + + // glog.V(0).Infoln("Volume", vid, "reaches full capacity.") + return vl.removeFromWritable(vid) +} + +func (vl *VolumeLayout) ToMap() map[string]interface{} { + m := make(map[string]interface{}) + m["replication"] = vl.rp.String() + m["ttl"] = vl.ttl.String() + m["writables"] = vl.writables + //m["locations"] = vl.vid2location + return m +} diff --git a/weed/topology/volume_location_list.go b/weed/topology/volume_location_list.go new file mode 100644 index 000000000..d5eaf5e92 --- /dev/null +++ b/weed/topology/volume_location_list.go @@ -0,0 +1,65 @@ +package topology + +import ( + "fmt" +) + +type VolumeLocationList struct { + list []*DataNode +} + +func NewVolumeLocationList() *VolumeLocationList { + return &VolumeLocationList{} +} + +func (dnll *VolumeLocationList) String() string { + return fmt.Sprintf("%v", dnll.list) +} + +func (dnll *VolumeLocationList) Head() *DataNode { + //mark first node as master volume + return dnll.list[0] +} + +func (dnll *VolumeLocationList) Length() int { + return len(dnll.list) +} + +func (dnll *VolumeLocationList) Set(loc *DataNode) { + for i := 0; i < len(dnll.list); i++ { + if loc.Ip == dnll.list[i].Ip && loc.Port == dnll.list[i].Port { + dnll.list[i] = loc + return + } + } + dnll.list = append(dnll.list, loc) +} + +func (dnll *VolumeLocationList) Remove(loc *DataNode) bool { + for i, dnl := range dnll.list { + if loc.Ip == dnl.Ip && loc.Port == dnl.Port { + dnll.list = append(dnll.list[:i], dnll.list[i+1:]...) 
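// Locations are matched by Ip:Port rather than pointer identity, so a
// DataNode object that was re-created on a later heartbeat still removes
// the intended entry.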
+ return true + } + } + return false +} + +func (dnll *VolumeLocationList) Refresh(freshThreshHold int64) { + var changed bool + for _, dnl := range dnll.list { + if dnl.LastSeen < freshThreshHold { + changed = true + break + } + } + if changed { + var l []*DataNode + for _, dnl := range dnll.list { + if dnl.LastSeen >= freshThreshHold { + l = append(l, dnl) + } + } + dnll.list = l + } +} diff --git a/weed/util/bytes.go b/weed/util/bytes.go new file mode 100644 index 000000000..dfa4ae665 --- /dev/null +++ b/weed/util/bytes.go @@ -0,0 +1,45 @@ +package util + +// big endian + +func BytesToUint64(b []byte) (v uint64) { + length := uint(len(b)) + for i := uint(0); i < length-1; i++ { + v += uint64(b[i]) + v <<= 8 + } + v += uint64(b[length-1]) + return +} +func BytesToUint32(b []byte) (v uint32) { + length := uint(len(b)) + for i := uint(0); i < length-1; i++ { + v += uint32(b[i]) + v <<= 8 + } + v += uint32(b[length-1]) + return +} +func BytesToUint16(b []byte) (v uint16) { + v += uint16(b[0]) + v <<= 8 + v += uint16(b[1]) + return +} +func Uint64toBytes(b []byte, v uint64) { + for i := uint(0); i < 8; i++ { + b[7-i] = byte(v >> (i * 8)) + } +} +func Uint32toBytes(b []byte, v uint32) { + for i := uint(0); i < 4; i++ { + b[3-i] = byte(v >> (i * 8)) + } +} +func Uint16toBytes(b []byte, v uint16) { + b[0] = byte(v >> 8) + b[1] = byte(v) +} +func Uint8toBytes(b []byte, v uint8) { + b[0] = byte(v) +} diff --git a/weed/util/bytes_pool.go b/weed/util/bytes_pool.go new file mode 100644 index 000000000..58ed6feca --- /dev/null +++ b/weed/util/bytes_pool.go @@ -0,0 +1,127 @@ +package util + +import ( + "bytes" + "fmt" + "sync" + "sync/atomic" + "time" +) + +var ( + ChunkSizes = []int{ + 1 << 4, // index 0, 16 bytes, inclusive + 1 << 6, // index 1, 64 bytes + 1 << 8, // index 2, 256 bytes + 1 << 10, // index 3, 1K bytes + 1 << 12, // index 4, 4K bytes + 1 << 14, // index 5, 16K bytes + 1 << 16, // index 6, 64K bytes + 1 << 18, // index 7, 256K bytes + 1 << 20, // index 8, 1M bytes + 1 << 22, // index 9, 4M bytes + 1 << 24, // index 10, 16M bytes + 1 << 26, // index 11, 64M bytes + 1 << 28, // index 12, 128M bytes + } + + _DEBUG = false +) + +type BytesPool struct { + chunkPools []*byteChunkPool +} + +func NewBytesPool() *BytesPool { + var bp BytesPool + for _, size := range ChunkSizes { + bp.chunkPools = append(bp.chunkPools, newByteChunkPool(size)) + } + ret := &bp + if _DEBUG { + t := time.NewTicker(10 * time.Second) + go func() { + for { + println("buffer:", ret.String()) + <-t.C + } + }() + } + return ret +} + +func (m *BytesPool) String() string { + var buf bytes.Buffer + for index, size := range ChunkSizes { + if m.chunkPools[index].count > 0 { + buf.WriteString(fmt.Sprintf("size:%d count:%d\n", size, m.chunkPools[index].count)) + } + } + return buf.String() +} + +func findChunkPoolIndex(size int) int { + if size <= 0 { + return -1 + } + size = (size - 1) >> 4 + ret := 0 + for size > 0 { + size = size >> 2 + ret = ret + 1 + } + if ret >= len(ChunkSizes) { + return -1 + } + return ret +} + +func (m *BytesPool) Get(size int) []byte { + index := findChunkPoolIndex(size) + // println("get index:", index) + if index < 0 { + return make([]byte, size) + } + return m.chunkPools[index].Get() +} + +func (m *BytesPool) Put(b []byte) { + index := findChunkPoolIndex(len(b)) + // println("put index:", index) + if index < 0 { + return + } + m.chunkPools[index].Put(b) +} + +// a pool of fix-sized []byte chunks. 
+// a pool of fixed-size []byte chunks. The pool's size itself is
+// managed by the Go GC: sync.Pool may drop idle chunks at any
+// collection.
+type byteChunkPool struct {
+	sync.Pool
+	chunkSizeLimit int
+	count          int64
+}
+
+// total chunks ever allocated, for debugging only
+var count int64
+
+func newByteChunkPool(chunkSizeLimit int) *byteChunkPool {
+	var m byteChunkPool
+	m.chunkSizeLimit = chunkSizeLimit
+	m.Pool.New = func() interface{} {
+		atomic.AddInt64(&count, 1)
+		// println("creating []byte size", m.chunkSizeLimit, "new", count, "count", m.count)
+		return make([]byte, m.chunkSizeLimit)
+	}
+	return &m
+}
+
+func (m *byteChunkPool) Get() []byte {
+	// println("before get size:", m.chunkSizeLimit, "count:", m.count)
+	atomic.AddInt64(&m.count, 1)
+	return m.Pool.Get().([]byte)
+}
+
+func (m *byteChunkPool) Put(b []byte) {
+	atomic.AddInt64(&m.count, -1)
+	// println("after put get size:", m.chunkSizeLimit, "count:", m.count)
+	m.Pool.Put(b)
+}
diff --git a/weed/util/bytes_pool_test.go b/weed/util/bytes_pool_test.go
new file mode 100644
index 000000000..3f37c16cf
--- /dev/null
+++ b/weed/util/bytes_pool_test.go
@@ -0,0 +1,41 @@
+package util
+
+import (
+	"testing"
+)
+
+func TestFindChunkPoolIndex(t *testing.T) {
+	var tests = []struct {
+		n        int // input
+		expected int // expected result
+	}{
+		{0, -1},
+		{1, 0},
+		{1 << 4, 0},
+		{1 << 6, 1},
+		{1 << 8, 2},
+		{1 << 10, 3},
+		{1 << 12, 4},
+		{1 << 14, 5},
+		{1 << 16, 6},
+		{1 << 18, 7},
+		{1<<4 + 1, 1},
+		{1<<6 + 1, 2},
+		{1<<8 + 1, 3},
+		{1<<10 + 1, 4},
+		{1<<12 + 1, 5},
+		{1<<14 + 1, 6},
+		{1<<16 + 1, 7},
+		{1<<18 + 1, 8},
+		{1<<28 - 1, 12},
+		{1 << 28, 12},
+		{1<<28 + 2134, -1},
+		{1080, 4},
+	}
+	for _, tt := range tests {
+		actual := findChunkPoolIndex(tt.n)
+		if actual != tt.expected {
+			t.Errorf("findChunkPoolIndex(%d): expected %d, actual %d", tt.n, tt.expected, actual)
+		}
+	}
+}
diff --git a/weed/util/concurrent_read_map.go b/weed/util/concurrent_read_map.go
new file mode 100644
index 000000000..28b6ae0f1
--- /dev/null
+++ b/weed/util/concurrent_read_map.go
@@ -0,0 +1,60 @@
+package util
+
+import (
+	"sync"
+)
+
+// ConcurrentReadMap is a read-mostly map that can thread-safely
+// initialize its entries on first access.
+type ConcurrentReadMap struct {
+	sync.RWMutex
+
+	items map[string]interface{}
+}
+
+func NewConcurrentReadMap() *ConcurrentReadMap {
+	return &ConcurrentReadMap{items: make(map[string]interface{})}
+}
+
+func (m *ConcurrentReadMap) initMapEntry(key string, newEntry func() interface{}) (value interface{}) {
+	m.Lock()
+	defer m.Unlock()
+	if value, ok := m.items[key]; ok {
+		return value
+	}
+	value = newEntry()
+	m.items[key] = value
+	return value
+}
+
+// Get returns the value for key, lazily creating it via newEntry; the
+// write-locked double check in initMapEntry ensures concurrent callers
+// initialize a missing key only once.
+func (m *ConcurrentReadMap) Get(key string, newEntry func() interface{}) interface{} {
+	m.RLock()
+	if value, ok := m.items[key]; ok {
+		m.RUnlock()
+		return value
+	}
+	m.RUnlock()
+	return m.initMapEntry(key, newEntry)
+}
+
+func (m *ConcurrentReadMap) Find(key string) (interface{}, bool) {
+	m.RLock()
+	value, ok := m.items[key]
+	m.RUnlock()
+	return value, ok
+}
+
+func (m *ConcurrentReadMap) Items() (itemsCopy []interface{}) {
+	m.RLock()
+	for _, i := range m.items {
+		itemsCopy = append(itemsCopy, i)
+	}
+	m.RUnlock()
+	return itemsCopy
+}
+
+func (m *ConcurrentReadMap) Delete(key string) {
+	m.Lock()
+	delete(m.items, key)
+	m.Unlock()
+}
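+// A hedged usage sketch (not part of the original commit): Get takes
+// a factory so the first caller initializes an entry while later
+// callers pay only the read-lock cost. The stored type here is
+// illustrative; callers assert back to whatever they stored.
+func ExampleConcurrentReadMap() {
+	m := NewConcurrentReadMap()
+	v := m.Get("vol-1", func() interface{} {
+		return make([]int, 0, 4) // runs only when "vol-1" is missing
+	})
+	list := v.([]int)
+	_ = list
+}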
diff --git a/weed/util/config.go b/weed/util/config.go
new file mode 100644
index 000000000..e4549c322
--- /dev/null
+++ b/weed/util/config.go
@@ -0,0 +1,130 @@
+package util
+
+// Copyright 2011 Numerotron Inc.
+// Use of this source code is governed by an MIT-style license
+// that can be found in the LICENSE file.
+//
+// Developed at www.stathat.com by Patrick Crosby
+// Contact us on twitter with any questions: twitter.com/stat_hat
+
+// This file, adapted from the jconfig package, provides a simple,
+// basic configuration file parser using JSON.
+
+import (
+	"bytes"
+	"encoding/json"
+	"os"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+type Config struct {
+	data     map[string]interface{}
+	filename string
+}
+
+func newConfig() *Config {
+	result := new(Config)
+	result.data = make(map[string]interface{})
+	return result
+}
+
+// LoadConfig loads config information from a JSON file.
+func LoadConfig(filename string) *Config {
+	result := newConfig()
+	result.filename = filename
+	err := result.parse()
+	if err != nil {
+		glog.Fatalf("error loading config file %s: %s", filename, err)
+	}
+	return result
+}
+
+// LoadConfigString loads config information from a JSON string.
+func LoadConfigString(s string) *Config {
+	result := newConfig()
+	err := json.Unmarshal([]byte(s), &result.data)
+	if err != nil {
+		glog.Fatalf("error parsing config string %s: %s", s, err)
+	}
+	return result
+}
+
+func (c *Config) StringMerge(s string) {
+	next := LoadConfigString(s)
+	c.merge(next.data)
+}
+
+func (c *Config) LoadMerge(filename string) {
+	next := LoadConfig(filename)
+	c.merge(next.data)
+}
+
+func (c *Config) merge(ndata map[string]interface{}) {
+	for k, v := range ndata {
+		c.data[k] = v
+	}
+}
+
+func (c *Config) parse() error {
+	f, err := os.Open(c.filename)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	b := new(bytes.Buffer)
+	_, err = b.ReadFrom(f)
+	if err != nil {
+		return err
+	}
+	err = json.Unmarshal(b.Bytes(), &c.data)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// GetString returns a string for the config variable key, or "" if
+// the key is absent. It panics if the value is not a string.
+func (c *Config) GetString(key string) string {
+	result, present := c.data[key]
+	if !present {
+		return ""
+	}
+	return result.(string)
+}
+
+// GetInt returns an int for the config variable key, or -1 if absent.
+func (c *Config) GetInt(key string) int {
+	x, ok := c.data[key]
+	if !ok {
+		return -1
+	}
+	return int(x.(float64))
+}
+
+// GetFloat returns a float for the config variable key, or -1 if absent.
+func (c *Config) GetFloat(key string) float64 {
+	x, ok := c.data[key]
+	if !ok {
+		return -1
+	}
+	return x.(float64)
+}
+
+// GetBool returns a bool for the config variable key, or false if absent.
+func (c *Config) GetBool(key string) bool {
+	x, ok := c.data[key]
+	if !ok {
+		return false
+	}
+	return x.(bool)
+}
+
+// GetArray returns an array for the config variable key, or nil if absent.
+func (c *Config) GetArray(key string) []interface{} {
+	result, present := c.data[key]
+	if !present {
+		return []interface{}(nil)
+	}
+	return result.([]interface{})
+}
diff --git a/weed/util/constants.go b/weed/util/constants.go
new file mode 100644
index 000000000..6b6b0b911
--- /dev/null
+++ b/weed/util/constants.go
@@ -0,0 +1,5 @@
+package util
+
+const (
+	VERSION = "0.71 beta"
+)
diff --git a/weed/util/file_util.go b/weed/util/file_util.go
new file mode 100644
index 000000000..a39fb0860
--- /dev/null
+++ b/weed/util/file_util.go
@@ -0,0 +1,38 @@
+package util
+
+import (
+	"bufio"
+	"errors"
+	"os"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+// TestFolderWritable checks that folder exists, is a directory, and
+// has the owner-write permission bit set.
+func TestFolderWritable(folder string) (err error) {
+	fileInfo, err := os.Stat(folder)
+	if err != nil {
+		return err
+	}
+	if !fileInfo.IsDir() {
+		return errors.New("not a valid folder")
+	}
+	perm := fileInfo.Mode().Perm()
+	glog.V(0).Infoln("Folder", folder, "Permission:", perm)
+	if 0200&perm != 0 {
+		return nil
+	}
+	return errors.New("folder is not writable")
+}
+
+func Readln(r *bufio.Reader) ([]byte, error) {
+	var (
+		isPrefix = true
+		err      error
+		line, ln []byte
+	)
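+	// bufio.Reader.ReadLine returns long lines in buffer-sized
+	// fragments, signalling continuation via isPrefix; loop until the
+	// final fragment arrives and stitch the pieces together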
+	for isPrefix && err == nil {
+		line, isPrefix, err = r.ReadLine()
+		ln = append(ln, line...)
+	}
+	return ln, err
+}
diff --git a/weed/util/http_util.go b/weed/util/http_util.go
new file mode 100644
index 000000000..a54fc8779
--- /dev/null
+++ b/weed/util/http_util.go
@@ -0,0 +1,163 @@
+package util
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/chrislusf/seaweedfs/weed/security"
+)
+
+var (
+	client    *http.Client
+	Transport *http.Transport
+)
+
+func init() {
+	Transport = &http.Transport{
+		MaxIdleConnsPerHost: 1024,
+	}
+	client = &http.Client{Transport: Transport}
+}
+
+func PostBytes(url string, body []byte) ([]byte, error) {
+	r, err := client.Post(url, "application/octet-stream", bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("Post to %s: %v", url, err)
+	}
+	defer r.Body.Close()
+	b, err := ioutil.ReadAll(r.Body)
+	if err != nil {
+		return nil, fmt.Errorf("Read response body: %v", err)
+	}
+	return b, nil
+}
+
+func Post(url string, values url.Values) ([]byte, error) {
+	r, err := client.PostForm(url, values)
+	if err != nil {
+		return nil, err
+	}
+	defer r.Body.Close()
+	b, err := ioutil.ReadAll(r.Body)
+	if err != nil {
+		return nil, err
+	}
+	return b, nil
+}
+
+func Get(url string) ([]byte, error) {
+	r, err := client.Get(url)
+	if err != nil {
+		return nil, err
+	}
+	defer r.Body.Close()
+	b, err := ioutil.ReadAll(r.Body)
+	if r.StatusCode != 200 {
+		return nil, fmt.Errorf("%s: %s", url, r.Status)
+	}
+	if err != nil {
+		return nil, err
+	}
+	return b, nil
+}
+
+func Delete(url string, jwt security.EncodedJwt) error {
+	req, err := http.NewRequest("DELETE", url, nil)
+	if err != nil {
+		// check the error before touching req: a failed NewRequest
+		// returns a nil request, so setting headers first would panic
+		return err
+	}
+	if jwt != "" {
+		req.Header.Set("Authorization", "BEARER "+string(jwt))
+	}
+	resp, e := client.Do(req)
+	if e != nil {
+		return e
+	}
+	defer resp.Body.Close()
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	switch resp.StatusCode {
+	case http.StatusNotFound, http.StatusAccepted, http.StatusOK:
+		return nil
+	}
+	m := make(map[string]interface{})
+	if e := json.Unmarshal(body, &m); e == nil {
+		if s, ok := m["error"].(string); ok {
+			return errors.New(s)
+		}
+	}
+	return errors.New(string(body))
+}
+
+func GetBufferStream(url string, values url.Values, allocatedBytes []byte, eachBuffer func([]byte)) error {
+	r, err := client.PostForm(url, values)
+	if err != nil {
+		return err
+	}
+	defer r.Body.Close()
+	if r.StatusCode != 200 {
+		return fmt.Errorf("%s: %s", url, r.Status)
+	}
+	bufferSize := len(allocatedBytes)
+	for {
+		n, err := r.Body.Read(allocatedBytes)
+		// only completely filled buffers are passed on; a final
+		// partial read is dropped when EOF is reached
+		if n == bufferSize {
+			eachBuffer(allocatedBytes)
+		}
+		if err != nil {
+			if err == io.EOF {
+				return nil
+			}
+			return err
+		}
+	}
+}
+
+func GetUrlStream(url string, values url.Values, readFn func(io.Reader) error) error {
+	r, err := client.PostForm(url, values)
+	if err != nil {
+		return err
+	}
+	defer r.Body.Close()
+	if r.StatusCode != 200 {
+		return fmt.Errorf("%s: %s", url, r.Status)
+	}
+	return readFn(r.Body)
+}
+
+func DownloadUrl(fileUrl string) (filename string, rc io.ReadCloser, e error) {
+	response, err := client.Get(fileUrl)
+	if err != nil {
+		return "", nil, err
+	}
+	contentDisposition := response.Header["Content-Disposition"]
+	if len(contentDisposition) > 0 {
+		if strings.HasPrefix(contentDisposition[0], "filename=") {
+			filename = contentDisposition[0][len("filename="):]
+			filename = strings.Trim(filename, "\"")
+		}
+	}
+	rc = response.Body
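+	// the caller owns rc and must Close it; note the header check
+	// above only recognizes a bare "filename=..." value, not the more
+	// common "attachment; filename=..." form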
return +} + +func Do(req *http.Request) (resp *http.Response, err error) { + return client.Do(req) +} + +func NormalizeUrl(url string) string { + if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") { + return url + } + return "http://" + url +} diff --git a/weed/util/net_timeout.go b/weed/util/net_timeout.go new file mode 100644 index 000000000..f46776992 --- /dev/null +++ b/weed/util/net_timeout.go @@ -0,0 +1,81 @@ +package util + +import ( + "net" + "time" + + "github.com/chrislusf/seaweedfs/weed/stats" +) + +// Listener wraps a net.Listener, and gives a place to store the timeout +// parameters. On Accept, it will wrap the net.Conn with our own Conn for us. +type Listener struct { + net.Listener + ReadTimeout time.Duration + WriteTimeout time.Duration +} + +func (l *Listener) Accept() (net.Conn, error) { + c, err := l.Listener.Accept() + if err != nil { + return nil, err + } + stats.ConnectionOpen() + tc := &Conn{ + Conn: c, + ReadTimeout: l.ReadTimeout, + WriteTimeout: l.WriteTimeout, + } + return tc, nil +} + +// Conn wraps a net.Conn, and sets a deadline for every read +// and write operation. +type Conn struct { + net.Conn + ReadTimeout time.Duration + WriteTimeout time.Duration +} + +func (c *Conn) Read(b []byte) (count int, e error) { + err := c.Conn.SetReadDeadline(time.Now().Add(c.ReadTimeout)) + if err != nil { + return 0, err + } + count, e = c.Conn.Read(b) + if e == nil { + stats.BytesIn(int64(count)) + } + return +} + +func (c *Conn) Write(b []byte) (count int, e error) { + err := c.Conn.SetWriteDeadline(time.Now().Add(c.WriteTimeout)) + if err != nil { + return 0, err + } + count, e = c.Conn.Write(b) + if e == nil { + stats.BytesOut(int64(count)) + } + return +} + +func (c *Conn) Close() error { + stats.ConnectionClose() + return c.Conn.Close() +} + +func NewListener(addr string, timeout time.Duration) (net.Listener, error) { + l, err := net.Listen("tcp", addr) + if err != nil { + return nil, err + } + + tl := &Listener{ + Listener: l, + ReadTimeout: timeout, + WriteTimeout: timeout, + } + return tl, nil +} diff --git a/weed/util/parse.go b/weed/util/parse.go new file mode 100644 index 000000000..0a8317c19 --- /dev/null +++ b/weed/util/parse.go @@ -0,0 +1,26 @@ +package util + +import ( + "strconv" +) + +func ParseInt(text string, defaultValue int) int { + count, parseError := strconv.ParseInt(text, 10, 64) + if parseError != nil { + if len(text) > 0 { + return 0 + } + return defaultValue + } + return int(count) +} +func ParseUint64(text string, defaultValue uint64) uint64 { + count, parseError := strconv.ParseUint(text, 10, 64) + if parseError != nil { + if len(text) > 0 { + return 0 + } + return defaultValue + } + return count +} diff --git a/weed/weed.go b/weed/weed.go new file mode 100644 index 000000000..c18eac013 --- /dev/null +++ b/weed/weed.go @@ -0,0 +1,170 @@ +package main + +import ( + "flag" + "fmt" + "io" + "math/rand" + "os" + "strings" + "sync" + "text/template" + "time" + "unicode" + "unicode/utf8" + + "github.com/chrislusf/seaweedfs/weed/command" + "github.com/chrislusf/seaweedfs/weed/glog" +) + +var IsDebug *bool +var server *string + +var commands = command.Commands + +var exitStatus = 0 +var exitMu sync.Mutex + +func setExitStatus(n int) { + exitMu.Lock() + if exitStatus < n { + exitStatus = n + } + exitMu.Unlock() +} + +func main() { + glog.MaxSize = 1024 * 1024 * 32 + rand.Seed(time.Now().UnixNano()) + flag.Usage = usage + flag.Parse() + + args := flag.Args() + if len(args) < 1 { + usage() + } + + if args[0] == "help" { + 
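+		// "weed help <command>" prints the command's long description,
+		// then falls through to print that command's flag defaults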
+		help(args[1:])
+		for _, cmd := range commands {
+			if len(args) >= 2 && cmd.Name() == args[1] && cmd.Run != nil {
+				fmt.Fprintf(os.Stderr, "Default Parameters:\n")
+				cmd.Flag.PrintDefaults()
+			}
+		}
+		return
+	}
+
+	for _, cmd := range commands {
+		if cmd.Name() == args[0] && cmd.Run != nil {
+			cmd.Flag.Usage = func() { cmd.Usage() }
+			cmd.Flag.Parse(args[1:])
+			args = cmd.Flag.Args()
+			IsDebug = cmd.IsDebug
+			if !cmd.Run(cmd, args) {
+				fmt.Fprintf(os.Stderr, "\n")
+				cmd.Flag.Usage()
+				fmt.Fprintf(os.Stderr, "Default Parameters:\n")
+				cmd.Flag.PrintDefaults()
+			}
+			exit()
+			return
+		}
+	}
+
+	fmt.Fprintf(os.Stderr, "weed: unknown subcommand %q\nRun 'weed help' for usage.\n", args[0])
+	setExitStatus(2)
+	exit()
+}
+
+var usageTemplate = `
+SeaweedFS: store billions of files and serve them fast!
+
+Usage:
+
+	weed command [arguments]
+
+The commands are:
+{{range .}}{{if .Runnable}}
+    {{.Name | printf "%-11s"}} {{.Short}}{{end}}{{end}}
+
+Use "weed help [command]" for more information about a command.
+
+`
+
+var helpTemplate = `{{if .Runnable}}Usage: weed {{.UsageLine}}
+{{end}}
+  {{.Long}}
+`
+
+// tmpl executes the given template text on data, writing the result to w.
+func tmpl(w io.Writer, text string, data interface{}) {
+	t := template.New("top")
+	t.Funcs(template.FuncMap{"trim": strings.TrimSpace, "capitalize": capitalize})
+	template.Must(t.Parse(text))
+	if err := t.Execute(w, data); err != nil {
+		panic(err)
+	}
+}
+
+func capitalize(s string) string {
+	if s == "" {
+		return s
+	}
+	r, n := utf8.DecodeRuneInString(s)
+	return string(unicode.ToTitle(r)) + s[n:]
+}
+
+func printUsage(w io.Writer) {
+	tmpl(w, usageTemplate, commands)
+}
+
+func usage() {
+	printUsage(os.Stderr)
+	fmt.Fprintf(os.Stderr, "For Logging, use \"weed [logging_options] [command]\". The logging options are:\n")
+	flag.PrintDefaults()
+	os.Exit(2)
+}
+
+// help implements the 'help' command.
+func help(args []string) {
+	if len(args) == 0 {
+		printUsage(os.Stdout)
+		// not exit 2: succeeded at 'weed help'.
+		return
+	}
+	if len(args) != 1 {
+		fmt.Fprintf(os.Stderr, "usage: weed help command\n\nToo many arguments given.\n")
+		os.Exit(2) // failed at 'weed help'
+	}
+
+	arg := args[0]
+
+	for _, cmd := range commands {
+		if cmd.Name() == arg {
+			tmpl(os.Stdout, helpTemplate, cmd)
+			// not exit 2: succeeded at 'weed help cmd'.
+			return
+		}
+	}
+
+	fmt.Fprintf(os.Stderr, "Unknown help topic %#q. Run 'weed help'.\n", arg)
+	os.Exit(2) // failed at 'weed help cmd'
+}
+
+var atexitFuncs []func()
+
+func atexit(f func()) {
+	atexitFuncs = append(atexitFuncs, f)
+}
+
+func exit() {
+	for _, f := range atexitFuncs {
+		f()
+	}
+	os.Exit(exitStatus)
+}
+
+func debug(params ...interface{}) {
+	glog.V(4).Infoln(params...)
+}
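For reference, the dispatch loop above only requires that a subcommand populate a command.Command value and appear in the command.Commands list. A minimal, hedged sketch of a new subcommand, assuming the Command fields used throughout this patch (UsageLine, Short, Long, Run, Flag); the "hello" command and its flag are hypothetical, and registration in command.Commands is assumed to happen wherever that slice is defined:

	// hello.go (sketch): a hypothetical subcommand, not part of this patch
	var cmdHello = &Command{
		UsageLine: "hello -name=world",
		Short:     "print a greeting",
		Long:      `Print a greeting, to illustrate the Command contract.`,
	}

	var helloName = cmdHello.Flag.String("name", "world", "who to greet")

	func init() {
		cmdHello.Run = runHello // assigned in init to avoid an initialization cycle
	}

	func runHello(cmd *Command, args []string) bool {
		fmt.Println("hello,", *helloName)
		return true // returning false makes weed print the usage text and flag defaults
	}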
