aboutsummaryrefslogtreecommitdiff
path: root/unmaintained/see_dat
diff options
context:
space:
mode:
author ingardm <ingard@jotta.no> 2020-06-17 10:43:28 +0200
committer GitHub <noreply@github.com> 2020-06-17 10:43:28 +0200
commit b7afa9fd57270e36a48bdf1b8b693274480658f2 (patch)
tree 36d73c587267e8c6ae366945a726647bb962cc5a /unmaintained/see_dat
parent 0ca4a6c4e6669b7725a80b04bae3de2b05e2431d (diff)
download seaweedfs-b7afa9fd57270e36a48bdf1b8b693274480658f2.tar.xz
seaweedfs-b7afa9fd57270e36a48bdf1b8b693274480658f2.zip
Create see_dat_gzip
Diffstat (limited to 'unmaintained/see_dat')
-rw-r--r-- unmaintained/see_dat/see_dat_gzip 83
1 files changed, 83 insertions, 0 deletions
diff --git a/unmaintained/see_dat/see_dat_gzip b/unmaintained/see_dat/see_dat_gzip
new file mode 100644
index 000000000..cec073e3f
--- /dev/null
+++ b/unmaintained/see_dat/see_dat_gzip
@@ -0,0 +1,83 @@
+package main
+
+import (
+ "bytes"
+ "compress/gzip"
+ "crypto/md5"
+ "flag"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "time"
+ "github.com/chrislusf/seaweedfs/weed/glog"
+ "github.com/chrislusf/seaweedfs/weed/storage"
+ "github.com/chrislusf/seaweedfs/weed/storage/needle"
+ "github.com/chrislusf/seaweedfs/weed/storage/super_block"
+ "github.com/chrislusf/seaweedfs/weed/util"
+)
+
+// VolumeFileScanner4SeeDat is the visitor that main hands to
+// storage.ScanVolumeFile. It keeps the version reported by the volume's
+// super block.
+type VolumeFileScanner4SeeDat struct {
+	// version is filled in by VisitSuperBlock.
+	version needle.Version
+}
+
+// VisitSuperBlock stores the super block's Version on the scanner.
+// It never fails and always returns nil.
+func (s *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock super_block.SuperBlock) error {
+	s.version = superBlock.Version
+	return nil
+}
+
+func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool {
+ return true
+}
+
+// Package-level running totals, all starting at zero.
+//
+// In the visible code only diffbytes is ever updated (by VisitNeedle);
+// files and filebytes are declared but never written or read.
+var (
+	files     int64
+	filebytes int64
+	diffbytes int64
+)
+
+func Compresssion(data []byte) float64 {
+ if len(data) <= 128 {
+ return 100.0
+ }
+ compressed, _ := util.GzipData(data[0:128])
+ return float64(len(compressed)*10) / 1280.0
+}
+
+func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
+ t := time.Unix(int64(n.AppendAtNs)/int64(time.Second), int64(n.AppendAtNs)%int64(time.Second))
+ glog.V(0).Info("----------------------------------------------------------------------------------")
+ glog.V(0).Infof("%d,%s%x offset %d size %d(%s) cookie %x appendedAt %v hasmime[%t] mime[%s] (len: %d)",
+ *volumeId, n.Id, n.Cookie, offset, n.Size, util.BytesToHumanReadable(uint64(n.Size)), n.Cookie, t, n.HasMime(), string(n.Mime), len(n.Mime))
+ r, err := gzip.NewReader(bytes.NewReader(n.Data))
+ if err == nil {
+ buf := bytes.Buffer{}
+ h := md5.New()
+ c, _ := io.Copy(&buf, r)
+ d := buf.Bytes()
+ io.Copy(h, bytes.NewReader(d))
+ diff := (int64(n.DataSize) - int64(c))
+ diffbytes += diff
+ glog.V(0).Infof("was gzip! stored_size: %d orig_size: %d diff: %d(%d) mime:%s compression-of-128: %.2f md5: %x", n.DataSize, c, diff, diffbytes, http.DetectContentType(d), Compresssion(d), h.Sum(nil))
+ } else {
+ glog.V(0).Infof("no gzip!")
+ }
+ return nil
+}
+
+// Command-line flags, plus a blank use of ioutil.ReadAll that keeps the
+// io/ioutil import referenced.
+var (
+	_ = ioutil.ReadAll
+
+	// volumePath is the directory passed to storage.ScanVolumeFile.
+	volumePath = flag.String("dir", "/tmp", "data directory to store files")
+	// volumeCollection is the collection name passed to storage.ScanVolumeFile.
+	volumeCollection = flag.String("collection", "", "the volume collection name")
+	// volumeId selects the volume to scan; the default -1 means "not set".
+	volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
+)
+
+// main parses the command-line flags and scans the selected volume with a
+// VolumeFileScanner4SeeDat, which logs every needle it visits. The program
+// exits through glog.Fatalf when -volumeId is missing or negative, or when
+// the scan returns an error.
+func main() {
+	flag.Parse()
+
+	// -volumeId defaults to -1 as a "not set" marker. Reject it here rather
+	// than converting a negative number into a volume id and failing later
+	// with an unrelated-looking error.
+	if *volumeId < 0 {
+		glog.Fatalf("a non-negative -volumeId is required, got %d", *volumeId)
+	}
+	vid := needle.VolumeId(*volumeId)
+
+	glog.V(0).Info("Starting")
+	scanner := &VolumeFileScanner4SeeDat{}
+	if err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner); err != nil {
+		glog.Fatalf("Reading Volume File [ERROR] %s\n", err)
+	}
+}