| field | value | date |
|---|---|---|
| author | bingoohuang <bingoo.huang@gmail.com> | 2021-04-26 17:19:35 +0800 |
| committer | bingoohuang <bingoo.huang@gmail.com> | 2021-04-26 17:19:35 +0800 |
| commit | d861cbd81b75b6684c971ac00e33685e6575b833 (patch) | |
| tree | 301805fef4aa5d0096bfb1510536f7a009b661e7 /weed/filer | |
| parent | 70da715d8d917527291b35fb069fac077d17b868 (diff) | |
| parent | 4ee58922eff61a5a4ca29c0b4829b097a498549e (diff) | |
Merge branch 'master' of https://github.com/bingoohuang/seaweedfs
Diffstat (limited to 'weed/filer')
76 files changed, 9725 insertions, 0 deletions
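Most of the new files under `weed/filer` are backend implementations of the filer's pluggable store layer (plus core types such as `Entry` and the chunk helpers): each backend registers itself by appending to `filer.Stores` in its `init()`, and `configuration.go` (below) instantiates whichever store is marked `enabled` in the configuration. As a reading aid, here is a minimal sketch of the method set these stores share, reconstructed from the signatures visible in this diff; the authoritative `FilerStore` interface is defined elsewhere in `weed/filer` (not part of this excerpt) and may differ in detail.

```go
// Sketch only: reconstructed from the method sets visible in this diff,
// not the authoritative definition.
package filer

import (
	"context"

	"github.com/chrislusf/seaweedfs/weed/util"
)

// ListEachEntryFunc is called once per listed entry; returning false stops the listing.
type ListEachEntryFunc func(entry *Entry) bool

type FilerStore interface {
	GetName() string
	Initialize(configuration util.Configuration, prefix string) error

	// entry CRUD
	InsertEntry(ctx context.Context, entry *Entry) error
	UpdateEntry(ctx context.Context, entry *Entry) error
	FindEntry(ctx context.Context, fullpath util.FullPath) (*Entry, error)
	DeleteEntry(ctx context.Context, fullpath util.FullPath) error
	DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) error

	// directory listing; stores without native prefix filtering return
	// filer.ErrUnsupportedListDirectoryPrefixed from the prefixed variant
	ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string,
		includeStartFile bool, limit int64, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error)
	ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string,
		includeStartFile bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error)

	// transactions are a no-op for most backends; abstract_sql uses real SQL transactions
	BeginTransaction(ctx context.Context) (context.Context, error)
	CommitTransaction(ctx context.Context) error
	RollbackTransaction(ctx context.Context) error

	// simple key-value access used for filer-internal bookkeeping
	KvPut(ctx context.Context, key []byte, value []byte) error
	KvGet(ctx context.Context, key []byte) (value []byte, err error)
	KvDelete(ctx context.Context, key []byte) error

	Shutdown()
}
```

A backend is selected per name, e.g. `cassandra.enabled = true` together with `cassandra.keyspace`, `cassandra.hosts`, and so on; `configuration.go` clones the registered prototype via reflection and calls `Initialize(config, "cassandra.")`, so each backend's settings live under its name prefix.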
diff --git a/weed/filer/abstract_sql/abstract_sql_store.go b/weed/filer/abstract_sql/abstract_sql_store.go new file mode 100644 index 000000000..ab8f6bcbd --- /dev/null +++ b/weed/filer/abstract_sql/abstract_sql_store.go @@ -0,0 +1,364 @@ +package abstract_sql + +import ( + "context" + "database/sql" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "strings" + "sync" +) + +type SqlGenerator interface { + GetSqlInsert(tableName string) string + GetSqlUpdate(tableName string) string + GetSqlFind(tableName string) string + GetSqlDelete(tableName string) string + GetSqlDeleteFolderChildren(tableName string) string + GetSqlListExclusive(tableName string) string + GetSqlListInclusive(tableName string) string + GetSqlCreateTable(tableName string) string + GetSqlDropTable(tableName string) string +} + +type AbstractSqlStore struct { + SqlGenerator + DB *sql.DB + SupportBucketTable bool + dbs map[string]bool + dbsLock sync.Mutex +} + +func (store *AbstractSqlStore) OnBucketCreation(bucket string) { + store.dbsLock.Lock() + defer store.dbsLock.Unlock() + + store.CreateTable(context.Background(), bucket) + + if store.dbs == nil { + return + } + store.dbs[bucket] = true +} +func (store *AbstractSqlStore) OnBucketDeletion(bucket string) { + store.dbsLock.Lock() + defer store.dbsLock.Unlock() + + store.deleteTable(context.Background(), bucket) + + if store.dbs == nil { + return + } + delete(store.dbs, bucket) +} + +const ( + DEFAULT_TABLE = "filemeta" +) + +type TxOrDB interface { + ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) + QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row + QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) +} + +func (store *AbstractSqlStore) BeginTransaction(ctx context.Context) (context.Context, error) { + tx, err := store.DB.BeginTx(ctx, &sql.TxOptions{ + Isolation: sql.LevelReadCommitted, + ReadOnly: false, + }) + if err != nil { + return ctx, err + } + + return context.WithValue(ctx, "tx", tx), nil +} +func (store *AbstractSqlStore) CommitTransaction(ctx context.Context) error { + if tx, ok := ctx.Value("tx").(*sql.Tx); ok { + return tx.Commit() + } + return nil +} +func (store *AbstractSqlStore) RollbackTransaction(ctx context.Context) error { + if tx, ok := ctx.Value("tx").(*sql.Tx); ok { + return tx.Rollback() + } + return nil +} + +func (store *AbstractSqlStore) getTxOrDB(ctx context.Context, fullpath util.FullPath, isForChildren bool) (txOrDB TxOrDB, bucket string, shortPath util.FullPath, err error) { + + shortPath = fullpath + bucket = DEFAULT_TABLE + + if tx, ok := ctx.Value("tx").(*sql.Tx); ok { + txOrDB = tx + } else { + txOrDB = store.DB + } + + if !store.SupportBucketTable { + return + } + + if !strings.HasPrefix(string(fullpath), "/buckets/") { + return + } + + // detect bucket + bucketAndObjectKey := string(fullpath)[len("/buckets/"):] + t := strings.Index(bucketAndObjectKey, "/") + if t < 0 && !isForChildren { + return + } + bucket = bucketAndObjectKey + shortPath = "/" + if t > 0 { + bucket = bucketAndObjectKey[:t] + shortPath = util.FullPath(bucketAndObjectKey[t:]) + } + + if isValidBucket(bucket) { + store.dbsLock.Lock() + defer store.dbsLock.Unlock() + + if store.dbs == nil { + store.dbs = make(map[string]bool) + } + + if _, found := store.dbs[bucket]; !found { + if err = store.CreateTable(ctx, 
bucket); err == nil { + store.dbs[bucket] = true + } + } + + } + + return +} + +func (store *AbstractSqlStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + + db, bucket, shortPath, err := store.getTxOrDB(ctx, entry.FullPath, false) + if err != nil { + return fmt.Errorf("findDB %s : %v", entry.FullPath, err) + } + + dir, name := shortPath.DirAndName() + meta, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encode %s: %s", entry.FullPath, err) + } + + if len(entry.Chunks) > 50 { + meta = util.MaybeGzipData(meta) + } + + res, err := db.ExecContext(ctx, store.GetSqlInsert(bucket), util.HashStringToLong(dir), name, dir, meta) + if err == nil { + return + } + + if !strings.Contains(strings.ToLower(err.Error()), "duplicate") { + // return fmt.Errorf("insert: %s", err) + // skip this since the error can be in a different language + } + + // now the insert failed possibly due to duplication constraints + glog.V(1).Infof("insert %s falls back to update: %v", entry.FullPath, err) + + res, err = db.ExecContext(ctx, store.GetSqlUpdate(bucket), meta, util.HashStringToLong(dir), name, dir) + if err != nil { + return fmt.Errorf("upsert %s: %s", entry.FullPath, err) + } + + _, err = res.RowsAffected() + if err != nil { + return fmt.Errorf("upsert %s but no rows affected: %s", entry.FullPath, err) + } + return nil + +} + +func (store *AbstractSqlStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + db, bucket, shortPath, err := store.getTxOrDB(ctx, entry.FullPath, false) + if err != nil { + return fmt.Errorf("findDB %s : %v", entry.FullPath, err) + } + + dir, name := shortPath.DirAndName() + meta, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encode %s: %s", entry.FullPath, err) + } + + res, err := db.ExecContext(ctx, store.GetSqlUpdate(bucket), meta, util.HashStringToLong(dir), name, dir) + if err != nil { + return fmt.Errorf("update %s: %s", entry.FullPath, err) + } + + _, err = res.RowsAffected() + if err != nil { + return fmt.Errorf("update %s but no rows affected: %s", entry.FullPath, err) + } + return nil +} + +func (store *AbstractSqlStore) FindEntry(ctx context.Context, fullpath util.FullPath) (*filer.Entry, error) { + + db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, false) + if err != nil { + return nil, fmt.Errorf("findDB %s : %v", fullpath, err) + } + + dir, name := shortPath.DirAndName() + row := db.QueryRowContext(ctx, store.GetSqlFind(bucket), util.HashStringToLong(dir), name, dir) + + var data []byte + if err := row.Scan(&data); err != nil { + if err == sql.ErrNoRows { + return nil, filer_pb.ErrNotFound + } + return nil, fmt.Errorf("find %s: %v", fullpath, err) + } + + entry := &filer.Entry{ + FullPath: fullpath, + } + if err := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + return entry, nil +} + +func (store *AbstractSqlStore) DeleteEntry(ctx context.Context, fullpath util.FullPath) error { + + db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, false) + if err != nil { + return fmt.Errorf("findDB %s : %v", fullpath, err) + } + + dir, name := shortPath.DirAndName() + + res, err := db.ExecContext(ctx, store.GetSqlDelete(bucket), util.HashStringToLong(dir), name, dir) + if err != nil { + return fmt.Errorf("delete %s: %s", fullpath, err) + } + + _, err = res.RowsAffected() + if err != nil { + return fmt.Errorf("delete %s but no rows affected: %s", 
fullpath, err) + } + + return nil +} + +func (store *AbstractSqlStore) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) error { + + db, bucket, shortPath, err := store.getTxOrDB(ctx, fullpath, true) + if err != nil { + return fmt.Errorf("findDB %s : %v", fullpath, err) + } + + if isValidBucket(bucket) && shortPath == "/" { + if err = store.deleteTable(ctx, bucket); err == nil { + store.dbsLock.Lock() + delete(store.dbs, bucket) + store.dbsLock.Unlock() + return nil + } else { + return err + } + } + + glog.V(4).Infof("delete %s SQL %s %d", string(shortPath), store.GetSqlDeleteFolderChildren(bucket), util.HashStringToLong(string(shortPath))) + + res, err := db.ExecContext(ctx, store.GetSqlDeleteFolderChildren(bucket), util.HashStringToLong(string(shortPath)), string(shortPath)) + if err != nil { + return fmt.Errorf("deleteFolderChildren %s: %s", fullpath, err) + } + + _, err = res.RowsAffected() + if err != nil { + return fmt.Errorf("deleteFolderChildren %s but no rows affected: %s", fullpath, err) + } + + return nil +} + +func (store *AbstractSqlStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + db, bucket, shortPath, err := store.getTxOrDB(ctx, dirPath, true) + if err != nil { + return lastFileName, fmt.Errorf("findDB %s : %v", dirPath, err) + } + + sqlText := store.GetSqlListExclusive(bucket) + if includeStartFile { + sqlText = store.GetSqlListInclusive(bucket) + } + + rows, err := db.QueryContext(ctx, sqlText, util.HashStringToLong(string(shortPath)), startFileName, string(shortPath), prefix+"%", limit+1) + if err != nil { + return lastFileName, fmt.Errorf("list %s : %v", dirPath, err) + } + defer rows.Close() + + for rows.Next() { + var name string + var data []byte + if err = rows.Scan(&name, &data); err != nil { + glog.V(0).Infof("scan %s : %v", dirPath, err) + return lastFileName, fmt.Errorf("scan %s: %v", dirPath, err) + } + lastFileName = name + + entry := &filer.Entry{ + FullPath: util.NewFullPath(string(dirPath), name), + } + if err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); err != nil { + glog.V(0).Infof("scan decode %s : %v", entry.FullPath, err) + return lastFileName, fmt.Errorf("scan decode %s : %v", entry.FullPath, err) + } + + if !eachEntryFunc(entry) { + break + } + + } + + return lastFileName, nil +} + +func (store *AbstractSqlStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", nil) +} + +func (store *AbstractSqlStore) Shutdown() { + store.DB.Close() +} + +func isValidBucket(bucket string) bool { + return bucket != DEFAULT_TABLE && bucket != "" +} + +func (store *AbstractSqlStore) CreateTable(ctx context.Context, bucket string) error { + if !store.SupportBucketTable { + return nil + } + _, err := store.DB.ExecContext(ctx, store.SqlGenerator.GetSqlCreateTable(bucket)) + return err +} + +func (store *AbstractSqlStore) deleteTable(ctx context.Context, bucket string) error { + if !store.SupportBucketTable { + return nil + } + _, err := store.DB.ExecContext(ctx, store.SqlGenerator.GetSqlDropTable(bucket)) + return err +} diff --git a/weed/filer/abstract_sql/abstract_sql_store_kv.go 
b/weed/filer/abstract_sql/abstract_sql_store_kv.go new file mode 100644 index 000000000..03b016c76 --- /dev/null +++ b/weed/filer/abstract_sql/abstract_sql_store_kv.go @@ -0,0 +1,105 @@ +package abstract_sql + +import ( + "context" + "database/sql" + "encoding/base64" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" + "strings" +) + +func (store *AbstractSqlStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + db, _, _, err := store.getTxOrDB(ctx, "", false) + if err != nil { + return fmt.Errorf("findDB: %v", err) + } + + dirStr, dirHash, name := genDirAndName(key) + + res, err := db.ExecContext(ctx, store.GetSqlInsert(DEFAULT_TABLE), dirHash, name, dirStr, value) + if err == nil { + return + } + + if !strings.Contains(strings.ToLower(err.Error()), "duplicate") { + // return fmt.Errorf("kv insert: %s", err) + // skip this since the error can be in a different language + } + + // now the insert failed possibly due to duplication constraints + glog.V(1).Infof("kv insert falls back to update: %s", err) + + res, err = db.ExecContext(ctx, store.GetSqlUpdate(DEFAULT_TABLE), value, dirHash, name, dirStr) + if err != nil { + return fmt.Errorf("kv upsert: %s", err) + } + + _, err = res.RowsAffected() + if err != nil { + return fmt.Errorf("kv upsert no rows affected: %s", err) + } + return nil + +} + +func (store *AbstractSqlStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + db, _, _, err := store.getTxOrDB(ctx, "", false) + if err != nil { + return nil, fmt.Errorf("findDB: %v", err) + } + + dirStr, dirHash, name := genDirAndName(key) + row := db.QueryRowContext(ctx, store.GetSqlFind(DEFAULT_TABLE), dirHash, name, dirStr) + + err = row.Scan(&value) + + if err == sql.ErrNoRows { + return nil, filer.ErrKvNotFound + } + + if err != nil { + return nil, fmt.Errorf("kv get: %v", err) + } + + return +} + +func (store *AbstractSqlStore) KvDelete(ctx context.Context, key []byte) (err error) { + + db, _, _, err := store.getTxOrDB(ctx, "", false) + if err != nil { + return fmt.Errorf("findDB: %v", err) + } + + dirStr, dirHash, name := genDirAndName(key) + + res, err := db.ExecContext(ctx, store.GetSqlDelete(DEFAULT_TABLE), dirHash, name, dirStr) + if err != nil { + return fmt.Errorf("kv delete: %s", err) + } + + _, err = res.RowsAffected() + if err != nil { + return fmt.Errorf("kv delete no rows affected: %s", err) + } + + return nil + +} + +func genDirAndName(key []byte) (dirStr string, dirHash int64, name string) { + for len(key) < 8 { + key = append(key, 0) + } + + dirHash = int64(util.BytesToUint64(key[:8])) + dirStr = base64.StdEncoding.EncodeToString(key[:8]) + name = base64.StdEncoding.EncodeToString(key[8:]) + + return +} diff --git a/weed/filer/cassandra/README.txt b/weed/filer/cassandra/README.txt new file mode 100644 index 000000000..122c9c3f4 --- /dev/null +++ b/weed/filer/cassandra/README.txt @@ -0,0 +1,14 @@ +1. create a keyspace + +CREATE KEYSPACE seaweedfs WITH replication = {'class':'SimpleStrategy', 'replication_factor' : 1}; + +2. 
create filemeta table + + USE seaweedfs; + + CREATE TABLE filemeta ( + directory varchar, + name varchar, + meta blob, + PRIMARY KEY (directory, name) + ) WITH CLUSTERING ORDER BY (name ASC); diff --git a/weed/filer/cassandra/cassandra_store.go b/weed/filer/cassandra/cassandra_store.go new file mode 100644 index 000000000..fd2ce91a6 --- /dev/null +++ b/weed/filer/cassandra/cassandra_store.go @@ -0,0 +1,212 @@ +package cassandra + +import ( + "context" + "fmt" + "github.com/gocql/gocql" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func init() { + filer.Stores = append(filer.Stores, &CassandraStore{}) +} + +type CassandraStore struct { + cluster *gocql.ClusterConfig + session *gocql.Session + superLargeDirectoryHash map[string]string +} + +func (store *CassandraStore) GetName() string { + return "cassandra" +} + +func (store *CassandraStore) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"keyspace"), + configuration.GetStringSlice(prefix+"hosts"), + configuration.GetString(prefix+"username"), + configuration.GetString(prefix+"password"), + configuration.GetStringSlice(prefix+"superLargeDirectories"), + ) +} + +func (store *CassandraStore) isSuperLargeDirectory(dir string) (dirHash string, isSuperLargeDirectory bool) { + dirHash, isSuperLargeDirectory = store.superLargeDirectoryHash[dir] + return +} + +func (store *CassandraStore) initialize(keyspace string, hosts []string, username string, password string, superLargeDirectories []string) (err error) { + store.cluster = gocql.NewCluster(hosts...) + if username != "" && password != "" { + store.cluster.Authenticator = gocql.PasswordAuthenticator{Username: username, Password: password} + } + store.cluster.Keyspace = keyspace + store.cluster.Consistency = gocql.LocalQuorum + store.session, err = store.cluster.CreateSession() + if err != nil { + glog.V(0).Infof("Failed to open cassandra store, hosts %v, keyspace %s", hosts, keyspace) + } + + // set directory hash + store.superLargeDirectoryHash = make(map[string]string) + existingHash := make(map[string]string) + for _, dir := range superLargeDirectories { + // adding dir hash to avoid duplicated names + dirHash := util.Md5String([]byte(dir))[:4] + store.superLargeDirectoryHash[dir] = dirHash + if existingDir, found := existingHash[dirHash]; found { + glog.Fatalf("directory %s has the same hash as %s", dir, existingDir) + } + existingHash[dirHash] = dir + } + return +} + +func (store *CassandraStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *CassandraStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *CassandraStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *CassandraStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + + dir, name := entry.FullPath.DirAndName() + if dirHash, ok := store.isSuperLargeDirectory(dir); ok { + dir, name = dirHash+name, "" + } + + meta, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encode %s: %s", entry.FullPath, err) + } + + if len(entry.Chunks) > 50 { + meta = util.MaybeGzipData(meta) + } + + if err := store.session.Query( + "INSERT INTO filemeta (directory,name,meta) VALUES(?,?,?) USING TTL ? 
", + dir, name, meta, entry.TtlSec).Exec(); err != nil { + return fmt.Errorf("insert %s: %s", entry.FullPath, err) + } + + return nil +} + +func (store *CassandraStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *CassandraStore) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) { + + dir, name := fullpath.DirAndName() + if dirHash, ok := store.isSuperLargeDirectory(dir); ok { + dir, name = dirHash+name, "" + } + + var data []byte + if err := store.session.Query( + "SELECT meta FROM filemeta WHERE directory=? AND name=?", + dir, name).Consistency(gocql.One).Scan(&data); err != nil { + if err != gocql.ErrNotFound { + return nil, filer_pb.ErrNotFound + } + } + + if len(data) == 0 { + return nil, filer_pb.ErrNotFound + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + return entry, nil +} + +func (store *CassandraStore) DeleteEntry(ctx context.Context, fullpath util.FullPath) error { + + dir, name := fullpath.DirAndName() + if dirHash, ok := store.isSuperLargeDirectory(dir); ok { + dir, name = dirHash+name, "" + } + + if err := store.session.Query( + "DELETE FROM filemeta WHERE directory=? AND name=?", + dir, name).Exec(); err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *CassandraStore) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) error { + if _, ok := store.isSuperLargeDirectory(string(fullpath)); ok { + return nil // filer.ErrUnsupportedSuperLargeDirectoryListing + } + + if err := store.session.Query( + "DELETE FROM filemeta WHERE directory=?", + fullpath).Exec(); err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *CassandraStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return lastFileName, filer.ErrUnsupportedListDirectoryPrefixed +} + +func (store *CassandraStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + if _, ok := store.isSuperLargeDirectory(string(dirPath)); ok { + return // nil, filer.ErrUnsupportedSuperLargeDirectoryListing + } + + cqlStr := "SELECT NAME, meta FROM filemeta WHERE directory=? AND name>? ORDER BY NAME ASC LIMIT ?" + if includeStartFile { + cqlStr = "SELECT NAME, meta FROM filemeta WHERE directory=? AND name>=? ORDER BY NAME ASC LIMIT ?" 
+ } + + var data []byte + var name string + iter := store.session.Query(cqlStr, string(dirPath), startFileName, limit+1).Iter() + for iter.Scan(&name, &data) { + entry := &filer.Entry{ + FullPath: util.NewFullPath(string(dirPath), name), + } + lastFileName = name + if decodeErr := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data)); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + if !eachEntryFunc(entry) { + break + } + } + if err := iter.Close(); err != nil { + glog.V(0).Infof("list iterator close: %v", err) + } + + return lastFileName, err +} + +func (store *CassandraStore) Shutdown() { + store.session.Close() +} diff --git a/weed/filer/cassandra/cassandra_store_kv.go b/weed/filer/cassandra/cassandra_store_kv.go new file mode 100644 index 000000000..dafa9bb15 --- /dev/null +++ b/weed/filer/cassandra/cassandra_store_kv.go @@ -0,0 +1,62 @@ +package cassandra + +import ( + "context" + "encoding/base64" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/gocql/gocql" +) + +func (store *CassandraStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + dir, name := genDirAndName(key) + + if err := store.session.Query( + "INSERT INTO filemeta (directory,name,meta) VALUES(?,?,?) USING TTL ? ", + dir, name, value, 0).Exec(); err != nil { + return fmt.Errorf("kv insert: %s", err) + } + + return nil +} + +func (store *CassandraStore) KvGet(ctx context.Context, key []byte) (data []byte, err error) { + dir, name := genDirAndName(key) + + if err := store.session.Query( + "SELECT meta FROM filemeta WHERE directory=? AND name=?", + dir, name).Consistency(gocql.One).Scan(&data); err != nil { + if err != gocql.ErrNotFound { + return nil, filer.ErrKvNotFound + } + } + + if len(data) == 0 { + return nil, filer.ErrKvNotFound + } + + return data, nil +} + +func (store *CassandraStore) KvDelete(ctx context.Context, key []byte) (err error) { + dir, name := genDirAndName(key) + + if err := store.session.Query( + "DELETE FROM filemeta WHERE directory=? 
AND name=?", + dir, name).Exec(); err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} + +func genDirAndName(key []byte) (dir string, name string) { + for len(key) < 8 { + key = append(key, 0) + } + + dir = base64.StdEncoding.EncodeToString(key[:8]) + name = base64.StdEncoding.EncodeToString(key[8:]) + + return +} diff --git a/weed/filer/configuration.go b/weed/filer/configuration.go new file mode 100644 index 000000000..9ef2f3e0f --- /dev/null +++ b/weed/filer/configuration.go @@ -0,0 +1,93 @@ +package filer + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" + "os" + "reflect" + "strings" +) + +var ( + Stores []FilerStore +) + +func (f *Filer) LoadConfiguration(config *util.ViperProxy) { + + validateOneEnabledStore(config) + + // load configuration for default filer store + hasDefaultStoreConfigured := false + for _, store := range Stores { + if config.GetBool(store.GetName() + ".enabled") { + store = reflect.New(reflect.ValueOf(store).Elem().Type()).Interface().(FilerStore) + if err := store.Initialize(config, store.GetName()+"."); err != nil { + glog.Fatalf("failed to initialize store for %s: %+v", store.GetName(), err) + } + f.SetStore(store) + glog.V(0).Infof("configured filer store to %s", store.GetName()) + hasDefaultStoreConfigured = true + break + } + } + + if !hasDefaultStoreConfigured { + println() + println("Supported filer stores are:") + for _, store := range Stores { + println(" " + store.GetName()) + } + os.Exit(-1) + } + + // load path-specific filer store here + // f.Store.AddPathSpecificStore(path, store) + storeNames := make(map[string]FilerStore) + for _, store := range Stores { + storeNames[store.GetName()] = store + } + allKeys := config.AllKeys() + for _, key := range allKeys { + if !strings.HasSuffix(key, ".enabled") { + continue + } + key = key[:len(key)-len(".enabled")] + if !strings.Contains(key, ".") { + continue + } + + parts := strings.Split(key, ".") + storeName, storeId := parts[0], parts[1] + + store, found := storeNames[storeName] + if !found { + continue + } + store = reflect.New(reflect.ValueOf(store).Elem().Type()).Interface().(FilerStore) + if err := store.Initialize(config, key+"."); err != nil { + glog.Fatalf("Failed to initialize store for %s: %+v", key, err) + } + location := config.GetString(key + ".location") + if location == "" { + glog.Errorf("path-specific filer store needs %s", key+".location") + os.Exit(-1) + } + f.Store.AddPathSpecificStore(location, storeId, store) + + glog.V(0).Infof("configure filer %s for %s", store.GetName(), location) + } + +} + +func validateOneEnabledStore(config *util.ViperProxy) { + enabledStore := "" + for _, store := range Stores { + if config.GetBool(store.GetName() + ".enabled") { + if enabledStore == "" { + enabledStore = store.GetName() + } else { + glog.Fatalf("Filer store is enabled for both %s and %s", enabledStore, store.GetName()) + } + } + } +} diff --git a/weed/filer/elastic/v7/elastic_store.go b/weed/filer/elastic/v7/elastic_store.go new file mode 100644 index 000000000..a16e5ebca --- /dev/null +++ b/weed/filer/elastic/v7/elastic_store.go @@ -0,0 +1,307 @@ +package elastic + +import ( + "context" + "fmt" + "math" + "strings" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + weed_util "github.com/chrislusf/seaweedfs/weed/util" + jsoniter "github.com/json-iterator/go" + elastic "github.com/olivere/elastic/v7" +) + +var ( + indexType 
= "_doc" + indexPrefix = ".seaweedfs_" + indexKV = ".seaweedfs_kv_entries" + kvMappings = ` { + "mappings": { + "enabled": false, + "properties": { + "Value":{ + "type": "binary" + } + } + } + }` +) + +type ESEntry struct { + ParentId string `json:"ParentId"` + Entry *filer.Entry +} + +type ESKVEntry struct { + Value []byte `json:"Value"` +} + +func init() { + filer.Stores = append(filer.Stores, &ElasticStore{}) +} + +type ElasticStore struct { + client *elastic.Client + maxPageSize int +} + +func (store *ElasticStore) GetName() string { + return "elastic7" +} + +func (store *ElasticStore) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + options := []elastic.ClientOptionFunc{} + servers := configuration.GetStringSlice(prefix + "servers") + options = append(options, elastic.SetURL(servers...)) + username := configuration.GetString(prefix + "username") + password := configuration.GetString(prefix + "password") + if username != "" && password != "" { + options = append(options, elastic.SetBasicAuth(username, password)) + } + options = append(options, elastic.SetSniff(configuration.GetBool(prefix+"sniff_enabled"))) + options = append(options, elastic.SetHealthcheck(configuration.GetBool(prefix+"healthcheck_enabled"))) + store.maxPageSize = configuration.GetInt(prefix + "index.max_result_window") + if store.maxPageSize <= 0 { + store.maxPageSize = 10000 + } + glog.Infof("filer store elastic endpoints: %v.", servers) + return store.initialize(options) +} + +func (store *ElasticStore) initialize(options []elastic.ClientOptionFunc) (err error) { + ctx := context.Background() + store.client, err = elastic.NewClient(options...) + if err != nil { + return fmt.Errorf("init elastic %v.", err) + } + if ok, err := store.client.IndexExists(indexKV).Do(ctx); err == nil && !ok { + _, err = store.client.CreateIndex(indexKV).Body(kvMappings).Do(ctx) + if err != nil { + return fmt.Errorf("create index(%s) %v.", indexKV, err) + } + } + return nil +} + +func (store *ElasticStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *ElasticStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *ElasticStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *ElasticStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return lastFileName, filer.ErrUnsupportedListDirectoryPrefixed +} + +func (store *ElasticStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + index := getIndex(entry.FullPath, false) + dir, _ := entry.FullPath.DirAndName() + id := weed_util.Md5String([]byte(entry.FullPath)) + esEntry := &ESEntry{ + ParentId: weed_util.Md5String([]byte(dir)), + Entry: entry, + } + value, err := jsoniter.Marshal(esEntry) + if err != nil { + glog.Errorf("insert entry(%s) %v.", string(entry.FullPath), err) + return fmt.Errorf("insert entry %v.", err) + } + _, err = store.client.Index(). + Index(index). + Type(indexType). + Id(id). + BodyJson(string(value)). 
+ Do(ctx) + if err != nil { + glog.Errorf("insert entry(%s) %v.", string(entry.FullPath), err) + return fmt.Errorf("insert entry %v.", err) + } + return nil +} + +func (store *ElasticStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + return store.InsertEntry(ctx, entry) +} + +func (store *ElasticStore) FindEntry(ctx context.Context, fullpath weed_util.FullPath) (entry *filer.Entry, err error) { + index := getIndex(fullpath, false) + id := weed_util.Md5String([]byte(fullpath)) + searchResult, err := store.client.Get(). + Index(index). + Type(indexType). + Id(id). + Do(ctx) + if elastic.IsNotFound(err) { + return nil, filer_pb.ErrNotFound + } + if searchResult != nil && searchResult.Found { + esEntry := &ESEntry{ + ParentId: "", + Entry: &filer.Entry{}, + } + err := jsoniter.Unmarshal(searchResult.Source, esEntry) + return esEntry.Entry, err + } + glog.Errorf("find entry(%s),%v.", string(fullpath), err) + return nil, filer_pb.ErrNotFound +} + +func (store *ElasticStore) DeleteEntry(ctx context.Context, fullpath weed_util.FullPath) (err error) { + index := getIndex(fullpath, false) + id := weed_util.Md5String([]byte(fullpath)) + if strings.Count(string(fullpath), "/") == 1 { + return store.deleteIndex(ctx, index) + } + return store.deleteEntry(ctx, index, id) +} + +func (store *ElasticStore) deleteIndex(ctx context.Context, index string) (err error) { + deleteResult, err := store.client.DeleteIndex(index).Do(ctx) + if elastic.IsNotFound(err) || (err == nil && deleteResult.Acknowledged) { + return nil + } + glog.Errorf("delete index(%s) %v.", index, err) + return err +} + +func (store *ElasticStore) deleteEntry(ctx context.Context, index, id string) (err error) { + deleteResult, err := store.client.Delete(). + Index(index). + Type(indexType). + Id(id). 
+ Do(ctx) + if err == nil { + if deleteResult.Result == "deleted" || deleteResult.Result == "not_found" { + return nil + } + } + glog.Errorf("delete entry(index:%s,_id:%s) %v.", index, id, err) + return fmt.Errorf("delete entry %v.", err) +} + +func (store *ElasticStore) DeleteFolderChildren(ctx context.Context, fullpath weed_util.FullPath) (err error) { + _, err = store.ListDirectoryEntries(ctx, fullpath, "", false, math.MaxInt32, func(entry *filer.Entry) bool { + if err := store.DeleteEntry(ctx, entry.FullPath); err != nil { + glog.Errorf("elastic delete %s: %v.", entry.FullPath, err) + return false + } + return true + }) + return +} + +func (store *ElasticStore) ListDirectoryEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return store.listDirectoryEntries(ctx, dirPath, startFileName, includeStartFile, limit, eachEntryFunc) +} + +func (store *ElasticStore) listDirectoryEntries( + ctx context.Context, fullpath weed_util.FullPath, startFileName string, inclusive bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + first := true + index := getIndex(fullpath, true) + nextStart := "" + parentId := weed_util.Md5String([]byte(fullpath)) + if _, err = store.client.Refresh(index).Do(ctx); err != nil { + if elastic.IsNotFound(err) { + store.client.CreateIndex(index).Do(ctx) + return + } + } + for { + result := &elastic.SearchResult{} + if (startFileName == "" && first) || inclusive { + if result, err = store.search(ctx, index, parentId); err != nil { + glog.Errorf("search (%s,%s,%t,%d) %v.", string(fullpath), startFileName, inclusive, limit, err) + return + } + } else { + fullPath := string(fullpath) + "/" + startFileName + if !first { + fullPath = nextStart + } + after := weed_util.Md5String([]byte(fullPath)) + if result, err = store.searchAfter(ctx, index, parentId, after); err != nil { + glog.Errorf("searchAfter (%s,%s,%t,%d) %v.", string(fullpath), startFileName, inclusive, limit, err) + return + } + } + first = false + for _, hit := range result.Hits.Hits { + esEntry := &ESEntry{ + ParentId: "", + Entry: &filer.Entry{}, + } + if err := jsoniter.Unmarshal(hit.Source, esEntry); err == nil { + limit-- + if limit < 0 { + return lastFileName, nil + } + nextStart = string(esEntry.Entry.FullPath) + fileName := esEntry.Entry.FullPath.Name() + if fileName == startFileName && !inclusive { + continue + } + if !eachEntryFunc(esEntry.Entry) { + break + } + lastFileName = fileName + } + } + if len(result.Hits.Hits) < store.maxPageSize { + break + } + } + return +} + +func (store *ElasticStore) search(ctx context.Context, index, parentId string) (result *elastic.SearchResult, err error) { + if count, err := store.client.Count(index).Do(ctx); err == nil && count == 0 { + return &elastic.SearchResult{ + Hits: &elastic.SearchHits{ + Hits: make([]*elastic.SearchHit, 0)}, + }, nil + } + queryResult, err := store.client.Search(). + Index(index). + Query(elastic.NewMatchQuery("ParentId", parentId)). + Size(store.maxPageSize). + Sort("_id", false). + Do(ctx) + return queryResult, err +} + +func (store *ElasticStore) searchAfter(ctx context.Context, index, parentId, after string) (result *elastic.SearchResult, err error) { + queryResult, err := store.client.Search(). + Index(index). + Query(elastic.NewMatchQuery("ParentId", parentId)). + SearchAfter(after). + Size(store.maxPageSize). + Sort("_id", false). 
+ Do(ctx) + return queryResult, err + +} + +func (store *ElasticStore) Shutdown() { + store.client.Stop() +} + +func getIndex(fullpath weed_util.FullPath, isDirectory bool) string { + path := strings.Split(string(fullpath), "/") + if isDirectory && len(path) >= 2 { + return indexPrefix + strings.ToLower(path[1]) + } + if len(path) > 2 { + return indexPrefix + strings.ToLower(path[1]) + } + if len(path) == 2 { + return indexPrefix + } + return "" +} diff --git a/weed/filer/elastic/v7/elastic_store_kv.go b/weed/filer/elastic/v7/elastic_store_kv.go new file mode 100644 index 000000000..99c03314e --- /dev/null +++ b/weed/filer/elastic/v7/elastic_store_kv.go @@ -0,0 +1,65 @@ +package elastic + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer" + + "github.com/chrislusf/seaweedfs/weed/glog" + jsoniter "github.com/json-iterator/go" + elastic "github.com/olivere/elastic/v7" +) + +func (store *ElasticStore) KvDelete(ctx context.Context, key []byte) (err error) { + deleteResult, err := store.client.Delete(). + Index(indexKV). + Type(indexType). + Id(string(key)). + Do(ctx) + if err == nil { + if deleteResult.Result == "deleted" || deleteResult.Result == "not_found" { + return nil + } + } + glog.Errorf("delete key(id:%s) %v.", string(key), err) + return fmt.Errorf("delete key %v.", err) +} + +func (store *ElasticStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + searchResult, err := store.client.Get(). + Index(indexKV). + Type(indexType). + Id(string(key)). + Do(ctx) + if elastic.IsNotFound(err) { + return value, filer.ErrKvNotFound + } + if searchResult != nil && searchResult.Found { + esEntry := &ESKVEntry{} + if err := jsoniter.Unmarshal(searchResult.Source, esEntry); err == nil { + return esEntry.Value, nil + } + } + glog.Errorf("find key(%s),%v.", string(key), err) + return value, filer.ErrKvNotFound +} + +func (store *ElasticStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + esEntry := &ESKVEntry{value} + val, err := jsoniter.Marshal(esEntry) + if err != nil { + glog.Errorf("insert key(%s) %v.", string(key), err) + return fmt.Errorf("insert key %v.", err) + } + _, err = store.client.Index(). + Index(indexKV). + Type(indexType). + Id(string(key)). + BodyJson(string(val)). 
+ Do(ctx) + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + return nil +} diff --git a/weed/filer/entry.go b/weed/filer/entry.go new file mode 100644 index 000000000..b7c8370e6 --- /dev/null +++ b/weed/filer/entry.go @@ -0,0 +1,113 @@ +package filer + +import ( + "os" + "time" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type Attr struct { + Mtime time.Time // time of last modification + Crtime time.Time // time of creation (OS X only) + Mode os.FileMode // file mode + Uid uint32 // owner uid + Gid uint32 // group gid + Mime string // mime type + Replication string // replication + Collection string // collection name + TtlSec int32 // ttl in seconds + DiskType string + UserName string + GroupNames []string + SymlinkTarget string + Md5 []byte + FileSize uint64 +} + +func (attr Attr) IsDirectory() bool { + return attr.Mode&os.ModeDir > 0 +} + +type Entry struct { + util.FullPath + + Attr + Extended map[string][]byte + + // the following is for files + Chunks []*filer_pb.FileChunk `json:"chunks,omitempty"` + + HardLinkId HardLinkId + HardLinkCounter int32 + Content []byte +} + +func (entry *Entry) Size() uint64 { + return maxUint64(maxUint64(TotalSize(entry.Chunks), entry.FileSize), uint64(len(entry.Content))) +} + +func (entry *Entry) Timestamp() time.Time { + if entry.IsDirectory() { + return entry.Crtime + } else { + return entry.Mtime + } +} + +func (entry *Entry) ToProtoEntry() *filer_pb.Entry { + if entry == nil { + return nil + } + return &filer_pb.Entry{ + Name: entry.FullPath.Name(), + IsDirectory: entry.IsDirectory(), + Attributes: EntryAttributeToPb(entry), + Chunks: entry.Chunks, + Extended: entry.Extended, + HardLinkId: entry.HardLinkId, + HardLinkCounter: entry.HardLinkCounter, + Content: entry.Content, + } +} + +func (entry *Entry) ToProtoFullEntry() *filer_pb.FullEntry { + if entry == nil { + return nil + } + dir, _ := entry.FullPath.DirAndName() + return &filer_pb.FullEntry{ + Dir: dir, + Entry: entry.ToProtoEntry(), + } +} + +func (entry *Entry) Clone() *Entry { + return &Entry{ + FullPath: entry.FullPath, + Attr: entry.Attr, + Chunks: entry.Chunks, + Extended: entry.Extended, + HardLinkId: entry.HardLinkId, + HardLinkCounter: entry.HardLinkCounter, + } +} + +func FromPbEntry(dir string, entry *filer_pb.Entry) *Entry { + return &Entry{ + FullPath: util.NewFullPath(dir, entry.Name), + Attr: PbToEntryAttribute(entry.Attributes), + Chunks: entry.Chunks, + HardLinkId: HardLinkId(entry.HardLinkId), + HardLinkCounter: entry.HardLinkCounter, + Content: entry.Content, + } +} + +func maxUint64(x, y uint64) uint64 { + if x > y { + return x + } + return y +} diff --git a/weed/filer/entry_codec.go b/weed/filer/entry_codec.go new file mode 100644 index 000000000..4c613f068 --- /dev/null +++ b/weed/filer/entry_codec.go @@ -0,0 +1,147 @@ +package filer + +import ( + "bytes" + "fmt" + "os" + "time" + + "github.com/golang/protobuf/proto" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func (entry *Entry) EncodeAttributesAndChunks() ([]byte, error) { + message := &filer_pb.Entry{ + Attributes: EntryAttributeToPb(entry), + Chunks: entry.Chunks, + Extended: entry.Extended, + HardLinkId: entry.HardLinkId, + HardLinkCounter: entry.HardLinkCounter, + Content: entry.Content, + } + return proto.Marshal(message) +} + +func (entry *Entry) DecodeAttributesAndChunks(blob []byte) error { + + message := &filer_pb.Entry{} + + if err := proto.UnmarshalMerge(blob, message); err != nil { + return fmt.Errorf("decoding value 
blob for %s: %v", entry.FullPath, err) + } + + entry.Attr = PbToEntryAttribute(message.Attributes) + + entry.Extended = message.Extended + + entry.Chunks = message.Chunks + + entry.HardLinkId = message.HardLinkId + entry.HardLinkCounter = message.HardLinkCounter + entry.Content = message.Content + + return nil +} + +func EntryAttributeToPb(entry *Entry) *filer_pb.FuseAttributes { + + return &filer_pb.FuseAttributes{ + Crtime: entry.Attr.Crtime.Unix(), + Mtime: entry.Attr.Mtime.Unix(), + FileMode: uint32(entry.Attr.Mode), + Uid: entry.Uid, + Gid: entry.Gid, + Mime: entry.Mime, + Collection: entry.Attr.Collection, + Replication: entry.Attr.Replication, + TtlSec: entry.Attr.TtlSec, + DiskType: entry.Attr.DiskType, + UserName: entry.Attr.UserName, + GroupName: entry.Attr.GroupNames, + SymlinkTarget: entry.Attr.SymlinkTarget, + Md5: entry.Attr.Md5, + FileSize: entry.Attr.FileSize, + } +} + +func PbToEntryAttribute(attr *filer_pb.FuseAttributes) Attr { + + t := Attr{} + + if attr == nil { + return t + } + + t.Crtime = time.Unix(attr.Crtime, 0) + t.Mtime = time.Unix(attr.Mtime, 0) + t.Mode = os.FileMode(attr.FileMode) + t.Uid = attr.Uid + t.Gid = attr.Gid + t.Mime = attr.Mime + t.Collection = attr.Collection + t.Replication = attr.Replication + t.TtlSec = attr.TtlSec + t.DiskType = attr.DiskType + t.UserName = attr.UserName + t.GroupNames = attr.GroupName + t.SymlinkTarget = attr.SymlinkTarget + t.Md5 = attr.Md5 + t.FileSize = attr.FileSize + + return t +} + +func EqualEntry(a, b *Entry) bool { + if a == b { + return true + } + if a == nil && b != nil || a != nil && b == nil { + return false + } + if !proto.Equal(EntryAttributeToPb(a), EntryAttributeToPb(b)) { + return false + } + if len(a.Chunks) != len(b.Chunks) { + return false + } + + if !eq(a.Extended, b.Extended) { + return false + } + + if !bytes.Equal(a.Md5, b.Md5) { + return false + } + + for i := 0; i < len(a.Chunks); i++ { + if !proto.Equal(a.Chunks[i], b.Chunks[i]) { + return false + } + } + + if !bytes.Equal(a.HardLinkId, b.HardLinkId) { + return false + } + if a.HardLinkCounter != b.HardLinkCounter { + return false + } + if !bytes.Equal(a.Content, b.Content) { + return false + } + return true +} + +func eq(a, b map[string][]byte) bool { + if len(a) != len(b) { + return false + } + + for k, v := range a { + if w, ok := b[k]; !ok || !bytes.Equal(v, w) { + return false + } + } + + return true +} diff --git a/weed/filer/etcd/etcd_store.go b/weed/filer/etcd/etcd_store.go new file mode 100644 index 000000000..71ed738f9 --- /dev/null +++ b/weed/filer/etcd/etcd_store.go @@ -0,0 +1,219 @@ +package etcd + +import ( + "bytes" + "context" + "fmt" + "strings" + "time" + + "go.etcd.io/etcd/clientv3" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + weed_util "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + DIR_FILE_SEPARATOR = byte(0x00) +) + +func init() { + filer.Stores = append(filer.Stores, &EtcdStore{}) +} + +type EtcdStore struct { + client *clientv3.Client +} + +func (store *EtcdStore) GetName() string { + return "etcd" +} + +func (store *EtcdStore) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + servers := configuration.GetString(prefix + "servers") + if servers == "" { + servers = "localhost:2379" + } + + timeout := configuration.GetString(prefix + "timeout") + if timeout == "" { + timeout = "3s" + } + + return store.initialize(servers, timeout) +} + +func (store *EtcdStore) initialize(servers 
string, timeout string) (err error) { + glog.Infof("filer store etcd: %s", servers) + + to, err := time.ParseDuration(timeout) + if err != nil { + return fmt.Errorf("parse timeout %s: %s", timeout, err) + } + + store.client, err = clientv3.New(clientv3.Config{ + Endpoints: strings.Split(servers, ","), + DialTimeout: to, + }) + if err != nil { + return fmt.Errorf("connect to etcd %s: %s", servers, err) + } + + return +} + +func (store *EtcdStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *EtcdStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *EtcdStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *EtcdStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + key := genKey(entry.DirAndName()) + + meta, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + if len(entry.Chunks) > 50 { + meta = weed_util.MaybeGzipData(meta) + } + + if _, err := store.client.Put(ctx, string(key), string(meta)); err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + return nil +} + +func (store *EtcdStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + return store.InsertEntry(ctx, entry) +} + +func (store *EtcdStore) FindEntry(ctx context.Context, fullpath weed_util.FullPath) (entry *filer.Entry, err error) { + key := genKey(fullpath.DirAndName()) + + resp, err := store.client.Get(ctx, string(key)) + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + if len(resp.Kvs) == 0 { + return nil, filer_pb.ErrNotFound + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(weed_util.MaybeDecompressData(resp.Kvs[0].Value)) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + return entry, nil +} + +func (store *EtcdStore) DeleteEntry(ctx context.Context, fullpath weed_util.FullPath) (err error) { + key := genKey(fullpath.DirAndName()) + + if _, err := store.client.Delete(ctx, string(key)); err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *EtcdStore) DeleteFolderChildren(ctx context.Context, fullpath weed_util.FullPath) (err error) { + directoryPrefix := genDirectoryKeyPrefix(fullpath, "") + + if _, err := store.client.Delete(ctx, string(directoryPrefix), clientv3.WithPrefix()); err != nil { + return fmt.Errorf("deleteFolderChildren %s : %v", fullpath, err) + } + + return nil +} + +func (store *EtcdStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return lastFileName, filer.ErrUnsupportedListDirectoryPrefixed +} + +func (store *EtcdStore) ListDirectoryEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + directoryPrefix := genDirectoryKeyPrefix(dirPath, "") + lastFileStart := directoryPrefix + if startFileName != "" { + lastFileStart = genDirectoryKeyPrefix(dirPath, startFileName) + } + + resp, err := store.client.Get(ctx, string(lastFileStart), + clientv3.WithPrefix(), clientv3.WithSort(clientv3.SortByKey, clientv3.SortDescend)) + if err != nil { + return lastFileName, 
fmt.Errorf("list %s : %v", dirPath, err) + } + + for _, kv := range resp.Kvs { + if !bytes.HasPrefix(kv.Key, directoryPrefix) { + break + } + fileName := getNameFromKey(kv.Key) + if fileName == "" { + continue + } + if fileName == startFileName && !includeStartFile { + continue + } + limit-- + if limit < 0 { + break + } + entry := &filer.Entry{ + FullPath: weed_util.NewFullPath(string(dirPath), fileName), + } + if decodeErr := entry.DecodeAttributesAndChunks(weed_util.MaybeDecompressData(kv.Value)); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + if !eachEntryFunc(entry) { + break + } + lastFileName = fileName + } + + return lastFileName, err +} + +func genKey(dirPath, fileName string) (key []byte) { + key = []byte(dirPath) + key = append(key, DIR_FILE_SEPARATOR) + key = append(key, []byte(fileName)...) + return key +} + +func genDirectoryKeyPrefix(fullpath weed_util.FullPath, startFileName string) (keyPrefix []byte) { + keyPrefix = []byte(string(fullpath)) + keyPrefix = append(keyPrefix, DIR_FILE_SEPARATOR) + if len(startFileName) > 0 { + keyPrefix = append(keyPrefix, []byte(startFileName)...) + } + return keyPrefix +} + +func getNameFromKey(key []byte) string { + sepIndex := len(key) - 1 + for sepIndex >= 0 && key[sepIndex] != DIR_FILE_SEPARATOR { + sepIndex-- + } + + return string(key[sepIndex+1:]) +} + +func (store *EtcdStore) Shutdown() { + store.client.Close() +} diff --git a/weed/filer/etcd/etcd_store_kv.go b/weed/filer/etcd/etcd_store_kv.go new file mode 100644 index 000000000..df252f46c --- /dev/null +++ b/weed/filer/etcd/etcd_store_kv.go @@ -0,0 +1,44 @@ +package etcd + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" +) + +func (store *EtcdStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + _, err = store.client.Put(ctx, string(key), string(value)) + + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + + return nil +} + +func (store *EtcdStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + resp, err := store.client.Get(ctx, string(key)) + + if err != nil { + return nil, fmt.Errorf("kv get: %v", err) + } + + if len(resp.Kvs) == 0 { + return nil, filer.ErrKvNotFound + } + + return resp.Kvs[0].Value, nil +} + +func (store *EtcdStore) KvDelete(ctx context.Context, key []byte) (err error) { + + _, err = store.client.Delete(ctx, string(key)) + + if err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} diff --git a/weed/filer/filechunk_manifest.go b/weed/filer/filechunk_manifest.go new file mode 100644 index 000000000..c709dc819 --- /dev/null +++ b/weed/filer/filechunk_manifest.go @@ -0,0 +1,194 @@ +package filer + +import ( + "bytes" + "fmt" + "github.com/chrislusf/seaweedfs/weed/wdclient" + "io" + "math" + "time" + + "github.com/golang/protobuf/proto" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + ManifestBatch = 1000 +) + +func HasChunkManifest(chunks []*filer_pb.FileChunk) bool { + for _, chunk := range chunks { + if chunk.IsChunkManifest { + return true + } + } + return false +} + +func SeparateManifestChunks(chunks []*filer_pb.FileChunk) (manifestChunks, nonManifestChunks []*filer_pb.FileChunk) { + for _, c := range chunks { + if c.IsChunkManifest { + manifestChunks = append(manifestChunks, c) + } else { + nonManifestChunks = append(nonManifestChunks, c) + } + } + return +} + +func 
ResolveChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) (dataChunks, manifestChunks []*filer_pb.FileChunk, manifestResolveErr error) { + // TODO maybe parallel this + for _, chunk := range chunks { + if !chunk.IsChunkManifest { + dataChunks = append(dataChunks, chunk) + continue + } + + resolvedChunks, err := ResolveOneChunkManifest(lookupFileIdFn, chunk) + if err != nil { + return chunks, nil, err + } + + manifestChunks = append(manifestChunks, chunk) + // recursive + dchunks, mchunks, subErr := ResolveChunkManifest(lookupFileIdFn, resolvedChunks) + if subErr != nil { + return chunks, nil, subErr + } + dataChunks = append(dataChunks, dchunks...) + manifestChunks = append(manifestChunks, mchunks...) + } + return +} + +func ResolveOneChunkManifest(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunk *filer_pb.FileChunk) (dataChunks []*filer_pb.FileChunk, manifestResolveErr error) { + if !chunk.IsChunkManifest { + return + } + + // IsChunkManifest + data, err := fetchChunk(lookupFileIdFn, chunk.GetFileIdString(), chunk.CipherKey, chunk.IsCompressed) + if err != nil { + return nil, fmt.Errorf("fail to read manifest %s: %v", chunk.GetFileIdString(), err) + } + m := &filer_pb.FileChunkManifest{} + if err := proto.Unmarshal(data, m); err != nil { + return nil, fmt.Errorf("fail to unmarshal manifest %s: %v", chunk.GetFileIdString(), err) + } + + // recursive + filer_pb.AfterEntryDeserialization(m.Chunks) + return m.Chunks, nil +} + +// TODO fetch from cache for weed mount? +func fetchChunk(lookupFileIdFn wdclient.LookupFileIdFunctionType, fileId string, cipherKey []byte, isGzipped bool) ([]byte, error) { + urlStrings, err := lookupFileIdFn(fileId) + if err != nil { + glog.Errorf("operation LookupFileId %s failed, err: %v", fileId, err) + return nil, err + } + return retriedFetchChunkData(urlStrings, cipherKey, isGzipped, true, 0, 0) +} + +func retriedFetchChunkData(urlStrings []string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64, size int) ([]byte, error) { + + var err error + var shouldRetry bool + receivedData := make([]byte, 0, size) + + for waitTime := time.Second; waitTime < util.RetryWaitTime; waitTime += waitTime / 2 { + for _, urlString := range urlStrings { + receivedData = receivedData[:0] + shouldRetry, err = util.ReadUrlAsStream(urlString+"?readDeleted=true", cipherKey, isGzipped, isFullChunk, offset, size, func(data []byte) { + receivedData = append(receivedData, data...) 
+ }) + if !shouldRetry { + break + } + if err != nil { + glog.V(0).Infof("read %s failed, err: %v", urlString, err) + } else { + break + } + } + if err != nil && shouldRetry { + glog.V(0).Infof("retry reading in %v", waitTime) + time.Sleep(waitTime) + } else { + break + } + } + + return receivedData, err + +} + +func MaybeManifestize(saveFunc SaveDataAsChunkFunctionType, inputChunks []*filer_pb.FileChunk) (chunks []*filer_pb.FileChunk, err error) { + return doMaybeManifestize(saveFunc, inputChunks, ManifestBatch, mergeIntoManifest) +} + +func doMaybeManifestize(saveFunc SaveDataAsChunkFunctionType, inputChunks []*filer_pb.FileChunk, mergeFactor int, mergefn func(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error)) (chunks []*filer_pb.FileChunk, err error) { + + var dataChunks []*filer_pb.FileChunk + for _, chunk := range inputChunks { + if !chunk.IsChunkManifest { + dataChunks = append(dataChunks, chunk) + } else { + chunks = append(chunks, chunk) + } + } + + remaining := len(dataChunks) + for i := 0; i+mergeFactor <= len(dataChunks); i += mergeFactor { + chunk, err := mergefn(saveFunc, dataChunks[i:i+mergeFactor]) + if err != nil { + return dataChunks, err + } + chunks = append(chunks, chunk) + remaining -= mergeFactor + } + // remaining + for i := len(dataChunks) - remaining; i < len(dataChunks); i++ { + chunks = append(chunks, dataChunks[i]) + } + return +} + +func mergeIntoManifest(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error) { + + filer_pb.BeforeEntrySerialization(dataChunks) + + // create and serialize the manifest + data, serErr := proto.Marshal(&filer_pb.FileChunkManifest{ + Chunks: dataChunks, + }) + if serErr != nil { + return nil, fmt.Errorf("serializing manifest: %v", serErr) + } + + minOffset, maxOffset := int64(math.MaxInt64), int64(math.MinInt64) + for _, chunk := range dataChunks { + if minOffset > int64(chunk.Offset) { + minOffset = chunk.Offset + } + if maxOffset < int64(chunk.Size)+chunk.Offset { + maxOffset = int64(chunk.Size) + chunk.Offset + } + } + + manifestChunk, _, _, err = saveFunc(bytes.NewReader(data), "", 0) + if err != nil { + return nil, err + } + manifestChunk.IsChunkManifest = true + manifestChunk.Offset = minOffset + manifestChunk.Size = uint64(maxOffset - minOffset) + + return +} + +type SaveDataAsChunkFunctionType func(reader io.Reader, name string, offset int64) (chunk *filer_pb.FileChunk, collection, replication string, err error) diff --git a/weed/filer/filechunk_manifest_test.go b/weed/filer/filechunk_manifest_test.go new file mode 100644 index 000000000..ce12c5da6 --- /dev/null +++ b/weed/filer/filechunk_manifest_test.go @@ -0,0 +1,113 @@ +package filer + +import ( + "bytes" + "math" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func TestDoMaybeManifestize(t *testing.T) { + var manifestTests = []struct { + inputs []*filer_pb.FileChunk + expected []*filer_pb.FileChunk + }{ + { + inputs: []*filer_pb.FileChunk{ + {FileId: "1", IsChunkManifest: false}, + {FileId: "2", IsChunkManifest: false}, + {FileId: "3", IsChunkManifest: false}, + {FileId: "4", IsChunkManifest: false}, + }, + expected: []*filer_pb.FileChunk{ + {FileId: "12", IsChunkManifest: true}, + {FileId: "34", IsChunkManifest: true}, + }, + }, + { + inputs: []*filer_pb.FileChunk{ + {FileId: "1", IsChunkManifest: true}, + {FileId: "2", IsChunkManifest: false}, + {FileId: "3", 
IsChunkManifest: false}, + {FileId: "4", IsChunkManifest: false}, + }, + expected: []*filer_pb.FileChunk{ + {FileId: "1", IsChunkManifest: true}, + {FileId: "23", IsChunkManifest: true}, + {FileId: "4", IsChunkManifest: false}, + }, + }, + { + inputs: []*filer_pb.FileChunk{ + {FileId: "1", IsChunkManifest: false}, + {FileId: "2", IsChunkManifest: true}, + {FileId: "3", IsChunkManifest: false}, + {FileId: "4", IsChunkManifest: false}, + }, + expected: []*filer_pb.FileChunk{ + {FileId: "2", IsChunkManifest: true}, + {FileId: "13", IsChunkManifest: true}, + {FileId: "4", IsChunkManifest: false}, + }, + }, + { + inputs: []*filer_pb.FileChunk{ + {FileId: "1", IsChunkManifest: true}, + {FileId: "2", IsChunkManifest: true}, + {FileId: "3", IsChunkManifest: false}, + {FileId: "4", IsChunkManifest: false}, + }, + expected: []*filer_pb.FileChunk{ + {FileId: "1", IsChunkManifest: true}, + {FileId: "2", IsChunkManifest: true}, + {FileId: "34", IsChunkManifest: true}, + }, + }, + } + + for i, mtest := range manifestTests { + println("test", i) + actual, _ := doMaybeManifestize(nil, mtest.inputs, 2, mockMerge) + assertEqualChunks(t, mtest.expected, actual) + } + +} + +func assertEqualChunks(t *testing.T, expected, actual []*filer_pb.FileChunk) { + assert.Equal(t, len(expected), len(actual)) + for i := 0; i < len(actual); i++ { + assertEqualChunk(t, expected[i], actual[i]) + } +} +func assertEqualChunk(t *testing.T, expected, actual *filer_pb.FileChunk) { + assert.Equal(t, expected.FileId, actual.FileId) + assert.Equal(t, expected.IsChunkManifest, actual.IsChunkManifest) +} + +func mockMerge(saveFunc SaveDataAsChunkFunctionType, dataChunks []*filer_pb.FileChunk) (manifestChunk *filer_pb.FileChunk, err error) { + + var buf bytes.Buffer + minOffset, maxOffset := int64(math.MaxInt64), int64(math.MinInt64) + for k := 0; k < len(dataChunks); k++ { + chunk := dataChunks[k] + buf.WriteString(chunk.FileId) + if minOffset > int64(chunk.Offset) { + minOffset = chunk.Offset + } + if maxOffset < int64(chunk.Size)+chunk.Offset { + maxOffset = int64(chunk.Size) + chunk.Offset + } + } + + manifestChunk = &filer_pb.FileChunk{ + FileId: buf.String(), + } + manifestChunk.IsChunkManifest = true + manifestChunk.Offset = minOffset + manifestChunk.Size = uint64(maxOffset - minOffset) + + return +} diff --git a/weed/filer/filechunks.go b/weed/filer/filechunks.go new file mode 100644 index 000000000..68f308a51 --- /dev/null +++ b/weed/filer/filechunks.go @@ -0,0 +1,292 @@ +package filer + +import ( + "bytes" + "encoding/hex" + "fmt" + "github.com/chrislusf/seaweedfs/weed/wdclient" + "math" + "sort" + "sync" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func TotalSize(chunks []*filer_pb.FileChunk) (size uint64) { + for _, c := range chunks { + t := uint64(c.Offset + int64(c.Size)) + if size < t { + size = t + } + } + return +} + +func FileSize(entry *filer_pb.Entry) (size uint64) { + return maxUint64(TotalSize(entry.Chunks), entry.Attributes.FileSize) +} + +func ETag(entry *filer_pb.Entry) (etag string) { + if entry.Attributes == nil || entry.Attributes.Md5 == nil { + return ETagChunks(entry.Chunks) + } + return fmt.Sprintf("%x", entry.Attributes.Md5) +} + +func ETagEntry(entry *Entry) (etag string) { + if entry.Attr.Md5 == nil { + return ETagChunks(entry.Chunks) + } + return fmt.Sprintf("%x", entry.Attr.Md5) +} + +func ETagChunks(chunks []*filer_pb.FileChunk) (etag string) { + if len(chunks) == 1 { + return chunks[0].ETag + } + md5_digests := [][]byte{} + for _, c :=
range chunks { + md5_decoded, _ := hex.DecodeString(c.ETag) + md5_digests = append(md5_digests, md5_decoded) + } + return fmt.Sprintf("%x-%d", util.Md5(bytes.Join(md5_digests, nil)), len(chunks)) +} + +func CompactFileChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) { + + visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks) + + fileIds := make(map[string]bool) + for _, interval := range visibles { + fileIds[interval.fileId] = true + } + for _, chunk := range chunks { + if _, found := fileIds[chunk.GetFileIdString()]; found { + compacted = append(compacted, chunk) + } else { + garbage = append(garbage, chunk) + } + } + + return +} + +func MinusChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk, err error) { + + aData, aMeta, aErr := ResolveChunkManifest(lookupFileIdFn, as) + if aErr != nil { + return nil, aErr + } + bData, bMeta, bErr := ResolveChunkManifest(lookupFileIdFn, bs) + if bErr != nil { + return nil, bErr + } + + delta = append(delta, DoMinusChunks(aData, bData)...) + delta = append(delta, DoMinusChunks(aMeta, bMeta)...) + return +} + +func DoMinusChunks(as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk) { + + fileIds := make(map[string]bool) + for _, interval := range bs { + fileIds[interval.GetFileIdString()] = true + } + for _, chunk := range as { + if _, found := fileIds[chunk.GetFileIdString()]; !found { + delta = append(delta, chunk) + } + } + + return +} + +type ChunkView struct { + FileId string + Offset int64 + Size uint64 + LogicOffset int64 // actual offset in the file, for the data specified via [offset, offset+size) in current chunk + ChunkSize uint64 + CipherKey []byte + IsGzipped bool +} + +func (cv *ChunkView) IsFullChunk() bool { + return cv.Size == cv.ChunkSize +} + +func ViewFromChunks(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk, offset int64, size int64) (views []*ChunkView) { + + visibles, _ := NonOverlappingVisibleIntervals(lookupFileIdFn, chunks) + + return ViewFromVisibleIntervals(visibles, offset, size) + +} + +func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int64) (views []*ChunkView) { + + stop := offset + size + if size == math.MaxInt64 { + stop = math.MaxInt64 + } + if stop < offset { + stop = math.MaxInt64 + } + + for _, chunk := range visibles { + + chunkStart, chunkStop := max(offset, chunk.start), min(stop, chunk.stop) + + if chunkStart < chunkStop { + views = append(views, &ChunkView{ + FileId: chunk.fileId, + Offset: chunkStart - chunk.start + chunk.chunkOffset, + Size: uint64(chunkStop - chunkStart), + LogicOffset: chunkStart, + ChunkSize: chunk.chunkSize, + CipherKey: chunk.cipherKey, + IsGzipped: chunk.isGzipped, + }) + } + } + + return views + +} + +func logPrintf(name string, visibles []VisibleInterval) { + + /* + glog.V(0).Infof("%s len %d", name, len(visibles)) + for _, v := range visibles { + glog.V(0).Infof("%s: [%d,%d) %s %d", name, v.start, v.stop, v.fileId, v.chunkOffset) + } + */ +} + +var bufPool = sync.Pool{ + New: func() interface{} { + return new(VisibleInterval) + }, +} + +func MergeIntoVisibles(visibles []VisibleInterval, chunk *filer_pb.FileChunk) (newVisibles []VisibleInterval) { + + newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Mtime, 0, chunk.Size, chunk.CipherKey, chunk.IsCompressed) + + length := len(visibles) + if length == 0 { + 
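+ // no intervals yet: the new chunk simply becomes the first visible interval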
return append(visibles, newV) + } + last := visibles[length-1] + if last.stop <= chunk.Offset { + return append(visibles, newV) + } + + logPrintf(" before", visibles) + // glog.V(0).Infof("newVisibles %d adding chunk [%d,%d) %s size:%d", len(newVisibles), chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Size) + chunkStop := chunk.Offset + int64(chunk.Size) + for _, v := range visibles { + if v.start < chunk.Offset && chunk.Offset < v.stop { + t := newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTime, v.chunkOffset, v.chunkSize, v.cipherKey, v.isGzipped) + newVisibles = append(newVisibles, t) + // glog.V(0).Infof("visible %d [%d,%d) =1> [%d,%d)", i, v.start, v.stop, t.start, t.stop) + } + if v.start < chunkStop && chunkStop < v.stop { + t := newVisibleInterval(chunkStop, v.stop, v.fileId, v.modifiedTime, v.chunkOffset+(chunkStop-v.start), v.chunkSize, v.cipherKey, v.isGzipped) + newVisibles = append(newVisibles, t) + // glog.V(0).Infof("visible %d [%d,%d) =2> [%d,%d)", i, v.start, v.stop, t.start, t.stop) + } + if chunkStop <= v.start || v.stop <= chunk.Offset { + newVisibles = append(newVisibles, v) + // glog.V(0).Infof("visible %d [%d,%d) =3> [%d,%d)", i, v.start, v.stop, v.start, v.stop) + } + } + newVisibles = append(newVisibles, newV) + + logPrintf(" append", newVisibles) + + // newV was appended last; shift intervals with a larger start to the right until newV lands in sorted position + for i := len(newVisibles) - 1; i >= 0; i-- { + if i > 0 && newV.start < newVisibles[i-1].start { + newVisibles[i] = newVisibles[i-1] + } else { + newVisibles[i] = newV + break + } + } + logPrintf(" sorted", newVisibles) + + return newVisibles +} + +// NonOverlappingVisibleIntervals translates the file chunks into non-overlapping VisibleIntervals in memory. +// If a chunk's content is a chunk manifest, it is resolved into its underlying data chunks first. +func NonOverlappingVisibleIntervals(lookupFileIdFn wdclient.LookupFileIdFunctionType, chunks []*filer_pb.FileChunk) (visibles []VisibleInterval, err error) { + + chunks, _, err = ResolveChunkManifest(lookupFileIdFn, chunks) + + sort.Slice(chunks, func(i, j int) bool { + if chunks[i].Mtime == chunks[j].Mtime { + filer_pb.EnsureFid(chunks[i]) + filer_pb.EnsureFid(chunks[j]) + if chunks[i].Fid == nil || chunks[j].Fid == nil { + return true + } + return chunks[i].Fid.FileKey < chunks[j].Fid.FileKey + } + return chunks[i].Mtime < chunks[j].Mtime // order by modification time; ties are broken by file key above + }) + + for _, chunk := range chunks { + + // glog.V(0).Infof("merge [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size)) + visibles = MergeIntoVisibles(visibles, chunk) + + logPrintf("add", visibles) + + } + + return +} + +// find non-overlapping visible intervals; each visible interval maps to (a part of) exactly one file chunk + +type VisibleInterval struct { + start int64 + stop int64 + modifiedTime int64 + fileId string + chunkOffset int64 + chunkSize uint64 + cipherKey []byte + isGzipped bool +} + +func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, chunkOffset int64, chunkSize uint64, cipherKey []byte, isGzipped bool) VisibleInterval { + return VisibleInterval{ + start: start, + stop: stop, + fileId: fileId, + modifiedTime: modifiedTime, + chunkOffset: chunkOffset, // the starting position in the chunk + chunkSize: chunkSize, + cipherKey: cipherKey, + isGzipped: isGzipped, + } +} + +func min(x, y int64) int64 { + if x <= y { + return x + } + return y +} +func max(x, y int64) int64 { + if x <= y { + return y + } + return x +} diff --git a/weed/filer/filechunks2_test.go b/weed/filer/filechunks2_test.go new file mode 100644 index 000000000..9f9566d9b --- /dev/null +++ b/weed/filer/filechunks2_test.go @@ -0,0
+1,46 @@ +package filer + +import ( + "sort" + "testing" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func TestCompactFileChunksRealCase(t *testing.T) { + + chunks := []*filer_pb.FileChunk{ + {FileId: "2,512f31f2c0700a", Offset: 0, Size: 25 - 0, Mtime: 5320497}, + {FileId: "6,512f2c2e24e9e8", Offset: 868352, Size: 917585 - 868352, Mtime: 5320492}, + {FileId: "7,514468dd5954ca", Offset: 884736, Size: 901120 - 884736, Mtime: 5325928}, + {FileId: "5,5144463173fe77", Offset: 917504, Size: 2297856 - 917504, Mtime: 5325894}, + {FileId: "4,51444c7ab54e2d", Offset: 2301952, Size: 2367488 - 2301952, Mtime: 5325900}, + {FileId: "4,514450e643ad22", Offset: 2371584, Size: 2420736 - 2371584, Mtime: 5325904}, + {FileId: "6,514456a5e9e4d7", Offset: 2449408, Size: 2490368 - 2449408, Mtime: 5325910}, + {FileId: "3,51444f8d53eebe", Offset: 2494464, Size: 2555904 - 2494464, Mtime: 5325903}, + {FileId: "4,5144578b097c7e", Offset: 2560000, Size: 2596864 - 2560000, Mtime: 5325911}, + {FileId: "3,51445500b6b4ac", Offset: 2637824, Size: 2678784 - 2637824, Mtime: 5325909}, + {FileId: "1,51446285e52a61", Offset: 2695168, Size: 2715648 - 2695168, Mtime: 5325922}, + } + + printChunks("before", chunks) + + compacted, garbage := CompactFileChunks(nil, chunks) + + printChunks("compacted", compacted) + printChunks("garbage", garbage) + +} + +func printChunks(name string, chunks []*filer_pb.FileChunk) { + sort.Slice(chunks, func(i, j int) bool { + if chunks[i].Offset == chunks[j].Offset { + return chunks[i].Mtime < chunks[j].Mtime + } + return chunks[i].Offset < chunks[j].Offset + }) + for _, chunk := range chunks { + glog.V(0).Infof("%s chunk %s [%10d,%10d)", name, chunk.GetFileIdString(), chunk.Offset, chunk.Offset+int64(chunk.Size)) + } +} diff --git a/weed/filer/filechunks_test.go b/weed/filer/filechunks_test.go new file mode 100644 index 000000000..699e7e298 --- /dev/null +++ b/weed/filer/filechunks_test.go @@ -0,0 +1,539 @@ +package filer + +import ( + "fmt" + "log" + "math" + "math/rand" + "strconv" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func TestCompactFileChunks(t *testing.T) { + chunks := []*filer_pb.FileChunk{ + {Offset: 10, Size: 100, FileId: "abc", Mtime: 50}, + {Offset: 100, Size: 100, FileId: "def", Mtime: 100}, + {Offset: 200, Size: 100, FileId: "ghi", Mtime: 200}, + {Offset: 110, Size: 200, FileId: "jkl", Mtime: 300}, + } + + compacted, garbage := CompactFileChunks(nil, chunks) + + if len(compacted) != 3 { + t.Fatalf("unexpected compacted: %d", len(compacted)) + } + if len(garbage) != 1 { + t.Fatalf("unexpected garbage: %d", len(garbage)) + } + +} + +func TestCompactFileChunks2(t *testing.T) { + + chunks := []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 50}, + {Offset: 100, Size: 100, FileId: "def", Mtime: 100}, + {Offset: 200, Size: 100, FileId: "ghi", Mtime: 200}, + {Offset: 0, Size: 100, FileId: "abcf", Mtime: 300}, + {Offset: 50, Size: 100, FileId: "fhfh", Mtime: 400}, + {Offset: 100, Size: 100, FileId: "yuyu", Mtime: 500}, + } + + k := 3 + + for n := 0; n < k; n++ { + chunks = append(chunks, &filer_pb.FileChunk{ + Offset: int64(n * 100), Size: 100, FileId: fmt.Sprintf("fileId%d", n), Mtime: int64(n), + }) + chunks = append(chunks, &filer_pb.FileChunk{ + Offset: int64(n * 50), Size: 100, FileId: fmt.Sprintf("fileId%d", n+k), Mtime: int64(n + k), + }) + } + + compacted, garbage := CompactFileChunks(nil, chunks) + + if len(compacted) != 4 { + 
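+ // the 12 overlapping chunks above (6 fixed + 2x3 generated) should compact down to 4 live chunks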
t.Fatalf("unexpected compacted: %d", len(compacted)) + } + if len(garbage) != 8 { + t.Fatalf("unexpected garbage: %d", len(garbage)) + } +} + +func TestRandomFileChunksCompact(t *testing.T) { + + data := make([]byte, 1024) + + var chunks []*filer_pb.FileChunk + for i := 0; i < 15; i++ { + start, stop := rand.Intn(len(data)), rand.Intn(len(data)) + if start > stop { + start, stop = stop, start + } + if start+16 < stop { + stop = start + 16 + } + chunk := &filer_pb.FileChunk{ + FileId: strconv.Itoa(i), + Offset: int64(start), + Size: uint64(stop - start), + Mtime: int64(i), + Fid: &filer_pb.FileId{FileKey: uint64(i)}, + } + chunks = append(chunks, chunk) + for x := start; x < stop; x++ { + data[x] = byte(i) + } + } + + visibles, _ := NonOverlappingVisibleIntervals(nil, chunks) + + for _, v := range visibles { + for x := v.start; x < v.stop; x++ { + assert.Equal(t, strconv.Itoa(int(data[x])), v.fileId) + } + } + +} + +func TestIntervalMerging(t *testing.T) { + + testcases := []struct { + Chunks []*filer_pb.FileChunk + Expected []*VisibleInterval + }{ + // case 0: normal + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 100, Size: 100, FileId: "asdf", Mtime: 134}, + {Offset: 200, Size: 100, FileId: "fsad", Mtime: 353}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 100, fileId: "abc"}, + {start: 100, stop: 200, fileId: "asdf"}, + {start: 200, stop: 300, fileId: "fsad"}, + }, + }, + // case 1: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 200, fileId: "asdf"}, + }, + }, + // case 2: updates overwrite part of previous chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "a", Mtime: 123}, + {Offset: 0, Size: 70, FileId: "b", Mtime: 134}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 70, fileId: "b"}, + {start: 70, stop: 100, fileId: "a", chunkOffset: 70}, + }, + }, + // case 3: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + {Offset: 50, Size: 250, FileId: "xxxx", Mtime: 154}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 50, fileId: "asdf"}, + {start: 50, stop: 300, fileId: "xxxx"}, + }, + }, + // case 4: updates far away from prev chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + {Offset: 250, Size: 250, FileId: "xxxx", Mtime: 154}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 200, fileId: "asdf"}, + {start: 250, stop: 500, fileId: "xxxx"}, + }, + }, + // case 5: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "a", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "d", Mtime: 184}, + {Offset: 70, Size: 150, FileId: "c", Mtime: 143}, + {Offset: 80, Size: 100, FileId: "b", Mtime: 134}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 200, fileId: "d"}, + {start: 200, stop: 220, fileId: "c", chunkOffset: 130}, + }, + }, + // case 6: same updates + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Fid: &filer_pb.FileId{FileKey: 1}, Mtime: 123}, + {Offset: 0, Size: 100, FileId: "axf", Fid: &filer_pb.FileId{FileKey: 2}, Mtime: 123}, + {Offset: 0, Size: 100, FileId: "xyz", Fid: &filer_pb.FileId{FileKey: 
3}, Mtime: 123}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 100, fileId: "xyz"}, + }, + }, + // case 7: real updates + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 2097152, FileId: "7,0294cbb9892b", Mtime: 123}, + {Offset: 0, Size: 3145728, FileId: "3,029565bf3092", Mtime: 130}, + {Offset: 2097152, Size: 3145728, FileId: "6,029632f47ae2", Mtime: 140}, + {Offset: 5242880, Size: 3145728, FileId: "2,029734c5aa10", Mtime: 150}, + {Offset: 8388608, Size: 3145728, FileId: "5,02982f80de50", Mtime: 160}, + {Offset: 11534336, Size: 2842193, FileId: "7,0299ad723803", Mtime: 170}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 2097152, fileId: "3,029565bf3092"}, + {start: 2097152, stop: 5242880, fileId: "6,029632f47ae2"}, + {start: 5242880, stop: 8388608, fileId: "2,029734c5aa10"}, + {start: 8388608, stop: 11534336, fileId: "5,02982f80de50"}, + {start: 11534336, stop: 14376529, fileId: "7,0299ad723803"}, + }, + }, + // case 8: real bug + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 77824, FileId: "4,0b3df938e301", Mtime: 123}, + {Offset: 471040, Size: 472225 - 471040, FileId: "6,0b3e0650019c", Mtime: 130}, + {Offset: 77824, Size: 208896 - 77824, FileId: "4,0b3f0c7202f0", Mtime: 140}, + {Offset: 208896, Size: 339968 - 208896, FileId: "2,0b4031a72689", Mtime: 150}, + {Offset: 339968, Size: 471040 - 339968, FileId: "3,0b416a557362", Mtime: 160}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 77824, fileId: "4,0b3df938e301"}, + {start: 77824, stop: 208896, fileId: "4,0b3f0c7202f0"}, + {start: 208896, stop: 339968, fileId: "2,0b4031a72689"}, + {start: 339968, stop: 471040, fileId: "3,0b416a557362"}, + {start: 471040, stop: 472225, fileId: "6,0b3e0650019c"}, + }, + }, + } + + for i, testcase := range testcases { + log.Printf("++++++++++ merged test case %d ++++++++++++++++++++", i) + intervals, _ := NonOverlappingVisibleIntervals(nil, testcase.Chunks) + for x, interval := range intervals { + log.Printf("test case %d, interval %d, start=%d, stop=%d, fileId=%s", + i, x, interval.start, interval.stop, interval.fileId) + } + for x, interval := range intervals { + if interval.start != testcase.Expected[x].start { + t.Fatalf("failed on test case %d, interval %d, start %d, expect %d", + i, x, interval.start, testcase.Expected[x].start) + } + if interval.stop != testcase.Expected[x].stop { + t.Fatalf("failed on test case %d, interval %d, stop %d, expect %d", + i, x, interval.stop, testcase.Expected[x].stop) + } + if interval.fileId != testcase.Expected[x].fileId { + t.Fatalf("failed on test case %d, interval %d, chunkId %s, expect %s", + i, x, interval.fileId, testcase.Expected[x].fileId) + } + if interval.chunkOffset != testcase.Expected[x].chunkOffset { + t.Fatalf("failed on test case %d, interval %d, chunkOffset %d, expect %d", + i, x, interval.chunkOffset, testcase.Expected[x].chunkOffset) + } + } + if len(intervals) != len(testcase.Expected) { + t.Fatalf("failed to compact test case %d, len %d expected %d", i, len(intervals), len(testcase.Expected)) + } + + } + +} + +func TestChunksReading(t *testing.T) { + + testcases := []struct { + Chunks []*filer_pb.FileChunk + Offset int64 + Size int64 + Expected []*ChunkView + }{ + // case 0: normal + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 100, Size: 100, FileId: "asdf", Mtime: 134}, + {Offset: 200, Size: 100, FileId: "fsad", Mtime: 353}, + }, + Offset: 0, + Size: 250, + Expected: []*ChunkView{ + {Offset: 0, Size: 100, FileId: "abc", LogicOffset: 0}, + {Offset: 
0, Size: 100, FileId: "asdf", LogicOffset: 100}, + {Offset: 0, Size: 50, FileId: "fsad", LogicOffset: 200}, + }, + }, + // case 1: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + }, + Offset: 50, + Size: 100, + Expected: []*ChunkView{ + {Offset: 50, Size: 100, FileId: "asdf", LogicOffset: 50}, + }, + }, + // case 2: updates overwrite part of previous chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 3, Size: 100, FileId: "a", Mtime: 123}, + {Offset: 10, Size: 50, FileId: "b", Mtime: 134}, + }, + Offset: 30, + Size: 40, + Expected: []*ChunkView{ + {Offset: 20, Size: 30, FileId: "b", LogicOffset: 30}, + {Offset: 57, Size: 10, FileId: "a", LogicOffset: 60}, + }, + }, + // case 3: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + {Offset: 50, Size: 250, FileId: "xxxx", Mtime: 154}, + }, + Offset: 0, + Size: 200, + Expected: []*ChunkView{ + {Offset: 0, Size: 50, FileId: "asdf", LogicOffset: 0}, + {Offset: 0, Size: 150, FileId: "xxxx", LogicOffset: 50}, + }, + }, + // case 4: updates far away from prev chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, + {Offset: 250, Size: 250, FileId: "xxxx", Mtime: 154}, + }, + Offset: 0, + Size: 400, + Expected: []*ChunkView{ + {Offset: 0, Size: 200, FileId: "asdf", LogicOffset: 0}, + {Offset: 0, Size: 150, FileId: "xxxx", LogicOffset: 250}, + }, + }, + // case 5: updates overwrite full chunks + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "a", Mtime: 123}, + {Offset: 0, Size: 200, FileId: "c", Mtime: 184}, + {Offset: 70, Size: 150, FileId: "b", Mtime: 143}, + {Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134}, + }, + Offset: 0, + Size: 220, + Expected: []*ChunkView{ + {Offset: 0, Size: 200, FileId: "c", LogicOffset: 0}, + {Offset: 130, Size: 20, FileId: "b", LogicOffset: 200}, + }, + }, + // case 6: same updates + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Fid: &filer_pb.FileId{FileKey: 1}, Mtime: 123}, + {Offset: 0, Size: 100, FileId: "def", Fid: &filer_pb.FileId{FileKey: 2}, Mtime: 123}, + {Offset: 0, Size: 100, FileId: "xyz", Fid: &filer_pb.FileId{FileKey: 3}, Mtime: 123}, + }, + Offset: 0, + Size: 100, + Expected: []*ChunkView{ + {Offset: 0, Size: 100, FileId: "xyz", LogicOffset: 0}, + }, + }, + // case 7: edge cases + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 100, Size: 100, FileId: "asdf", Mtime: 134}, + {Offset: 200, Size: 100, FileId: "fsad", Mtime: 353}, + }, + Offset: 0, + Size: 200, + Expected: []*ChunkView{ + {Offset: 0, Size: 100, FileId: "abc", LogicOffset: 0}, + {Offset: 0, Size: 100, FileId: "asdf", LogicOffset: 100}, + }, + }, + // case 8: edge cases + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 90, Size: 200, FileId: "asdf", Mtime: 134}, + {Offset: 190, Size: 300, FileId: "fsad", Mtime: 353}, + }, + Offset: 0, + Size: 300, + Expected: []*ChunkView{ + {Offset: 0, Size: 90, FileId: "abc", LogicOffset: 0}, + {Offset: 0, Size: 100, FileId: "asdf", LogicOffset: 90}, + {Offset: 0, Size: 110, FileId: "fsad", LogicOffset: 190}, + }, + }, + // case 9: edge cases + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 43175947, 
FileId: "2,111fc2cbfac1", Mtime: 1}, + {Offset: 43175936, Size: 52981771 - 43175936, FileId: "2,112a36ea7f85", Mtime: 2}, + {Offset: 52981760, Size: 72564747 - 52981760, FileId: "4,112d5f31c5e7", Mtime: 3}, + {Offset: 72564736, Size: 133255179 - 72564736, FileId: "1,113245f0cdb6", Mtime: 4}, + {Offset: 133255168, Size: 137269259 - 133255168, FileId: "3,1141a70733b5", Mtime: 5}, + {Offset: 137269248, Size: 153578836 - 137269248, FileId: "1,114201d5bbdb", Mtime: 6}, + }, + Offset: 0, + Size: 153578836, + Expected: []*ChunkView{ + {Offset: 0, Size: 43175936, FileId: "2,111fc2cbfac1", LogicOffset: 0}, + {Offset: 0, Size: 52981760 - 43175936, FileId: "2,112a36ea7f85", LogicOffset: 43175936}, + {Offset: 0, Size: 72564736 - 52981760, FileId: "4,112d5f31c5e7", LogicOffset: 52981760}, + {Offset: 0, Size: 133255168 - 72564736, FileId: "1,113245f0cdb6", LogicOffset: 72564736}, + {Offset: 0, Size: 137269248 - 133255168, FileId: "3,1141a70733b5", LogicOffset: 133255168}, + {Offset: 0, Size: 153578836 - 137269248, FileId: "1,114201d5bbdb", LogicOffset: 137269248}, + }, + }, + } + + for i, testcase := range testcases { + if i != 2 { + // continue + } + log.Printf("++++++++++ read test case %d ++++++++++++++++++++", i) + chunks := ViewFromChunks(nil, testcase.Chunks, testcase.Offset, testcase.Size) + for x, chunk := range chunks { + log.Printf("read case %d, chunk %d, offset=%d, size=%d, fileId=%s", + i, x, chunk.Offset, chunk.Size, chunk.FileId) + if chunk.Offset != testcase.Expected[x].Offset { + t.Fatalf("failed on read case %d, chunk %s, Offset %d, expect %d", + i, chunk.FileId, chunk.Offset, testcase.Expected[x].Offset) + } + if chunk.Size != testcase.Expected[x].Size { + t.Fatalf("failed on read case %d, chunk %s, Size %d, expect %d", + i, chunk.FileId, chunk.Size, testcase.Expected[x].Size) + } + if chunk.FileId != testcase.Expected[x].FileId { + t.Fatalf("failed on read case %d, chunk %d, FileId %s, expect %s", + i, x, chunk.FileId, testcase.Expected[x].FileId) + } + if chunk.LogicOffset != testcase.Expected[x].LogicOffset { + t.Fatalf("failed on read case %d, chunk %d, LogicOffset %d, expect %d", + i, x, chunk.LogicOffset, testcase.Expected[x].LogicOffset) + } + } + if len(chunks) != len(testcase.Expected) { + t.Fatalf("failed to read test case %d, len %d expected %d", i, len(chunks), len(testcase.Expected)) + } + } + +} + +func BenchmarkCompactFileChunks(b *testing.B) { + + var chunks []*filer_pb.FileChunk + + k := 1024 + + for n := 0; n < k; n++ { + chunks = append(chunks, &filer_pb.FileChunk{ + Offset: int64(n * 100), Size: 100, FileId: fmt.Sprintf("fileId%d", n), Mtime: int64(n), + }) + chunks = append(chunks, &filer_pb.FileChunk{ + Offset: int64(n * 50), Size: 100, FileId: fmt.Sprintf("fileId%d", n+k), Mtime: int64(n + k), + }) + } + + for n := 0; n < b.N; n++ { + CompactFileChunks(nil, chunks) + } +} + +func TestViewFromVisibleIntervals(t *testing.T) { + visibles := []VisibleInterval{ + { + start: 0, + stop: 25, + fileId: "fid1", + }, + { + start: 4096, + stop: 8192, + fileId: "fid2", + }, + { + start: 16384, + stop: 18551, + fileId: "fid3", + }, + } + + views := ViewFromVisibleIntervals(visibles, 0, math.MaxInt32) + + if len(views) != len(visibles) { + assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error") + } + +} + +func TestViewFromVisibleIntervals2(t *testing.T) { + visibles := []VisibleInterval{ + { + start: 344064, + stop: 348160, + fileId: "fid1", + }, + { + start: 348160, + stop: 356352, + fileId: "fid2", + }, + } + + views := 
ViewFromVisibleIntervals(visibles, 0, math.MaxInt32) + + if len(views) != len(visibles) { + assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error") + } + +} + +func TestViewFromVisibleIntervals3(t *testing.T) { + visibles := []VisibleInterval{ + { + start: 1000, + stop: 2000, + fileId: "fid1", + }, + { + start: 3000, + stop: 4000, + fileId: "fid2", + }, + } + + views := ViewFromVisibleIntervals(visibles, 1700, 1500) + + if len(views) != len(visibles) { + assert.Equal(t, len(visibles), len(views), "ViewFromVisibleIntervals error") + } + +} diff --git a/weed/filer/filer.go b/weed/filer/filer.go new file mode 100644 index 000000000..effdc0e4e --- /dev/null +++ b/weed/filer/filer.go @@ -0,0 +1,304 @@ +package filer + +import ( + "context" + "fmt" + "os" + "strings" + "time" + + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/util/log_buffer" + "github.com/chrislusf/seaweedfs/weed/wdclient" +) + +const ( + LogFlushInterval = time.Minute + PaginationSize = 1024 + FilerStoreId = "filer.store.id" +) + +var ( + OS_UID = uint32(os.Getuid()) + OS_GID = uint32(os.Getgid()) +) + +type Filer struct { + Store VirtualFilerStore + MasterClient *wdclient.MasterClient + fileIdDeletionQueue *util.UnboundedQueue + GrpcDialOption grpc.DialOption + DirBucketsPath string + FsyncBuckets []string + buckets *FilerBuckets + Cipher bool + LocalMetaLogBuffer *log_buffer.LogBuffer + metaLogCollection string + metaLogReplication string + MetaAggregator *MetaAggregator + Signature int32 + FilerConf *FilerConf +} + +func NewFiler(masters []string, grpcDialOption grpc.DialOption, + filerHost string, filerGrpcPort uint32, collection string, replication string, dataCenter string, notifyFn func()) *Filer { + f := &Filer{ + MasterClient: wdclient.NewMasterClient(grpcDialOption, "filer", filerHost, filerGrpcPort, dataCenter, masters), + fileIdDeletionQueue: util.NewUnboundedQueue(), + GrpcDialOption: grpcDialOption, + FilerConf: NewFilerConf(), + } + f.LocalMetaLogBuffer = log_buffer.NewLogBuffer(LogFlushInterval, f.logFlushFunc, notifyFn) + f.metaLogCollection = collection + f.metaLogReplication = replication + + go f.loopProcessingDeletion() + + return f +} + +func (f *Filer) AggregateFromPeers(self string, filers []string) { + + // set peers + found := false + for _, peer := range filers { + if peer == self { + found = true + } + } + if !found { + filers = append(filers, self) + } + + f.MetaAggregator = NewMetaAggregator(filers, f.GrpcDialOption) + f.MetaAggregator.StartLoopSubscribe(f, self) + +} + +func (f *Filer) SetStore(store FilerStore) { + f.Store = NewFilerStoreWrapper(store) + + f.setOrLoadFilerStoreSignature(store) + +} + +func (f *Filer) setOrLoadFilerStoreSignature(store FilerStore) { + storeIdBytes, err := store.KvGet(context.Background(), []byte(FilerStoreId)) + if err == ErrKvNotFound || err == nil && len(storeIdBytes) == 0 { + f.Signature = util.RandomInt32() + storeIdBytes = make([]byte, 4) + util.Uint32toBytes(storeIdBytes, uint32(f.Signature)) + if err = store.KvPut(context.Background(), []byte(FilerStoreId), storeIdBytes); err != nil { + glog.Fatalf("set %s=%d : %v", FilerStoreId, f.Signature, err) + } + glog.V(0).Infof("create %s to %d", FilerStoreId, f.Signature) + } else if err == nil && len(storeIdBytes) == 4 { + f.Signature = int32(util.BytesToUint32(storeIdBytes)) + glog.V(0).Infof("existing %s = %d", FilerStoreId, 
f.Signature) + } else { + glog.Fatalf("read %v=%v: %v", FilerStoreId, string(storeIdBytes), err) + } +} + +func (f *Filer) GetStore() (store FilerStore) { + return f.Store +} + +func (f *Filer) GetMaster() string { + return f.MasterClient.GetMaster() +} + +func (f *Filer) KeepConnectedToMaster() { + f.MasterClient.KeepConnectedToMaster() +} + +func (f *Filer) BeginTransaction(ctx context.Context) (context.Context, error) { + return f.Store.BeginTransaction(ctx) +} + +func (f *Filer) CommitTransaction(ctx context.Context) error { + return f.Store.CommitTransaction(ctx) +} + +func (f *Filer) RollbackTransaction(ctx context.Context) error { + return f.Store.RollbackTransaction(ctx) +} + +func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, oExcl bool, isFromOtherCluster bool, signatures []int32) error { + + if string(entry.FullPath) == "/" { + return nil + } + + oldEntry, _ := f.FindEntry(ctx, entry.FullPath) + + /* + if !hasWritePermission(lastDirectoryEntry, entry) { + glog.V(0).Infof("directory %s: %v, entry: uid=%d gid=%d", + lastDirectoryEntry.FullPath, lastDirectoryEntry.Attr, entry.Uid, entry.Gid) + return fmt.Errorf("no write permission in folder %v", lastDirectoryEntry.FullPath) + } + */ + + if oldEntry == nil { + + dirParts := strings.Split(string(entry.FullPath), "/") + if err := f.ensureParentDirectoryEntry(ctx, entry, dirParts, len(dirParts)-1, isFromOtherCluster); err != nil { + return err + } + + glog.V(4).Infof("InsertEntry %s: new entry: %v", entry.FullPath, entry.Name()) + if err := f.Store.InsertEntry(ctx, entry); err != nil { + glog.Errorf("insert entry %s: %v", entry.FullPath, err) + return fmt.Errorf("insert entry %s: %v", entry.FullPath, err) + } + } else { + if oExcl { + glog.V(3).Infof("EEXIST: entry %s already exists", entry.FullPath) + return fmt.Errorf("EEXIST: entry %s already exists", entry.FullPath) + } + glog.V(4).Infof("UpdateEntry %s: old entry: %v", entry.FullPath, oldEntry.Name()) + if err := f.UpdateEntry(ctx, oldEntry, entry); err != nil { + glog.Errorf("update entry %s: %v", entry.FullPath, err) + return fmt.Errorf("update entry %s: %v", entry.FullPath, err) + } + } + + f.maybeAddBucket(entry) + f.NotifyUpdateEvent(ctx, oldEntry, entry, true, isFromOtherCluster, signatures) + + f.deleteChunksIfNotNew(oldEntry, entry) + + glog.V(4).Infof("CreateEntry %s: created", entry.FullPath) + + return nil +} + +func (f *Filer) ensureParentDirectoryEntry(ctx context.Context, entry *Entry, dirParts []string, level int, isFromOtherCluster bool) (err error) { + + if level == 0 { + return nil + } + + dirPath := "/" + util.Join(dirParts[:level]...)
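+ // dirPath is the ancestor directory at this level (e.g. for /a/b/c, level 3 would be /a/b and level 2 would be /a)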
+ // fmt.Printf("%d directory: %+v\n", i, dirPath) + + // check the store directly + glog.V(4).Infof("find uncached directory: %s", dirPath) + dirEntry, _ := f.FindEntry(ctx, util.FullPath(dirPath)) + + // no such existing directory + if dirEntry == nil { + + // ensure parent directory + if err = f.ensureParentDirectoryEntry(ctx, entry, dirParts, level-1, isFromOtherCluster); err != nil { + return err + } + + // create the directory + now := time.Now() + + dirEntry = &Entry{ + FullPath: util.FullPath(dirPath), + Attr: Attr{ + Mtime: now, + Crtime: now, + Mode: os.ModeDir | entry.Mode | 0110, + Uid: entry.Uid, + Gid: entry.Gid, + Collection: entry.Collection, + Replication: entry.Replication, + UserName: entry.UserName, + GroupNames: entry.GroupNames, + }, + } + + glog.V(2).Infof("create directory: %s %v", dirPath, dirEntry.Mode) + mkdirErr := f.Store.InsertEntry(ctx, dirEntry) + if mkdirErr != nil { + if _, err := f.FindEntry(ctx, util.FullPath(dirPath)); err == filer_pb.ErrNotFound { + glog.V(3).Infof("mkdir %s: %v", dirPath, mkdirErr) + return fmt.Errorf("mkdir %s: %v", dirPath, mkdirErr) + } + } else { + f.maybeAddBucket(dirEntry) + f.NotifyUpdateEvent(ctx, nil, dirEntry, false, isFromOtherCluster, nil) + } + + } else if !dirEntry.IsDirectory() { + glog.Errorf("CreateEntry %s: %s should be a directory", entry.FullPath, dirPath) + return fmt.Errorf("%s is a file", dirPath) + } + + return nil +} + +func (f *Filer) UpdateEntry(ctx context.Context, oldEntry, entry *Entry) (err error) { + if oldEntry != nil { + entry.Attr.Crtime = oldEntry.Attr.Crtime + if oldEntry.IsDirectory() && !entry.IsDirectory() { + glog.Errorf("existing %s is a directory", oldEntry.FullPath) + return fmt.Errorf("existing %s is a directory", oldEntry.FullPath) + } + if !oldEntry.IsDirectory() && entry.IsDirectory() { + glog.Errorf("existing %s is a file", oldEntry.FullPath) + return fmt.Errorf("existing %s is a file", oldEntry.FullPath) + } + } + return f.Store.UpdateEntry(ctx, entry) +} + +var ( + Root = &Entry{ + FullPath: "/", + Attr: Attr{ + Mtime: time.Now(), + Crtime: time.Now(), + Mode: os.ModeDir | 0755, + Uid: OS_UID, + Gid: OS_GID, + }, + } +) + +func (f *Filer) FindEntry(ctx context.Context, p util.FullPath) (entry *Entry, err error) { + + if string(p) == "/" { + return Root, nil + } + entry, err = f.Store.FindEntry(ctx, p) + if entry != nil && entry.TtlSec > 0 { + if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { + f.Store.DeleteOneEntry(ctx, entry) + return nil, filer_pb.ErrNotFound + } + } + return + +} + +func (f *Filer) doListDirectoryEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (expiredCount int64, lastFileName string, err error) { + lastFileName, err = f.Store.ListDirectoryPrefixedEntries(ctx, p, startFileName, inclusive, limit, prefix, func(entry *Entry) bool { + if entry.TtlSec > 0 { + if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { + f.Store.DeleteOneEntry(ctx, entry) + expiredCount++ + return true + } + } + return eachEntryFunc(entry) + }) + if err != nil { + return expiredCount, lastFileName, err + } + return +} + +func (f *Filer) Shutdown() { + f.LocalMetaLogBuffer.Shutdown() + f.Store.Shutdown() +} diff --git a/weed/filer/filer_buckets.go b/weed/filer/filer_buckets.go new file mode 100644 index 000000000..43fb000c9 --- /dev/null +++ b/weed/filer/filer_buckets.go @@ -0,0 +1,121 @@ +package filer + +import ( + "context" + "math"
+ "sync" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type BucketName string +type BucketOption struct { + Name BucketName + Replication string + fsync bool +} +type FilerBuckets struct { + dirBucketsPath string + buckets map[BucketName]*BucketOption + sync.RWMutex +} + +func (f *Filer) LoadBuckets() { + + f.buckets = &FilerBuckets{ + buckets: make(map[BucketName]*BucketOption), + } + + limit := int64(math.MaxInt32) + + entries, _, err := f.ListDirectoryEntries(context.Background(), util.FullPath(f.DirBucketsPath), "", false, limit, "", "", "") + + if err != nil { + glog.V(1).Infof("no buckets found: %v", err) + return + } + + shouldFsyncMap := make(map[string]bool) + for _, bucket := range f.FsyncBuckets { + shouldFsyncMap[bucket] = true + } + + glog.V(1).Infof("buckets found: %d", len(entries)) + + f.buckets.Lock() + for _, entry := range entries { + _, shouldFsync := shouldFsyncMap[entry.Name()] + f.buckets.buckets[BucketName(entry.Name())] = &BucketOption{ + Name: BucketName(entry.Name()), + Replication: entry.Replication, + fsync: shouldFsync, + } + } + f.buckets.Unlock() + +} + +func (f *Filer) ReadBucketOption(bucketName string) (replication string, fsync bool) { + + f.buckets.RLock() + defer f.buckets.RUnlock() + + option, found := f.buckets.buckets[BucketName(bucketName)] + + if !found { + return "", false + } + return option.Replication, option.fsync + +} + +func (f *Filer) isBucket(entry *Entry) bool { + if !entry.IsDirectory() { + return false + } + parent, dirName := entry.FullPath.DirAndName() + if parent != f.DirBucketsPath { + return false + } + + f.buckets.RLock() + defer f.buckets.RUnlock() + + _, found := f.buckets.buckets[BucketName(dirName)] + + return found + +} + +func (f *Filer) maybeAddBucket(entry *Entry) { + if !entry.IsDirectory() { + return + } + parent, dirName := entry.FullPath.DirAndName() + if parent != f.DirBucketsPath { + return + } + f.addBucket(dirName, &BucketOption{ + Name: BucketName(dirName), + Replication: entry.Replication, + }) +} + +func (f *Filer) addBucket(bucketName string, bucketOption *BucketOption) { + + f.buckets.Lock() + defer f.buckets.Unlock() + + f.buckets.buckets[BucketName(bucketName)] = bucketOption + +} + +func (f *Filer) deleteBucket(bucketName string) { + + f.buckets.Lock() + defer f.buckets.Unlock() + + delete(f.buckets.buckets, BucketName(bucketName)) + +} diff --git a/weed/filer/filer_conf.go b/weed/filer/filer_conf.go new file mode 100644 index 000000000..ab5afc5cc --- /dev/null +++ b/weed/filer/filer_conf.go @@ -0,0 +1,149 @@ +package filer + +import ( + "bytes" + "context" + "io" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/jsonpb" + "github.com/viant/ptrie" +) + +const ( + DirectoryEtcRoot = "/etc" + DirectoryEtcSeaweedFS = "/etc/seaweedfs" + FilerConfName = "filer.conf" + IamConfigDirecotry = "/etc/iam" + IamIdentityFile = "identity.json" + IamPoliciesFile = "policies.json" +) + +type FilerConf struct { + rules ptrie.Trie +} + +func NewFilerConf() (fc *FilerConf) { + fc = &FilerConf{ + rules: ptrie.New(), + } + return fc +} + +func (fc *FilerConf) loadFromFiler(filer *Filer) (err error) { + filerConfPath := util.NewFullPath(DirectoryEtcSeaweedFS, FilerConfName) + entry, err := filer.FindEntry(context.Background(), filerConfPath) + if err != nil { + if err == filer_pb.ErrNotFound { + return nil + } + glog.Errorf("read filer conf entry %s: %v",
filerConfPath, err) + return + } + + if len(entry.Content) > 0 { + return fc.LoadFromBytes(entry.Content) + } + + return fc.loadFromChunks(filer, entry.Content, entry.Chunks) +} + +func (fc *FilerConf) loadFromChunks(filer *Filer, content []byte, chunks []*filer_pb.FileChunk) (err error) { + if len(content) == 0 { + content, err = filer.readEntry(chunks) + if err != nil { + glog.Errorf("read filer conf content: %v", err) + return + } + } + + return fc.LoadFromBytes(content) +} + +func (fc *FilerConf) LoadFromBytes(data []byte) (err error) { + conf := &filer_pb.FilerConf{} + + if err := jsonpb.Unmarshal(bytes.NewReader(data), conf); err != nil { + return err + } + + return fc.doLoadConf(conf) +} + +func (fc *FilerConf) doLoadConf(conf *filer_pb.FilerConf) (err error) { + for _, location := range conf.Locations { + err = fc.AddLocationConf(location) + if err != nil { + // the error was already logged in AddLocationConf and is not recoverable here; do not fail the whole load + return nil + } + } + return nil +} + +func (fc *FilerConf) AddLocationConf(locConf *filer_pb.FilerConf_PathConf) (err error) { + err = fc.rules.Put([]byte(locConf.LocationPrefix), locConf) + if err != nil { + glog.Errorf("put location prefix: %v", err) + } + return +} + +func (fc *FilerConf) DeleteLocationConf(locationPrefix string) { + rules := ptrie.New() + fc.rules.Walk(func(key []byte, value interface{}) bool { + if string(key) == locationPrefix { + return true + } + rules.Put(key, value) + return true + }) + fc.rules = rules +} + +func (fc *FilerConf) MatchStorageRule(path string) (pathConf *filer_pb.FilerConf_PathConf) { + pathConf = &filer_pb.FilerConf_PathConf{} + fc.rules.MatchPrefix([]byte(path), func(key []byte, value interface{}) bool { + t := value.(*filer_pb.FilerConf_PathConf) + mergePathConf(pathConf, t) + return true + }) + return pathConf +} + +// mergePathConf merges b into a: any non-empty value in b overrides the corresponding value in a +func mergePathConf(a, b *filer_pb.FilerConf_PathConf) { + a.Collection = util.Nvl(b.Collection, a.Collection) + a.Replication = util.Nvl(b.Replication, a.Replication) + a.Ttl = util.Nvl(b.Ttl, a.Ttl) + if b.DiskType != "" { + a.DiskType = b.DiskType + } + a.Fsync = b.Fsync || a.Fsync + if b.VolumeGrowthCount > 0 { + a.VolumeGrowthCount = b.VolumeGrowthCount + } +} + +func (fc *FilerConf) ToProto() *filer_pb.FilerConf { + m := &filer_pb.FilerConf{} + fc.rules.Walk(func(key []byte, value interface{}) bool { + pathConf := value.(*filer_pb.FilerConf_PathConf) + m.Locations = append(m.Locations, pathConf) + return true + }) + return m +} + +func (fc *FilerConf) ToText(writer io.Writer) error { + + m := jsonpb.Marshaler{ + EmitDefaults: false, + Indent: " ", + } + + return m.Marshal(writer, fc.ToProto()) +} diff --git a/weed/filer/filer_conf_test.go b/weed/filer/filer_conf_test.go new file mode 100644 index 000000000..ff868a3ec --- /dev/null +++ b/weed/filer/filer_conf_test.go @@ -0,0 +1,34 @@ +package filer + +import ( + "testing" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/stretchr/testify/assert" +) + +func TestFilerConf(t *testing.T) { + + fc := NewFilerConf() + + conf := &filer_pb.FilerConf{Locations: []*filer_pb.FilerConf_PathConf{ + { + LocationPrefix: "/buckets/abc", + Collection: "abc", + }, + { + LocationPrefix: "/buckets/abcd", + Collection: "abcd", + }, + { + LocationPrefix: "/buckets/", + Replication: "001", + }, + }} + fc.doLoadConf(conf) + + assert.Equal(t, "abc", fc.MatchStorageRule("/buckets/abc/jasdf").Collection) + assert.Equal(t, "abcd", fc.MatchStorageRule("/buckets/abcd/jasdf").Collection) + assert.Equal(t, "001",
fc.MatchStorageRule("/buckets/abc/jasdf").Replication) + +} diff --git a/weed/filer/filer_delete_entry.go b/weed/filer/filer_delete_entry.go new file mode 100644 index 000000000..3ef3cfff9 --- /dev/null +++ b/weed/filer/filer_delete_entry.go @@ -0,0 +1,161 @@ +package filer + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type HardLinkId []byte + +const ( + MsgFailDelNonEmptyFolder = "fail to delete non-empty folder" +) + +func (f *Filer) DeleteEntryMetaAndData(ctx context.Context, p util.FullPath, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isFromOtherCluster bool, signatures []int32) (err error) { + if p == "/" { + return nil + } + + entry, findErr := f.FindEntry(ctx, p) + if findErr != nil { + return findErr + } + + isDeleteCollection := f.isBucket(entry) + + var chunks []*filer_pb.FileChunk + var hardLinkIds []HardLinkId + chunks = append(chunks, entry.Chunks...) + if entry.IsDirectory() { + // delete the folder children, not including the folder itself + var dirChunks []*filer_pb.FileChunk + var dirHardLinkIds []HardLinkId + dirChunks, dirHardLinkIds, err = f.doBatchDeleteFolderMetaAndData(ctx, entry, isRecursive, ignoreRecursiveError, shouldDeleteChunks && !isDeleteCollection, isDeleteCollection, isFromOtherCluster, signatures) + if err != nil { + glog.V(0).Infof("delete directory %s: %v", p, err) + return fmt.Errorf("delete directory %s: %v", p, err) + } + chunks = append(chunks, dirChunks...) + hardLinkIds = append(hardLinkIds, dirHardLinkIds...) + } + + // delete the file or folder + err = f.doDeleteEntryMetaAndData(ctx, entry, shouldDeleteChunks, isFromOtherCluster, signatures) + if err != nil { + return fmt.Errorf("delete file %s: %v", p, err) + } + + if shouldDeleteChunks && !isDeleteCollection { + f.DirectDeleteChunks(chunks) + } + // A case not handled: + // what if the chunk is in a different collection? 
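+ // presumably such chunks are not reclaimed by the collection deletion below, so they would leak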
+ if shouldDeleteChunks { + f.maybeDeleteHardLinks(hardLinkIds) + } + + if isDeleteCollection { + collectionName := entry.Name() + f.doDeleteCollection(collectionName) + f.deleteBucket(collectionName) + } + + return nil +} + +func (f *Filer) doBatchDeleteFolderMetaAndData(ctx context.Context, entry *Entry, isRecursive, ignoreRecursiveError, shouldDeleteChunks, isDeletingBucket, isFromOtherCluster bool, signatures []int32) (chunks []*filer_pb.FileChunk, hardlinkIds []HardLinkId, err error) { + + lastFileName := "" + includeLastFile := false + if !isDeletingBucket { + for { + entries, _, err := f.ListDirectoryEntries(ctx, entry.FullPath, lastFileName, includeLastFile, PaginationSize, "", "", "") + if err != nil { + glog.Errorf("list folder %s: %v", entry.FullPath, err) + return nil, nil, fmt.Errorf("list folder %s: %v", entry.FullPath, err) + } + if lastFileName == "" && !isRecursive && len(entries) > 0 { + // only for first iteration in the loop + glog.Errorf("deleting a folder %s has children: %+v ...", entry.FullPath, entries[0].Name()) + return nil, nil, fmt.Errorf("%s: %s", MsgFailDelNonEmptyFolder, entry.FullPath) + } + + for _, sub := range entries { + lastFileName = sub.Name() + var dirChunks []*filer_pb.FileChunk + var dirHardLinkIds []HardLinkId + if sub.IsDirectory() { + subIsDeletingBucket := f.isBucket(sub) + dirChunks, dirHardLinkIds, err = f.doBatchDeleteFolderMetaAndData(ctx, sub, isRecursive, ignoreRecursiveError, shouldDeleteChunks, subIsDeletingBucket, false, nil) + chunks = append(chunks, dirChunks...) + hardlinkIds = append(hardlinkIds, dirHardLinkIds...) + } else { + f.NotifyUpdateEvent(ctx, sub, nil, shouldDeleteChunks, isFromOtherCluster, nil) + if len(sub.HardLinkId) != 0 { + // hard link chunk data are deleted separately + hardlinkIds = append(hardlinkIds, sub.HardLinkId) + } else { + chunks = append(chunks, sub.Chunks...) 
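+ // these collected chunks are deleted later by the caller, in one batch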
+ } + } + if err != nil && !ignoreRecursiveError { + return nil, nil, err + } + } + + if len(entries) < PaginationSize { + break + } + } + } + + glog.V(3).Infof("deleting directory %v delete %d chunks: %v", entry.FullPath, len(chunks), shouldDeleteChunks) + + if storeDeletionErr := f.Store.DeleteFolderChildren(ctx, entry.FullPath); storeDeletionErr != nil { + return nil, nil, fmt.Errorf("filer store delete: %v", storeDeletionErr) + } + + f.NotifyUpdateEvent(ctx, entry, nil, shouldDeleteChunks, isFromOtherCluster, signatures) + + return chunks, hardlinkIds, nil +} + +func (f *Filer) doDeleteEntryMetaAndData(ctx context.Context, entry *Entry, shouldDeleteChunks bool, isFromOtherCluster bool, signatures []int32) (err error) { + + glog.V(3).Infof("deleting entry %v, delete chunks: %v", entry.FullPath, shouldDeleteChunks) + + if storeDeletionErr := f.Store.DeleteOneEntry(ctx, entry); storeDeletionErr != nil { + return fmt.Errorf("filer store delete: %v", storeDeletionErr) + } + if !entry.IsDirectory() { + f.NotifyUpdateEvent(ctx, entry, nil, shouldDeleteChunks, isFromOtherCluster, signatures) + } + + return nil +} + +func (f *Filer) doDeleteCollection(collectionName string) (err error) { + + return f.MasterClient.WithClient(func(client master_pb.SeaweedClient) error { + _, err := client.CollectionDelete(context.Background(), &master_pb.CollectionDeleteRequest{ + Name: collectionName, + }) + if err != nil { + glog.Infof("delete collection %s: %v", collectionName, err) + } + return err + }) + +} + +func (f *Filer) maybeDeleteHardLinks(hardLinkIds []HardLinkId) { + for _, hardLinkId := range hardLinkIds { + if err := f.Store.DeleteHardLink(context.Background(), hardLinkId); err != nil { + glog.Errorf("delete hard link id %x: %v", hardLinkId, err) + } + } +} diff --git a/weed/filer/filer_deletion.go b/weed/filer/filer_deletion.go new file mode 100644 index 000000000..9eee38277 --- /dev/null +++ b/weed/filer/filer_deletion.go @@ -0,0 +1,153 @@ +package filer + +import ( + "strings" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/wdclient" +) + +func LookupByMasterClientFn(masterClient *wdclient.MasterClient) func(vids []string) (map[string]operation.LookupResult, error) { + return func(vids []string) (map[string]operation.LookupResult, error) { + m := make(map[string]operation.LookupResult) + for _, vid := range vids { + locs, _ := masterClient.GetVidLocations(vid) + var locations []operation.Location + for _, loc := range locs { + locations = append(locations, operation.Location{ + Url: loc.Url, + PublicUrl: loc.PublicUrl, + }) + } + m[vid] = operation.LookupResult{ + VolumeId: vid, + Locations: locations, + } + } + return m, nil + } +} + +func (f *Filer) loopProcessingDeletion() { + + lookupFunc := LookupByMasterClientFn(f.MasterClient) + + DeletionBatchSize := 100000 // roughly 20 bytes cost per file id.
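+ // i.e. one full batch carries on the order of 2MB of file ids per deletion call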
+ + var deletionCount int + for { + deletionCount = 0 + f.fileIdDeletionQueue.Consume(func(fileIds []string) { + for len(fileIds) > 0 { + var toDeleteFileIds []string + if len(fileIds) > DeletionBatchSize { + toDeleteFileIds = fileIds[:DeletionBatchSize] + fileIds = fileIds[DeletionBatchSize:] + } else { + toDeleteFileIds = fileIds + fileIds = fileIds[:0] + } + deletionCount = len(toDeleteFileIds) + _, err := operation.DeleteFilesWithLookupVolumeId(f.GrpcDialOption, toDeleteFileIds, lookupFunc) + if err != nil { + if !strings.Contains(err.Error(), "already deleted") { + glog.V(0).Infof("deleting fileIds len=%d error: %v", deletionCount, err) + } + } else { + glog.V(1).Infof("deleting fileIds len=%d", deletionCount) + } + } + }) + + if deletionCount == 0 { + time.Sleep(1123 * time.Millisecond) + } + } +} + +func (f *Filer) doDeleteFileIds(fileIds []string) { + + lookupFunc := LookupByMasterClientFn(f.MasterClient) + DeletionBatchSize := 100000 // roughly 20 bytes cost per file id. + + for len(fileIds) > 0 { + var toDeleteFileIds []string + if len(fileIds) > DeletionBatchSize { + toDeleteFileIds = fileIds[:DeletionBatchSize] + fileIds = fileIds[DeletionBatchSize:] + } else { + toDeleteFileIds = fileIds + fileIds = fileIds[:0] + } + deletionCount := len(toDeleteFileIds) + _, err := operation.DeleteFilesWithLookupVolumeId(f.GrpcDialOption, toDeleteFileIds, lookupFunc) + if err != nil { + if !strings.Contains(err.Error(), "already deleted") { + glog.V(0).Infof("deleting fileIds len=%d error: %v", deletionCount, err) + } + } + } +} + +func (f *Filer) DirectDeleteChunks(chunks []*filer_pb.FileChunk) { + var fileIdsToDelete []string + for _, chunk := range chunks { + if !chunk.IsChunkManifest { + fileIdsToDelete = append(fileIdsToDelete, chunk.GetFileIdString()) + continue + } + dataChunks, manifestResolveErr := ResolveOneChunkManifest(f.MasterClient.LookupFileId, chunk) + if manifestResolveErr != nil { + glog.V(0).Infof("failed to resolve manifest %s: %v", chunk.FileId, manifestResolveErr) + } + for _, dChunk := range dataChunks { + fileIdsToDelete = append(fileIdsToDelete, dChunk.GetFileIdString()) + } + fileIdsToDelete = append(fileIdsToDelete, chunk.GetFileIdString()) + } + + f.doDeleteFileIds(fileIdsToDelete) +} + +func (f *Filer) DeleteChunks(chunks []*filer_pb.FileChunk) { + for _, chunk := range chunks { + if !chunk.IsChunkManifest { + f.fileIdDeletionQueue.EnQueue(chunk.GetFileIdString()) + continue + } + dataChunks, manifestResolveErr := ResolveOneChunkManifest(f.MasterClient.LookupFileId, chunk) + if manifestResolveErr != nil { + glog.V(0).Infof("failed to resolve manifest %s: %v", chunk.FileId, manifestResolveErr) + } + for _, dChunk := range dataChunks { + f.fileIdDeletionQueue.EnQueue(dChunk.GetFileIdString()) + } + f.fileIdDeletionQueue.EnQueue(chunk.GetFileIdString()) + } +} + +// deleteChunksIfNotNew queues for deletion every chunk of oldEntry that newEntry no longer references +func (f *Filer) deleteChunksIfNotNew(oldEntry, newEntry *Entry) { + + if oldEntry == nil { + return + } + if newEntry == nil { + f.DeleteChunks(oldEntry.Chunks) + return + } + + var toDelete []*filer_pb.FileChunk + newChunkIds := make(map[string]bool) + for _, newChunk := range newEntry.Chunks { + newChunkIds[newChunk.GetFileIdString()] = true + } + + for _, oldChunk := range oldEntry.Chunks { + if _, found := newChunkIds[oldChunk.GetFileIdString()]; !found { + toDelete = append(toDelete, oldChunk) + } + } + f.DeleteChunks(toDelete) +} diff --git a/weed/filer/filer_notify.go b/weed/filer/filer_notify.go new file mode 100644 index 000000000..7ab101102 --- /dev/null +++ b/weed/filer/filer_notify.go @@ -0,0 +1,185 @@
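+// filer_notify.go: metadata change events are published to the optional notification queue and buffered into per-minute log segments under SystemLogDir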
+package filer + +import ( + "context" + "fmt" + "io" + "strings" + "time" + + "github.com/golang/protobuf/proto" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/notification" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func (f *Filer) NotifyUpdateEvent(ctx context.Context, oldEntry, newEntry *Entry, deleteChunks, isFromOtherCluster bool, signatures []int32) { + var fullpath string + if oldEntry != nil { + fullpath = string(oldEntry.FullPath) + } else if newEntry != nil { + fullpath = string(newEntry.FullPath) + } else { + return + } + + // println("fullpath:", fullpath) + + if strings.HasPrefix(fullpath, SystemLogDir) { + return + } + foundSelf := false + for _, sig := range signatures { + if sig == f.Signature { + foundSelf = true + } + } + if !foundSelf { + signatures = append(signatures, f.Signature) + } + + newParentPath := "" + if newEntry != nil { + newParentPath, _ = newEntry.FullPath.DirAndName() + } + eventNotification := &filer_pb.EventNotification{ + OldEntry: oldEntry.ToProtoEntry(), + NewEntry: newEntry.ToProtoEntry(), + DeleteChunks: deleteChunks, + NewParentPath: newParentPath, + IsFromOtherCluster: isFromOtherCluster, + Signatures: signatures, + } + + if notification.Queue != nil { + glog.V(3).Infof("notifying entry update %v", fullpath) + if err := notification.Queue.SendMessage(fullpath, eventNotification); err != nil { + // throw message + glog.Error(err) + } + } + + f.logMetaEvent(ctx, fullpath, eventNotification) + +} + +func (f *Filer) logMetaEvent(ctx context.Context, fullpath string, eventNotification *filer_pb.EventNotification) { + + dir, _ := util.FullPath(fullpath).DirAndName() + + event := &filer_pb.SubscribeMetadataResponse{ + Directory: dir, + EventNotification: eventNotification, + TsNs: time.Now().UnixNano(), + } + data, err := proto.Marshal(event) + if err != nil { + glog.Errorf("failed to marshal filer_pb.SubscribeMetadataResponse %+v: %v", event, err) + return + } + + f.LocalMetaLogBuffer.AddToBuffer([]byte(dir), data, event.TsNs) + +} + +func (f *Filer) logFlushFunc(startTime, stopTime time.Time, buf []byte) { + + if len(buf) == 0 { + return + } + + startTime, stopTime = startTime.UTC(), stopTime.UTC() + + targetFile := fmt.Sprintf("%s/%04d-%02d-%02d/%02d-%02d.segment", SystemLogDir, + startTime.Year(), startTime.Month(), startTime.Day(), startTime.Hour(), startTime.Minute(), + // startTime.Second(), startTime.Nanosecond(), + ) + + for { + if err := f.appendToFile(targetFile, buf); err != nil { + glog.V(1).Infof("log write failed %s: %v", targetFile, err) + time.Sleep(737 * time.Millisecond) + } else { + break + } + } +} + +func (f *Filer) ReadPersistedLogBuffer(startTime time.Time, eachLogEntryFn func(logEntry *filer_pb.LogEntry) error) (lastTsNs int64, err error) { + + startTime = startTime.UTC() + startDate := fmt.Sprintf("%04d-%02d-%02d", startTime.Year(), startTime.Month(), startTime.Day()) + startHourMinute := fmt.Sprintf("%02d-%02d.segment", startTime.Hour(), startTime.Minute()) + + sizeBuf := make([]byte, 4) + startTsNs := startTime.UnixNano() + + dayEntries, _, listDayErr := f.ListDirectoryEntries(context.Background(), SystemLogDir, startDate, true, 366, "", "", "") + if listDayErr != nil { + return lastTsNs, fmt.Errorf("fail to list log by day: %v", listDayErr) + } + for _, dayEntry := range dayEntries { + // println("checking day", dayEntry.FullPath) + hourMinuteEntries, _, listHourMinuteErr := f.ListDirectoryEntries(context.Background(), 
util.NewFullPath(SystemLogDir, dayEntry.Name()), "", false, 24*60, "", "", "") + if listHourMinuteErr != nil { + return lastTsNs, fmt.Errorf("fail to list log %s by day: %v", dayEntry.Name(), listHourMinuteErr) + } + for _, hourMinuteEntry := range hourMinuteEntries { + // println("checking hh-mm", hourMinuteEntry.FullPath) + if dayEntry.Name() == startDate { + if strings.Compare(hourMinuteEntry.Name(), startHourMinute) < 0 { + continue + } + } + // println("processing", hourMinuteEntry.FullPath) + chunkedFileReader := NewChunkStreamReaderFromFiler(f.MasterClient, hourMinuteEntry.Chunks) + if lastTsNs, err = ReadEachLogEntry(chunkedFileReader, sizeBuf, startTsNs, eachLogEntryFn); err != nil { + chunkedFileReader.Close() + if err == io.EOF { + continue + } + return lastTsNs, fmt.Errorf("reading %s: %v", hourMinuteEntry.FullPath, err) + } + chunkedFileReader.Close() + } + } + + return lastTsNs, nil +} + +func ReadEachLogEntry(r io.Reader, sizeBuf []byte, ns int64, eachLogEntryFn func(logEntry *filer_pb.LogEntry) error) (lastTsNs int64, err error) { + for { + n, err := r.Read(sizeBuf) + if err != nil { + return lastTsNs, err + } + if n != 4 { + return lastTsNs, fmt.Errorf("size %d bytes, expected 4 bytes", n) + } + size := util.BytesToUint32(sizeBuf) + // println("entry size", size) + entryData := make([]byte, size) + n, err = r.Read(entryData) + if err != nil { + return lastTsNs, err + } + if n != int(size) { + return lastTsNs, fmt.Errorf("entry data %d bytes, expected %d bytes", n, size) + } + logEntry := &filer_pb.LogEntry{} + if err = proto.Unmarshal(entryData, logEntry); err != nil { + return lastTsNs, err + } + if logEntry.TsNs <= ns { + continue + } + // println("each log: ", logEntry.TsNs) + if err := eachLogEntryFn(logEntry); err != nil { + return lastTsNs, err + } else { + lastTsNs = logEntry.TsNs + } + } +} diff --git a/weed/filer/filer_notify_append.go b/weed/filer/filer_notify_append.go new file mode 100644 index 000000000..d441bbbc9 --- /dev/null +++ b/weed/filer/filer_notify_append.go @@ -0,0 +1,75 @@ +package filer + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func (f *Filer) appendToFile(targetFile string, data []byte) error { + + assignResult, uploadResult, err2 := f.assignAndUpload(targetFile, data) + if err2 != nil { + return err2 + } + + // find out existing entry + fullpath := util.FullPath(targetFile) + entry, err := f.FindEntry(context.Background(), fullpath) + var offset int64 = 0 + if err == filer_pb.ErrNotFound { + entry = &Entry{ + FullPath: fullpath, + Attr: Attr{ + Crtime: time.Now(), + Mtime: time.Now(), + Mode: os.FileMode(0644), + Uid: OS_UID, + Gid: OS_GID, + }, + } + } else { + offset = int64(TotalSize(entry.Chunks)) + } + + // append to existing chunks + entry.Chunks = append(entry.Chunks, uploadResult.ToPbFileChunk(assignResult.Fid, offset)) + + // update the entry + err = f.CreateEntry(context.Background(), entry, false, false, nil) + + return err +} + +func (f *Filer) assignAndUpload(targetFile string, data []byte) (*operation.AssignResult, *operation.UploadResult, error) { + // assign a volume location + rule := f.FilerConf.MatchStorageRule(targetFile) + assignRequest := &operation.VolumeAssignRequest{ + Count: 1, + Collection: util.Nvl(f.metaLogCollection, rule.Collection), + Replication: util.Nvl(f.metaLogReplication, rule.Replication), + WritableVolumeCount: rule.VolumeGrowthCount, + } + + 
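	// The append is a two-phase write: operation.Assign below first asks a
	// master for a file id and a volume server location satisfying the
	// storage rule, and only then are the bytes uploaded to that volume
	// server over HTTP.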
assignResult, err := operation.Assign(f.GetMaster, f.GrpcDialOption, assignRequest) + if err != nil { + return nil, nil, fmt.Errorf("AssignVolume: %v", err) + } + if assignResult.Error != "" { + return nil, nil, fmt.Errorf("AssignVolume error: %v", assignResult.Error) + } + + // upload data + targetUrl := "http://" + assignResult.Url + "/" + assignResult.Fid + uploadResult, err := operation.UploadData(targetUrl, "", f.Cipher, data, false, "", nil, assignResult.Auth) + if err != nil { + return nil, nil, fmt.Errorf("upload data %s: %v", targetUrl, err) + } + // println("uploaded to", targetUrl) + return assignResult, uploadResult, nil +} diff --git a/weed/filer/filer_notify_test.go b/weed/filer/filer_notify_test.go new file mode 100644 index 000000000..6a2be8f18 --- /dev/null +++ b/weed/filer/filer_notify_test.go @@ -0,0 +1,53 @@ +package filer + +import ( + "testing" + "time" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + + "github.com/golang/protobuf/proto" +) + +func TestProtoMarshalText(t *testing.T) { + + oldEntry := &Entry{ + FullPath: util.FullPath("/this/path/to"), + Attr: Attr{ + Mtime: time.Now(), + Mode: 0644, + Uid: 1, + Mime: "text/json", + TtlSec: 25, + }, + Chunks: []*filer_pb.FileChunk{ + &filer_pb.FileChunk{ + FileId: "234,2423423422", + Offset: 234234, + Size: 234, + Mtime: 12312423, + ETag: "2342342354", + SourceFileId: "23234,2342342342", + }, + }, + } + + notification := &filer_pb.EventNotification{ + OldEntry: oldEntry.ToProtoEntry(), + NewEntry: nil, + DeleteChunks: true, + } + + text := proto.MarshalTextString(notification) + + notification2 := &filer_pb.EventNotification{} + proto.UnmarshalText(text, notification2) + + if notification2.OldEntry.Chunks[0].SourceFileId != notification.OldEntry.Chunks[0].SourceFileId { + t.Fatalf("marshal/unmarshal error: %s", text) + } + + println(text) + +} diff --git a/weed/filer/filer_on_meta_event.go b/weed/filer/filer_on_meta_event.go new file mode 100644 index 000000000..a91faeb24 --- /dev/null +++ b/weed/filer/filer_on_meta_event.go @@ -0,0 +1,82 @@ +package filer + +import ( + "bytes" + "math" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +// onMetadataChangeEvent is triggered after the filer has processed change events from local or remote filers +func (f *Filer) onMetadataChangeEvent(event *filer_pb.SubscribeMetadataResponse) { + f.maybeReloadFilerConfiguration(event) + f.onBucketEvents(event) +} + +func (f *Filer) onBucketEvents(event *filer_pb.SubscribeMetadataResponse) { + message := event.EventNotification + for _, sig := range message.Signatures { + if sig == f.Signature { + return + } + } + if f.DirBucketsPath == event.Directory { + if message.OldEntry == nil && message.NewEntry != nil { + f.Store.OnBucketCreation(message.NewEntry.Name) + } + if message.OldEntry != nil && message.NewEntry == nil { + f.Store.OnBucketDeletion(message.OldEntry.Name) + } + } +} + +func (f *Filer) maybeReloadFilerConfiguration(event *filer_pb.SubscribeMetadataResponse) { + if DirectoryEtcSeaweedFS != event.Directory { + if DirectoryEtcSeaweedFS != event.EventNotification.NewParentPath { + return + } + } + + entry := event.EventNotification.NewEntry + if entry == nil { + return + } + + glog.V(0).Infof("processing %v", event) + if entry.Name == FilerConfName { + f.reloadFilerConfiguration(entry) + } +} + +func (f *Filer) readEntry(chunks []*filer_pb.FileChunk) ([]byte, error) { + var buf 
bytes.Buffer + err := StreamContent(f.MasterClient, &buf, chunks, 0, math.MaxInt64, false) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func (f *Filer) reloadFilerConfiguration(entry *filer_pb.Entry) { + fc := NewFilerConf() + err := fc.loadFromChunks(f, entry.Content, entry.Chunks) + if err != nil { + glog.Errorf("read filer conf chunks: %v", err) + return + } + f.FilerConf = fc +} + +func (f *Filer) LoadFilerConf() { + fc := NewFilerConf() + err := util.Retry("loadFilerConf", func() error { + return fc.loadFromFiler(f) + }) + if err != nil { + glog.Errorf("read filer conf: %v", err) + return + } + f.FilerConf = fc +} diff --git a/weed/filer/filer_rename.go b/weed/filer/filer_rename.go new file mode 100644 index 000000000..b6f0cf6de --- /dev/null +++ b/weed/filer/filer_rename.go @@ -0,0 +1,30 @@ +package filer + +import ( + "fmt" + "github.com/chrislusf/seaweedfs/weed/util" + "strings" +) + +func (f *Filer) CanRename(source, target util.FullPath) error { + sourceBucket := f.DetectBucket(source) + targetBucket := f.DetectBucket(target) + if sourceBucket != targetBucket { + return fmt.Errorf("can not move across collection %s => %s", sourceBucket, targetBucket) + } + return nil +} + +func (f *Filer) DetectBucket(source util.FullPath) (bucket string) { + if strings.HasPrefix(string(source), f.DirBucketsPath+"/") { + bucketAndObjectKey := string(source)[len(f.DirBucketsPath)+1:] + t := strings.Index(bucketAndObjectKey, "/") + if t < 0 { + bucket = bucketAndObjectKey + } + if t > 0 { + bucket = bucketAndObjectKey[:t] + } + } + return bucket +} diff --git a/weed/filer/filer_search.go b/weed/filer/filer_search.go new file mode 100644 index 000000000..f43312cfa --- /dev/null +++ b/weed/filer/filer_search.go @@ -0,0 +1,98 @@ +package filer + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/util" + "path/filepath" + "strings" +) + +func splitPattern(pattern string) (prefix string, restPattern string) { + position := strings.Index(pattern, "*") + if position >= 0 { + return pattern[:position], pattern[position:] + } + position = strings.Index(pattern, "?") + if position >= 0 { + return pattern[:position], pattern[position:] + } + // no wildcard found: treat the whole pattern as a literal name prefix instead of discarding it + return pattern, "" +} + +// For now, prefix and namePattern are mutually exclusive +func (f *Filer) ListDirectoryEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix string, namePattern string, namePatternExclude string) (entries []*Entry, hasMore bool, err error) { + + _, err = f.StreamListDirectoryEntries(ctx, p, startFileName, inclusive, limit+1, prefix, namePattern, namePatternExclude, func(entry *Entry) bool { + entries = append(entries, entry) + return true + }) + + hasMore = int64(len(entries)) >= limit+1 + if hasMore { + entries = entries[:limit] + } + + return entries, hasMore, err +} + +// For now, prefix and namePattern are mutually exclusive +func (f *Filer) StreamListDirectoryEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix string, namePattern string, namePatternExclude string, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error) { + if strings.HasSuffix(string(p), "/") && len(p) > 1 { + p = p[0 : len(p)-1] + } + + prefixInNamePattern, restNamePattern := splitPattern(namePattern) + if prefixInNamePattern != "" { + prefix = prefixInNamePattern + } + var missedCount int64 + + missedCount, lastFileName, err = f.doListPatternMatchedEntries(ctx, p, startFileName, inclusive, limit, prefix, restNamePattern, 
namePatternExclude, eachEntryFunc) + + for missedCount > 0 && err == nil { + missedCount, lastFileName, err = f.doListPatternMatchedEntries(ctx, p, lastFileName, false, missedCount, prefix, restNamePattern, namePatternExclude, eachEntryFunc) + } + + return +} + +func (f *Filer) doListPatternMatchedEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix, restNamePattern string, namePatternExclude string, eachEntryFunc ListEachEntryFunc) (missedCount int64, lastFileName string, err error) { + + if len(restNamePattern) == 0 && len(namePatternExclude) == 0{ + lastFileName, err = f.doListValidEntries(ctx, p, startFileName, inclusive, limit, prefix, eachEntryFunc) + return 0, lastFileName, err + } + + lastFileName, err = f.doListValidEntries(ctx, p, startFileName, inclusive, limit, prefix, func(entry *Entry) bool { + nameToTest := entry.Name() + if len(namePatternExclude) > 0 { + if matched, matchErr := filepath.Match(namePatternExclude, nameToTest); matchErr == nil && matched { + missedCount++ + return true + } + } + if len(restNamePattern) > 0 { + if matched, matchErr := filepath.Match(restNamePattern, nameToTest[len(prefix):]); matchErr == nil && !matched { + missedCount++ + return true + } + } + if !eachEntryFunc(entry) { + return false + } + return true + }) + if err != nil { + return + } + return +} + +func (f *Filer) doListValidEntries(ctx context.Context, p util.FullPath, startFileName string, inclusive bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error) { + var expiredCount int64 + expiredCount, lastFileName, err = f.doListDirectoryEntries(ctx, p, startFileName, inclusive, limit, prefix, eachEntryFunc) + for expiredCount > 0 && err == nil { + expiredCount, lastFileName, err = f.doListDirectoryEntries(ctx, p, lastFileName, false, expiredCount, prefix, eachEntryFunc) + } + return +} diff --git a/weed/filer/filerstore.go b/weed/filer/filerstore.go new file mode 100644 index 000000000..a5b2f25de --- /dev/null +++ b/weed/filer/filerstore.go @@ -0,0 +1,46 @@ +package filer + +import ( + "context" + "errors" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + ErrUnsupportedListDirectoryPrefixed = errors.New("unsupported directory prefix listing") + ErrUnsupportedSuperLargeDirectoryListing = errors.New("unsupported super large directory listing") + ErrKvNotImplemented = errors.New("kv not implemented yet") + ErrKvNotFound = errors.New("kv: not found") +) + +type ListEachEntryFunc func(entry *Entry) bool + +type FilerStore interface { + // GetName gets the name to locate the configuration in filer.toml file + GetName() string + // Initialize initializes the file store + Initialize(configuration util.Configuration, prefix string) error + InsertEntry(context.Context, *Entry) error + UpdateEntry(context.Context, *Entry) (err error) + // err == filer_pb.ErrNotFound if not found + FindEntry(context.Context, util.FullPath) (entry *Entry, err error) + DeleteEntry(context.Context, util.FullPath) (err error) + DeleteFolderChildren(context.Context, util.FullPath) (err error) + ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error) + ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error) + + BeginTransaction(ctx 
context.Context) (context.Context, error) + CommitTransaction(ctx context.Context) error + RollbackTransaction(ctx context.Context) error + + KvPut(ctx context.Context, key []byte, value []byte) (err error) + KvGet(ctx context.Context, key []byte) (value []byte, err error) + KvDelete(ctx context.Context, key []byte) (err error) + + Shutdown() +} + +type BucketAware interface { + OnBucketCreation(bucket string) + OnBucketDeletion(bucket string) +} diff --git a/weed/filer/filerstore_hardlink.go b/weed/filer/filerstore_hardlink.go new file mode 100644 index 000000000..316c76a0c --- /dev/null +++ b/weed/filer/filerstore_hardlink.go @@ -0,0 +1,102 @@ +package filer + +import ( + "bytes" + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func (fsw *FilerStoreWrapper) handleUpdateToHardLinks(ctx context.Context, entry *Entry) error { + if len(entry.HardLinkId) == 0 { + return nil + } + // handle hard links + if err := fsw.setHardLink(ctx, entry); err != nil { + return fmt.Errorf("setHardLink %d: %v", entry.HardLinkId, err) + } + + // check what is existing entry + glog.V(4).Infof("handleUpdateToHardLinks FindEntry %s", entry.FullPath) + actualStore := fsw.getActualStore(entry.FullPath) + existingEntry, err := actualStore.FindEntry(ctx, entry.FullPath) + if err != nil && err != filer_pb.ErrNotFound { + return fmt.Errorf("update existing entry %s: %v", entry.FullPath, err) + } + + // remove old hard link + if err == nil && len(existingEntry.HardLinkId) != 0 && bytes.Compare(existingEntry.HardLinkId, entry.HardLinkId) != 0 { + glog.V(4).Infof("handleUpdateToHardLinks DeleteHardLink %s", entry.FullPath) + if err = fsw.DeleteHardLink(ctx, existingEntry.HardLinkId); err != nil { + return err + } + } + return nil +} + +func (fsw *FilerStoreWrapper) setHardLink(ctx context.Context, entry *Entry) error { + if len(entry.HardLinkId) == 0 { + return nil + } + key := entry.HardLinkId + + newBlob, encodeErr := entry.EncodeAttributesAndChunks() + if encodeErr != nil { + return encodeErr + } + + return fsw.KvPut(ctx, key, newBlob) +} + +func (fsw *FilerStoreWrapper) maybeReadHardLink(ctx context.Context, entry *Entry) error { + if len(entry.HardLinkId) == 0 { + return nil + } + key := entry.HardLinkId + + glog.V(4).Infof("maybeReadHardLink KvGet %v", key) + value, err := fsw.KvGet(ctx, key) + if err != nil { + glog.Errorf("read %s hardlink %d: %v", entry.FullPath, entry.HardLinkId, err) + return err + } + + if err = entry.DecodeAttributesAndChunks(value); err != nil { + glog.Errorf("decode %s hardlink %d: %v", entry.FullPath, entry.HardLinkId, err) + return err + } + + return nil +} + +func (fsw *FilerStoreWrapper) DeleteHardLink(ctx context.Context, hardLinkId HardLinkId) error { + key := hardLinkId + value, err := fsw.KvGet(ctx, key) + if err == ErrKvNotFound { + return nil + } + if err != nil { + return err + } + + entry := &Entry{} + if err = entry.DecodeAttributesAndChunks(value); err != nil { + return err + } + + entry.HardLinkCounter-- + if entry.HardLinkCounter <= 0 { + glog.V(4).Infof("DeleteHardLink KvDelete %v", key) + return fsw.KvDelete(ctx, key) + } + + newBlob, encodeErr := entry.EncodeAttributesAndChunks() + if encodeErr != nil { + return encodeErr + } + + glog.V(4).Infof("DeleteHardLink KvPut %v", key) + return fsw.KvPut(ctx, key, newBlob) + +} diff --git a/weed/filer/filerstore_translate_path.go b/weed/filer/filerstore_translate_path.go new file mode 100644 index 000000000..00bf82ed4 --- /dev/null +++ 
b/weed/filer/filerstore_translate_path.go @@ -0,0 +1,153 @@ +package filer + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/util" + "strings" +) + +var ( + _ = FilerStore(&FilerStorePathTranlator{}) +) + +type FilerStorePathTranlator struct { + actualStore FilerStore + storeRoot string +} + +func NewFilerStorePathTranlator(storeRoot string, store FilerStore) *FilerStorePathTranlator { + if innerStore, ok := store.(*FilerStorePathTranlator); ok { + return innerStore + } + + if !strings.HasSuffix(storeRoot, "/") { + storeRoot += "/" + } + + return &FilerStorePathTranlator{ + actualStore: store, + storeRoot: storeRoot, + } +} + +func (t *FilerStorePathTranlator) translatePath(fp util.FullPath) (newPath util.FullPath) { + newPath = fp + if t.storeRoot == "/" { + return + } + newPath = fp[len(t.storeRoot)-1:] + if newPath == "" { + newPath = "/" + } + return +} +func (t *FilerStorePathTranlator) changeEntryPath(entry *Entry) (previousPath util.FullPath) { + previousPath = entry.FullPath + if t.storeRoot == "/" { + return + } + entry.FullPath = t.translatePath(previousPath) + return +} +func (t *FilerStorePathTranlator) recoverEntryPath(entry *Entry, previousPath util.FullPath) { + entry.FullPath = previousPath +} + +func (t *FilerStorePathTranlator) GetName() string { + return t.actualStore.GetName() +} + +func (t *FilerStorePathTranlator) Initialize(configuration util.Configuration, prefix string) error { + return t.actualStore.Initialize(configuration, prefix) +} + +func (t *FilerStorePathTranlator) InsertEntry(ctx context.Context, entry *Entry) error { + previousPath := t.changeEntryPath(entry) + defer t.recoverEntryPath(entry, previousPath) + + return t.actualStore.InsertEntry(ctx, entry) +} + +func (t *FilerStorePathTranlator) UpdateEntry(ctx context.Context, entry *Entry) error { + previousPath := t.changeEntryPath(entry) + defer t.recoverEntryPath(entry, previousPath) + + return t.actualStore.UpdateEntry(ctx, entry) +} + +func (t *FilerStorePathTranlator) FindEntry(ctx context.Context, fp util.FullPath) (entry *Entry, err error) { + if t.storeRoot == "/" { + return t.actualStore.FindEntry(ctx, fp) + } + newFullPath := t.translatePath(fp) + entry, err = t.actualStore.FindEntry(ctx, newFullPath) + if err == nil { + entry.FullPath = fp[:len(t.storeRoot)-1] + entry.FullPath + } + return +} + +func (t *FilerStorePathTranlator) DeleteEntry(ctx context.Context, fp util.FullPath) (err error) { + newFullPath := t.translatePath(fp) + return t.actualStore.DeleteEntry(ctx, newFullPath) +} + +func (t *FilerStorePathTranlator) DeleteOneEntry(ctx context.Context, existingEntry *Entry) (err error) { + + previousPath := t.changeEntryPath(existingEntry) + defer t.recoverEntryPath(existingEntry, previousPath) + + return t.actualStore.DeleteEntry(ctx, existingEntry.FullPath) +} + +func (t *FilerStorePathTranlator) DeleteFolderChildren(ctx context.Context, fp util.FullPath) (err error) { + newFullPath := t.translatePath(fp) + + return t.actualStore.DeleteFolderChildren(ctx, newFullPath) +} + +func (t *FilerStorePathTranlator) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc ListEachEntryFunc) (string, error) { + + newFullPath := t.translatePath(dirPath) + + return t.actualStore.ListDirectoryEntries(ctx, newFullPath, startFileName, includeStartFile, limit, func(entry *Entry) bool { + entry.FullPath = dirPath[:len(t.storeRoot)-1] + entry.FullPath + return eachEntryFunc(entry) + }) +} + +func (t 
*FilerStorePathTranlator) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (string, error) { + + newFullPath := t.translatePath(dirPath) + + return t.actualStore.ListDirectoryPrefixedEntries(ctx, newFullPath, startFileName, includeStartFile, limit, prefix, func(entry *Entry) bool { + entry.FullPath = dirPath[:len(t.storeRoot)-1] + entry.FullPath + return eachEntryFunc(entry) + }) +} + +func (t *FilerStorePathTranlator) BeginTransaction(ctx context.Context) (context.Context, error) { + return t.actualStore.BeginTransaction(ctx) +} + +func (t *FilerStorePathTranlator) CommitTransaction(ctx context.Context) error { + return t.actualStore.CommitTransaction(ctx) +} + +func (t *FilerStorePathTranlator) RollbackTransaction(ctx context.Context) error { + return t.actualStore.RollbackTransaction(ctx) +} + +func (t *FilerStorePathTranlator) Shutdown() { + t.actualStore.Shutdown() +} + +func (t *FilerStorePathTranlator) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + return t.actualStore.KvPut(ctx, key, value) +} +func (t *FilerStorePathTranlator) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + return t.actualStore.KvGet(ctx, key) +} +func (t *FilerStorePathTranlator) KvDelete(ctx context.Context, key []byte) (err error) { + return t.actualStore.KvDelete(ctx, key) +} diff --git a/weed/filer/filerstore_wrapper.go b/weed/filer/filerstore_wrapper.go new file mode 100644 index 000000000..cd7c0bea3 --- /dev/null +++ b/weed/filer/filerstore_wrapper.go @@ -0,0 +1,322 @@ +package filer + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/viant/ptrie" + "strings" + "time" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + _ = VirtualFilerStore(&FilerStoreWrapper{}) +) + +type VirtualFilerStore interface { + FilerStore + DeleteHardLink(ctx context.Context, hardLinkId HardLinkId) error + DeleteOneEntry(ctx context.Context, entry *Entry) error + AddPathSpecificStore(path string, storeId string, store FilerStore) + OnBucketCreation(bucket string) + OnBucketDeletion(bucket string) +} + +type FilerStoreWrapper struct { + defaultStore FilerStore + pathToStore ptrie.Trie + storeIdToStore map[string]FilerStore +} + +func NewFilerStoreWrapper(store FilerStore) *FilerStoreWrapper { + if innerStore, ok := store.(*FilerStoreWrapper); ok { + return innerStore + } + return &FilerStoreWrapper{ + defaultStore: store, + pathToStore: ptrie.New(), + storeIdToStore: make(map[string]FilerStore), + } +} + +func (fsw *FilerStoreWrapper) OnBucketCreation(bucket string) { + for _, store := range fsw.storeIdToStore { + if ba, ok := store.(BucketAware); ok { + ba.OnBucketCreation(bucket) + } + } + if ba, ok := fsw.defaultStore.(BucketAware); ok { + ba.OnBucketCreation(bucket) + } +} +func (fsw *FilerStoreWrapper) OnBucketDeletion(bucket string) { + for _, store := range fsw.storeIdToStore { + if ba, ok := store.(BucketAware); ok { + ba.OnBucketDeletion(bucket) + } + } + if ba, ok := fsw.defaultStore.(BucketAware); ok { + ba.OnBucketDeletion(bucket) + } +} + +func (fsw *FilerStoreWrapper) AddPathSpecificStore(path string, storeId string, store FilerStore) { + fsw.storeIdToStore[storeId] = NewFilerStorePathTranlator(path, store) + err := fsw.pathToStore.Put([]byte(path), storeId) + if err != nil { + glog.Fatalf("put path specific 
store: %v", err) + } +} + +func (fsw *FilerStoreWrapper) getActualStore(path util.FullPath) (store FilerStore) { + store = fsw.defaultStore + if path == "/" { + return + } + var storeId string + fsw.pathToStore.MatchPrefix([]byte(path), func(key []byte, value interface{}) bool { + storeId = value.(string) + return false + }) + if storeId != "" { + store = fsw.storeIdToStore[storeId] + } + return +} + +func (fsw *FilerStoreWrapper) getDefaultStore() (store FilerStore) { + return fsw.defaultStore +} + +func (fsw *FilerStoreWrapper) GetName() string { + return fsw.getDefaultStore().GetName() +} + +func (fsw *FilerStoreWrapper) Initialize(configuration util.Configuration, prefix string) error { + return fsw.getDefaultStore().Initialize(configuration, prefix) +} + +func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) error { + actualStore := fsw.getActualStore(entry.FullPath) + stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "insert").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "insert").Observe(time.Since(start).Seconds()) + }() + + filer_pb.BeforeEntrySerialization(entry.Chunks) + if entry.Mime == "application/octet-stream" { + entry.Mime = "" + } + + if err := fsw.handleUpdateToHardLinks(ctx, entry); err != nil { + return err + } + + glog.V(4).Infof("InsertEntry %s", entry.FullPath) + return actualStore.InsertEntry(ctx, entry) +} + +func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) error { + actualStore := fsw.getActualStore(entry.FullPath) + stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "update").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "update").Observe(time.Since(start).Seconds()) + }() + + filer_pb.BeforeEntrySerialization(entry.Chunks) + if entry.Mime == "application/octet-stream" { + entry.Mime = "" + } + + if err := fsw.handleUpdateToHardLinks(ctx, entry); err != nil { + return err + } + + glog.V(4).Infof("UpdateEntry %s", entry.FullPath) + return actualStore.UpdateEntry(ctx, entry) +} + +func (fsw *FilerStoreWrapper) FindEntry(ctx context.Context, fp util.FullPath) (entry *Entry, err error) { + actualStore := fsw.getActualStore(fp) + stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "find").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "find").Observe(time.Since(start).Seconds()) + }() + + entry, err = actualStore.FindEntry(ctx, fp) + glog.V(4).Infof("FindEntry %s: %v", fp, err) + if err != nil { + return nil, err + } + + fsw.maybeReadHardLink(ctx, entry) + + filer_pb.AfterEntryDeserialization(entry.Chunks) + return +} + +func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp util.FullPath) (err error) { + actualStore := fsw.getActualStore(fp) + stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "delete").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "delete").Observe(time.Since(start).Seconds()) + }() + + existingEntry, findErr := fsw.FindEntry(ctx, fp) + if findErr == filer_pb.ErrNotFound { + return nil + } + if len(existingEntry.HardLinkId) != 0 { + // remove hard link + glog.V(4).Infof("DeleteHardLink %s", existingEntry.FullPath) + if err = fsw.DeleteHardLink(ctx, existingEntry.HardLinkId); err != nil { + return err + } + } + + glog.V(4).Infof("DeleteEntry %s", fp) + return 
actualStore.DeleteEntry(ctx, fp) +} + +func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry *Entry) (err error) { + actualStore := fsw.getActualStore(existingEntry.FullPath) + stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "delete").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "delete").Observe(time.Since(start).Seconds()) + }() + + if len(existingEntry.HardLinkId) != 0 { + // remove hard link + glog.V(4).Infof("DeleteHardLink %s", existingEntry.FullPath) + if err = fsw.DeleteHardLink(ctx, existingEntry.HardLinkId); err != nil { + return err + } + } + + glog.V(4).Infof("DeleteOneEntry %s", existingEntry.FullPath) + return actualStore.DeleteEntry(ctx, existingEntry.FullPath) +} + +func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp util.FullPath) (err error) { + actualStore := fsw.getActualStore(fp + "/") + stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "deleteFolderChildren").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "deleteFolderChildren").Observe(time.Since(start).Seconds()) + }() + + glog.V(4).Infof("DeleteFolderChildren %s", fp) + return actualStore.DeleteFolderChildren(ctx, fp) +} + +func (fsw *FilerStoreWrapper) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc ListEachEntryFunc) (string, error) { + actualStore := fsw.getActualStore(dirPath + "/") + stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "list").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "list").Observe(time.Since(start).Seconds()) + }() + + glog.V(4).Infof("ListDirectoryEntries %s from %s limit %d", dirPath, startFileName, limit) + return actualStore.ListDirectoryEntries(ctx, dirPath, startFileName, includeStartFile, limit, func(entry *Entry) bool { + fsw.maybeReadHardLink(ctx, entry) + filer_pb.AfterEntryDeserialization(entry.Chunks) + return eachEntryFunc(entry) + }) +} + +func (fsw *FilerStoreWrapper) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error) { + actualStore := fsw.getActualStore(dirPath + "/") + stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "prefixList").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "prefixList").Observe(time.Since(start).Seconds()) + }() + glog.V(4).Infof("ListDirectoryPrefixedEntries %s from %s prefix %s limit %d", dirPath, startFileName, prefix, limit) + lastFileName, err = actualStore.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, prefix, eachEntryFunc) + if err == ErrUnsupportedListDirectoryPrefixed { + lastFileName, err = fsw.prefixFilterEntries(ctx, dirPath, startFileName, includeStartFile, limit, prefix, func(entry *Entry) bool { + fsw.maybeReadHardLink(ctx, entry) + filer_pb.AfterEntryDeserialization(entry.Chunks) + return eachEntryFunc(entry) + }) + } + return lastFileName, err +} + +func (fsw *FilerStoreWrapper) prefixFilterEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc ListEachEntryFunc) (lastFileName string, err error) { + actualStore 
:= fsw.getActualStore(dirPath + "/") + + if prefix == "" { + return actualStore.ListDirectoryEntries(ctx, dirPath, startFileName, includeStartFile, limit, eachEntryFunc) + } + + var notPrefixed []*Entry + lastFileName, err = actualStore.ListDirectoryEntries(ctx, dirPath, startFileName, includeStartFile, limit, func(entry *Entry) bool { + notPrefixed = append(notPrefixed, entry) + return true + }) + if err != nil { + return + } + + count := int64(0) + for count < limit && len(notPrefixed) > 0 { + for _, entry := range notPrefixed { + if strings.HasPrefix(entry.Name(), prefix) { + count++ + if !eachEntryFunc(entry) { + return + } + if count >= limit { + break + } + } + } + if count < limit { + notPrefixed = notPrefixed[:0] + _, err = actualStore.ListDirectoryEntries(ctx, dirPath, lastFileName, false, limit, func(entry *Entry) bool { + notPrefixed = append(notPrefixed, entry) + return true + }) + if err != nil { + return + } + } + } + return +} + +func (fsw *FilerStoreWrapper) BeginTransaction(ctx context.Context) (context.Context, error) { + return fsw.getDefaultStore().BeginTransaction(ctx) +} + +func (fsw *FilerStoreWrapper) CommitTransaction(ctx context.Context) error { + return fsw.getDefaultStore().CommitTransaction(ctx) +} + +func (fsw *FilerStoreWrapper) RollbackTransaction(ctx context.Context) error { + return fsw.getDefaultStore().RollbackTransaction(ctx) +} + +func (fsw *FilerStoreWrapper) Shutdown() { + fsw.getDefaultStore().Shutdown() +} + +func (fsw *FilerStoreWrapper) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + return fsw.getDefaultStore().KvPut(ctx, key, value) +} +func (fsw *FilerStoreWrapper) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + return fsw.getDefaultStore().KvGet(ctx, key) +} +func (fsw *FilerStoreWrapper) KvDelete(ctx context.Context, key []byte) (err error) { + return fsw.getDefaultStore().KvDelete(ctx, key) +} diff --git a/weed/filer/hbase/hbase_store.go b/weed/filer/hbase/hbase_store.go new file mode 100644 index 000000000..e0d878ca7 --- /dev/null +++ b/weed/filer/hbase/hbase_store.go @@ -0,0 +1,231 @@ +package hbase + +import ( + "bytes" + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/tsuna/gohbase" + "github.com/tsuna/gohbase/hrpc" + "io" +) + +func init() { + filer.Stores = append(filer.Stores, &HbaseStore{}) +} + +type HbaseStore struct { + Client gohbase.Client + table []byte + cfKv string + cfMetaDir string + column string +} + +func (store *HbaseStore) GetName() string { + return "hbase" +} + +func (store *HbaseStore) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"zkquorum"), + configuration.GetString(prefix+"table"), + ) +} + +func (store *HbaseStore) initialize(zkquorum, table string) (err error) { + store.Client = gohbase.NewClient(zkquorum) + store.table = []byte(table) + store.cfKv = "kv" + store.cfMetaDir = "meta" + store.column = "a" + + // check table exists + key := "whatever" + headers := map[string][]string{store.cfMetaDir: nil} + get, err := hrpc.NewGet(context.Background(), store.table, []byte(key), hrpc.Families(headers)) + if err != nil { + return fmt.Errorf("NewGet returned an error: %v", err) + } + _, err = store.Client.Get(get) + if err != gohbase.TableNotFound { + return nil + } + + // create table + adminClient := 
gohbase.NewAdminClient(zkquorum) + cFamilies := []string{store.cfKv, store.cfMetaDir} + cf := make(map[string]map[string]string, len(cFamilies)) + for _, f := range cFamilies { + cf[f] = nil + } + ct := hrpc.NewCreateTable(context.Background(), []byte(table), cf) + if err := adminClient.CreateTable(ct); err != nil { + return err + } + + return nil +} + +func (store *HbaseStore) InsertEntry(ctx context.Context, entry *filer.Entry) error { + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + if len(entry.Chunks) > 50 { + value = util.MaybeGzipData(value) + } + + return store.doPut(ctx, store.cfMetaDir, []byte(entry.FullPath), value, entry.TtlSec) +} + +func (store *HbaseStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + return store.InsertEntry(ctx, entry) +} + +func (store *HbaseStore) FindEntry(ctx context.Context, path util.FullPath) (entry *filer.Entry, err error) { + value, err := store.doGet(ctx, store.cfMetaDir, []byte(path)) + if err != nil { + if err == filer.ErrKvNotFound { + return nil, filer_pb.ErrNotFound + } + return nil, err + } + + entry = &filer.Entry{ + FullPath: path, + } + err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(value)) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + return entry, nil +} + +func (store *HbaseStore) DeleteEntry(ctx context.Context, path util.FullPath) (err error) { + return store.doDelete(ctx, store.cfMetaDir, []byte(path)) +} + +func (store *HbaseStore) DeleteFolderChildren(ctx context.Context, path util.FullPath) (err error) { + + family := map[string][]string{store.cfMetaDir: {COLUMN_NAME}} + expectedPrefix := []byte(path.Child("")) + scan, err := hrpc.NewScanRange(ctx, store.table, expectedPrefix, nil, hrpc.Families(family)) + if err != nil { + return err + } + + scanner := store.Client.Scan(scan) + defer scanner.Close() + for { + res, err := scanner.Next() + if err != nil { + break + } + if len(res.Cells) == 0 { + continue + } + cell := res.Cells[0] + + if !bytes.HasPrefix(cell.Row, expectedPrefix) { + break + } + fullpath := util.FullPath(cell.Row) + dir, _ := fullpath.DirAndName() + if dir != string(path) { + continue + } + + err = store.doDelete(ctx, store.cfMetaDir, cell.Row) + if err != nil { + break + } + + } + return +} + +func (store *HbaseStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (string, error) { + return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc) +} + +func (store *HbaseStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + family := map[string][]string{store.cfMetaDir: {COLUMN_NAME}} + expectedPrefix := []byte(dirPath.Child(prefix)) + scan, err := hrpc.NewScanRange(ctx, store.table, expectedPrefix, nil, hrpc.Families(family)) + if err != nil { + return lastFileName, err + } + + scanner := store.Client.Scan(scan) + defer scanner.Close() + for { + res, err := scanner.Next() + if err == io.EOF { + break + } + if err != nil { + return lastFileName, err + } + if len(res.Cells) == 0 { + continue + } + cell := res.Cells[0] + + if !bytes.HasPrefix(cell.Row, expectedPrefix) { + break + } + + fullpath := 
util.FullPath(cell.Row) + dir, fileName := fullpath.DirAndName() + if dir != string(dirPath) { + continue + } + + value := cell.Value + + if fileName == startFileName && !includeStartFile { + continue + } + + limit-- + if limit < 0 { + break + } + + lastFileName = fileName + + entry := &filer.Entry{ + FullPath: fullpath, + } + if decodeErr := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(value)); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + if !eachEntryFunc(entry) { + break + } + } + + return lastFileName, nil +} + +func (store *HbaseStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} + +func (store *HbaseStore) CommitTransaction(ctx context.Context) error { + return nil +} + +func (store *HbaseStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *HbaseStore) Shutdown() { + store.Client.Close() +} diff --git a/weed/filer/hbase/hbase_store_kv.go b/weed/filer/hbase/hbase_store_kv.go new file mode 100644 index 000000000..990e55a24 --- /dev/null +++ b/weed/filer/hbase/hbase_store_kv.go @@ -0,0 +1,76 @@ +package hbase + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/tsuna/gohbase/hrpc" + "time" +) + +const ( + COLUMN_NAME = "a" +) + +func (store *HbaseStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + return store.doPut(ctx, store.cfKv, key, value, 0) +} + +func (store *HbaseStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + return store.doGet(ctx, store.cfKv, key) +} + +func (store *HbaseStore) KvDelete(ctx context.Context, key []byte) (err error) { + return store.doDelete(ctx, store.cfKv, key) +} + +func (store *HbaseStore) doPut(ctx context.Context, cf string, key, value []byte, ttlSecond int32) (err error) { + if ttlSecond > 0 { + return store.doPutWithOptions(ctx, cf, key, value, hrpc.Durability(hrpc.AsyncWal), hrpc.TTL(time.Duration(ttlSecond)*time.Second)) + } + return store.doPutWithOptions(ctx, cf, key, value, hrpc.Durability(hrpc.AsyncWal)) +} + +func (store *HbaseStore) doPutWithOptions(ctx context.Context, cf string, key, value []byte, options ...func(hrpc.Call) error) (err error) { + values := map[string]map[string][]byte{cf: map[string][]byte{}} + values[cf][COLUMN_NAME] = value + putRequest, err := hrpc.NewPut(ctx, store.table, key, values, options...) 
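	// hrpc mutations address cells as family -> qualifier -> value: the single
	// cell {cf: {COLUMN_NAME: value}} built above carries the serialized entry,
	// while TTL and durability arrive through the variadic options.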
+ if err != nil { + return err + } + _, err = store.Client.Put(putRequest) + if err != nil { + return err + } + return nil +} + +func (store *HbaseStore) doGet(ctx context.Context, cf string, key []byte) (value []byte, err error) { + family := map[string][]string{cf: {COLUMN_NAME}} + getRequest, err := hrpc.NewGet(context.Background(), store.table, key, hrpc.Families(family)) + if err != nil { + return nil, err + } + getResp, err := store.Client.Get(getRequest) + if err != nil { + return nil, err + } + if len(getResp.Cells) == 0 { + return nil, filer.ErrKvNotFound + } + + return getResp.Cells[0].Value, nil +} + +func (store *HbaseStore) doDelete(ctx context.Context, cf string, key []byte) (err error) { + values := map[string]map[string][]byte{cf: map[string][]byte{}} + values[cf][COLUMN_NAME] = nil + deleteRequest, err := hrpc.NewDel(ctx, store.table, key, values, hrpc.Durability(hrpc.AsyncWal)) + if err != nil { + return err + } + _, err = store.Client.Delete(deleteRequest) + if err != nil { + return err + } + return nil +} diff --git a/weed/filer/leveldb/leveldb_store.go b/weed/filer/leveldb/leveldb_store.go new file mode 100644 index 000000000..ce454f36a --- /dev/null +++ b/weed/filer/leveldb/leveldb_store.go @@ -0,0 +1,243 @@ +package leveldb + +import ( + "bytes" + "context" + "fmt" + "github.com/syndtr/goleveldb/leveldb" + leveldb_errors "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/opt" + leveldb_util "github.com/syndtr/goleveldb/leveldb/util" + "os" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + weed_util "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + DIR_FILE_SEPARATOR = byte(0x00) +) + +func init() { + filer.Stores = append(filer.Stores, &LevelDBStore{}) +} + +type LevelDBStore struct { + db *leveldb.DB +} + +func (store *LevelDBStore) GetName() string { + return "leveldb" +} + +func (store *LevelDBStore) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + dir := configuration.GetString(prefix + "dir") + return store.initialize(dir) +} + +func (store *LevelDBStore) initialize(dir string) (err error) { + glog.Infof("filer store dir: %s", dir) + os.MkdirAll(dir, 0755) + if err := weed_util.TestFolderWritable(dir); err != nil { + return fmt.Errorf("Check Level Folder %s Writable: %s", dir, err) + } + + opts := &opt.Options{ + BlockCacheCapacity: 32 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 16 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 10, + } + + if store.db, err = leveldb.OpenFile(dir, opts); err != nil { + if leveldb_errors.IsCorrupted(err) { + store.db, err = leveldb.RecoverFile(dir, opts) + } + if err != nil { + glog.Infof("filer store open dir %s: %v", dir, err) + return + } + } + return +} + +func (store *LevelDBStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *LevelDBStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *LevelDBStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *LevelDBStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + key := genKey(entry.DirAndName()) + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + if len(entry.Chunks) > 50 { + value = weed_util.MaybeGzipData(value) + } + + err = 
store.db.Put(key, value, nil) + + if err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + // println("saved", entry.FullPath, "chunks", len(entry.Chunks)) + + return nil +} + +func (store *LevelDBStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *LevelDBStore) FindEntry(ctx context.Context, fullpath weed_util.FullPath) (entry *filer.Entry, err error) { + key := genKey(fullpath.DirAndName()) + + data, err := store.db.Get(key, nil) + + if err == leveldb.ErrNotFound { + return nil, filer_pb.ErrNotFound + } + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(weed_util.MaybeDecompressData((data))) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + // println("read", entry.FullPath, "chunks", len(entry.Chunks), "data", len(data), string(data)) + + return entry, nil +} + +func (store *LevelDBStore) DeleteEntry(ctx context.Context, fullpath weed_util.FullPath) (err error) { + key := genKey(fullpath.DirAndName()) + + err = store.db.Delete(key, nil) + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDBStore) DeleteFolderChildren(ctx context.Context, fullpath weed_util.FullPath) (err error) { + + batch := new(leveldb.Batch) + + directoryPrefix := genDirectoryKeyPrefix(fullpath, "") + iter := store.db.NewIterator(&leveldb_util.Range{Start: directoryPrefix}, nil) + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + batch.Delete([]byte(genKey(string(fullpath), fileName))) + } + iter.Release() + + err = store.db.Write(batch, nil) + + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDBStore) ListDirectoryEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc) +} + +func (store *LevelDBStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + directoryPrefix := genDirectoryKeyPrefix(dirPath, prefix) + lastFileStart := directoryPrefix + if startFileName != "" { + lastFileStart = genDirectoryKeyPrefix(dirPath, startFileName) + } + + iter := store.db.NewIterator(&leveldb_util.Range{Start: lastFileStart}, nil) + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + if fileName == startFileName && !includeStartFile { + continue + } + limit-- + if limit < 0 { + break + } + lastFileName = fileName + entry := &filer.Entry{ + FullPath: weed_util.NewFullPath(string(dirPath), fileName), + } + if decodeErr := entry.DecodeAttributesAndChunks(weed_util.MaybeDecompressData(iter.Value())); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + if !eachEntryFunc(entry) { + break + } + } + iter.Release() + + return lastFileName, err +} 
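A hedged illustration of the key scheme that genKey and genDirectoryKeyPrefix below implement: a key is the directory path, a 0x00 separator, then the file name, so every child of a directory falls into one contiguous, prefix-scannable key range. This small program is illustrative only and not part of the diff:

	package main

	import "fmt"

	const dirFileSeparator = byte(0x00)

	// exampleKey mimics genKey: directory + 0x00 + file name.
	func exampleKey(dir, name string) []byte {
		key := append([]byte(dir), dirFileSeparator)
		return append(key, []byte(name)...)
	}

	func main() {
		// Both keys share the prefix "/home/chris"+0x00, so a LevelDB range
		// scan starting at that prefix visits exactly this directory's children.
		fmt.Printf("%q\n", exampleKey("/home/chris", "a.txt"))
		fmt.Printf("%q\n", exampleKey("/home/chris", "b.txt"))
	}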
+ +func genKey(dirPath, fileName string) (key []byte) { + key = []byte(dirPath) + key = append(key, DIR_FILE_SEPARATOR) + key = append(key, []byte(fileName)...) + return key +} + +func genDirectoryKeyPrefix(fullpath weed_util.FullPath, startFileName string) (keyPrefix []byte) { + keyPrefix = []byte(string(fullpath)) + keyPrefix = append(keyPrefix, DIR_FILE_SEPARATOR) + if len(startFileName) > 0 { + keyPrefix = append(keyPrefix, []byte(startFileName)...) + } + return keyPrefix +} + +func getNameFromKey(key []byte) string { + + sepIndex := len(key) - 1 + for sepIndex >= 0 && key[sepIndex] != DIR_FILE_SEPARATOR { + sepIndex-- + } + + return string(key[sepIndex+1:]) + +} + +func (store *LevelDBStore) Shutdown() { + store.db.Close() +} diff --git a/weed/filer/leveldb/leveldb_store_kv.go b/weed/filer/leveldb/leveldb_store_kv.go new file mode 100644 index 000000000..f686cbf21 --- /dev/null +++ b/weed/filer/leveldb/leveldb_store_kv.go @@ -0,0 +1,45 @@ +package leveldb + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/syndtr/goleveldb/leveldb" +) + +func (store *LevelDBStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + err = store.db.Put(key, value, nil) + + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + + return nil +} + +func (store *LevelDBStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + value, err = store.db.Get(key, nil) + + if err == leveldb.ErrNotFound { + return nil, filer.ErrKvNotFound + } + + if err != nil { + return nil, fmt.Errorf("kv get: %v", err) + } + + return +} + +func (store *LevelDBStore) KvDelete(ctx context.Context, key []byte) (err error) { + + err = store.db.Delete(key, nil) + + if err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} diff --git a/weed/filer/leveldb/leveldb_store_test.go b/weed/filer/leveldb/leveldb_store_test.go new file mode 100644 index 000000000..d437895f5 --- /dev/null +++ b/weed/filer/leveldb/leveldb_store_test.go @@ -0,0 +1,115 @@ +package leveldb + +import ( + "context" + "fmt" + "io/ioutil" + "os" + "testing" + "time" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func TestCreateAndFind(t *testing.T) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test") + defer os.RemoveAll(dir) + store := &LevelDBStore{} + store.initialize(dir) + testFiler.SetStore(store) + + fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg") + + ctx := context.Background() + + entry1 := &filer.Entry{ + FullPath: fullpath, + Attr: filer.Attr{ + Mode: 0440, + Uid: 1234, + Gid: 5678, + }, + } + + if err := testFiler.CreateEntry(ctx, entry1, false, false, nil); err != nil { + t.Errorf("create entry %v: %v", entry1.FullPath, err) + return + } + + entry, err := testFiler.FindEntry(ctx, fullpath) + + if err != nil { + t.Errorf("find entry: %v", err) + return + } + + if entry.FullPath != entry1.FullPath { + t.Errorf("find wrong entry: %v", entry.FullPath) + return + } + + // checking one upper directory + entries, _, _ := testFiler.ListDirectoryEntries(ctx, util.FullPath("/home/chris/this/is/one"), "", false, 100, "", "", "") + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + + // checking one upper directory + entries, _, _ = testFiler.ListDirectoryEntries(ctx, util.FullPath("/"), "", false, 100, "", "", "") + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + 
return + } + +} + +func TestEmptyRoot(t *testing.T) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test2") + defer os.RemoveAll(dir) + store := &LevelDBStore{} + store.initialize(dir) + testFiler.SetStore(store) + + ctx := context.Background() + + // checking one upper directory + entries, _, err := testFiler.ListDirectoryEntries(ctx, util.FullPath("/"), "", false, 100, "", "", "") + if err != nil { + t.Errorf("list entries: %v", err) + return + } + if len(entries) != 0 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} + +func BenchmarkInsertEntry(b *testing.B) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_bench") + defer os.RemoveAll(dir) + store := &LevelDBStore{} + store.initialize(dir) + testFiler.SetStore(store) + + ctx := context.Background() + + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + entry := &filer.Entry{ + FullPath: util.FullPath(fmt.Sprintf("/file%d.txt", i)), + Attr: filer.Attr{ + Crtime: time.Now(), + Mtime: time.Now(), + Mode: os.FileMode(0644), + }, + } + store.InsertEntry(ctx, entry) + } +} diff --git a/weed/filer/leveldb2/leveldb2_store.go b/weed/filer/leveldb2/leveldb2_store.go new file mode 100644 index 000000000..124d61c1c --- /dev/null +++ b/weed/filer/leveldb2/leveldb2_store.go @@ -0,0 +1,260 @@ +package leveldb + +import ( + "bytes" + "context" + "crypto/md5" + "fmt" + "github.com/syndtr/goleveldb/leveldb" + leveldb_errors "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/opt" + leveldb_util "github.com/syndtr/goleveldb/leveldb/util" + "io" + "os" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + weed_util "github.com/chrislusf/seaweedfs/weed/util" +) + +func init() { + filer.Stores = append(filer.Stores, &LevelDB2Store{}) +} + +type LevelDB2Store struct { + dbs []*leveldb.DB + dbCount int +} + +func (store *LevelDB2Store) GetName() string { + return "leveldb2" +} + +func (store *LevelDB2Store) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + dir := configuration.GetString(prefix + "dir") + return store.initialize(dir, 8) +} + +func (store *LevelDB2Store) initialize(dir string, dbCount int) (err error) { + glog.Infof("filer store leveldb2 dir: %s", dir) + os.MkdirAll(dir, 0755) + if err := weed_util.TestFolderWritable(dir); err != nil { + return fmt.Errorf("Check Level Folder %s Writable: %s", dir, err) + } + + opts := &opt.Options{ + BlockCacheCapacity: 32 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 16 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 4, + } + + for d := 0; d < dbCount; d++ { + dbFolder := fmt.Sprintf("%s/%02d", dir, d) + os.MkdirAll(dbFolder, 0755) + db, dbErr := leveldb.OpenFile(dbFolder, opts) + if leveldb_errors.IsCorrupted(dbErr) { + db, dbErr = leveldb.RecoverFile(dbFolder, opts) + } + if dbErr != nil { + glog.Errorf("filer store open dir %s: %v", dbFolder, dbErr) + return dbErr + } + store.dbs = append(store.dbs, db) + } + store.dbCount = dbCount + + return +} + +func (store *LevelDB2Store) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *LevelDB2Store) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *LevelDB2Store) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store 
*LevelDB2Store) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + dir, name := entry.DirAndName() + key, partitionId := genKey(dir, name, store.dbCount) + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + if len(entry.Chunks) > 50 { + value = weed_util.MaybeGzipData(value) + } + + err = store.dbs[partitionId].Put(key, value, nil) + + if err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + // println("saved", entry.FullPath, "chunks", len(entry.Chunks)) + + return nil +} + +func (store *LevelDB2Store) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *LevelDB2Store) FindEntry(ctx context.Context, fullpath weed_util.FullPath) (entry *filer.Entry, err error) { + dir, name := fullpath.DirAndName() + key, partitionId := genKey(dir, name, store.dbCount) + + data, err := store.dbs[partitionId].Get(key, nil) + + if err == leveldb.ErrNotFound { + return nil, filer_pb.ErrNotFound + } + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(weed_util.MaybeDecompressData(data)) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + // println("read", entry.FullPath, "chunks", len(entry.Chunks), "data", len(data), string(data)) + + return entry, nil +} + +func (store *LevelDB2Store) DeleteEntry(ctx context.Context, fullpath weed_util.FullPath) (err error) { + dir, name := fullpath.DirAndName() + key, partitionId := genKey(dir, name, store.dbCount) + + err = store.dbs[partitionId].Delete(key, nil) + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDB2Store) DeleteFolderChildren(ctx context.Context, fullpath weed_util.FullPath) (err error) { + directoryPrefix, partitionId := genDirectoryKeyPrefix(fullpath, "", store.dbCount) + + batch := new(leveldb.Batch) + + iter := store.dbs[partitionId].NewIterator(&leveldb_util.Range{Start: directoryPrefix}, nil) + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + batch.Delete(append(directoryPrefix, []byte(fileName)...)) + } + iter.Release() + + err = store.dbs[partitionId].Write(batch, nil) + + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDB2Store) ListDirectoryEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc) +} + +func (store *LevelDB2Store) ListDirectoryPrefixedEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + directoryPrefix, partitionId := genDirectoryKeyPrefix(dirPath, prefix, store.dbCount) + lastFileStart := directoryPrefix + if startFileName != "" { + lastFileStart, _ = genDirectoryKeyPrefix(dirPath, startFileName, store.dbCount) + } + + iter := store.dbs[partitionId].NewIterator(&leveldb_util.Range{Start: lastFileStart}, nil) + for 
iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + if fileName == startFileName && !includeStartFile { + continue + } + limit-- + if limit < 0 { + break + } + lastFileName = fileName + entry := &filer.Entry{ + FullPath: weed_util.NewFullPath(string(dirPath), fileName), + } + + // println("list", entry.FullPath, "chunks", len(entry.Chunks)) + if decodeErr := entry.DecodeAttributesAndChunks(weed_util.MaybeDecompressData(iter.Value())); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + if !eachEntryFunc(entry) { + break + } + } + iter.Release() + + return lastFileName, err +} + +func genKey(dirPath, fileName string, dbCount int) (key []byte, partitionId int) { + key, partitionId = hashToBytes(dirPath, dbCount) + key = append(key, []byte(fileName)...) + return key, partitionId +} + +func genDirectoryKeyPrefix(fullpath weed_util.FullPath, startFileName string, dbCount int) (keyPrefix []byte, partitionId int) { + keyPrefix, partitionId = hashToBytes(string(fullpath), dbCount) + if len(startFileName) > 0 { + keyPrefix = append(keyPrefix, []byte(startFileName)...) + } + return keyPrefix, partitionId +} + +func getNameFromKey(key []byte) string { + + return string(key[md5.Size:]) + +} + +// hash directory, and use last byte for partitioning +func hashToBytes(dir string, dbCount int) ([]byte, int) { + h := md5.New() + io.WriteString(h, dir) + + b := h.Sum(nil) + + x := b[len(b)-1] + + return b, int(x) % dbCount +} + +func (store *LevelDB2Store) Shutdown() { + for d := 0; d < store.dbCount; d++ { + store.dbs[d].Close() + } +} diff --git a/weed/filer/leveldb2/leveldb2_store_kv.go b/weed/filer/leveldb2/leveldb2_store_kv.go new file mode 100644 index 000000000..b415d3c32 --- /dev/null +++ b/weed/filer/leveldb2/leveldb2_store_kv.go @@ -0,0 +1,56 @@ +package leveldb + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/syndtr/goleveldb/leveldb" +) + +func (store *LevelDB2Store) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + partitionId := bucketKvKey(key, store.dbCount) + + err = store.dbs[partitionId].Put(key, value, nil) + + if err != nil { + return fmt.Errorf("kv bucket %d put: %v", partitionId, err) + } + + return nil +} + +func (store *LevelDB2Store) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + partitionId := bucketKvKey(key, store.dbCount) + + value, err = store.dbs[partitionId].Get(key, nil) + + if err == leveldb.ErrNotFound { + return nil, filer.ErrKvNotFound + } + + if err != nil { + return nil, fmt.Errorf("kv bucket %d get: %v", partitionId, err) + } + + return +} + +func (store *LevelDB2Store) KvDelete(ctx context.Context, key []byte) (err error) { + + partitionId := bucketKvKey(key, store.dbCount) + + err = store.dbs[partitionId].Delete(key, nil) + + if err != nil { + return fmt.Errorf("kv bucket %d delete: %v", partitionId, err) + } + + return nil +} + +func bucketKvKey(key []byte, dbCount int) (partitionId int) { + return int(key[len(key)-1]) % dbCount +} diff --git a/weed/filer/leveldb2/leveldb2_store_test.go b/weed/filer/leveldb2/leveldb2_store_test.go new file mode 100644 index 000000000..fd0ad18a3 --- /dev/null +++ b/weed/filer/leveldb2/leveldb2_store_test.go @@ -0,0 +1,88 @@ +package leveldb + +import ( + "context" + "io/ioutil" + "os" + "testing" + + "github.com/chrislusf/seaweedfs/weed/filer" + 
"github.com/chrislusf/seaweedfs/weed/util" +) + +func TestCreateAndFind(t *testing.T) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test") + defer os.RemoveAll(dir) + store := &LevelDB2Store{} + store.initialize(dir, 2) + testFiler.SetStore(store) + + fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg") + + ctx := context.Background() + + entry1 := &filer.Entry{ + FullPath: fullpath, + Attr: filer.Attr{ + Mode: 0440, + Uid: 1234, + Gid: 5678, + }, + } + + if err := testFiler.CreateEntry(ctx, entry1, false, false, nil); err != nil { + t.Errorf("create entry %v: %v", entry1.FullPath, err) + return + } + + entry, err := testFiler.FindEntry(ctx, fullpath) + + if err != nil { + t.Errorf("find entry: %v", err) + return + } + + if entry.FullPath != entry1.FullPath { + t.Errorf("find wrong entry: %v", entry.FullPath) + return + } + + // checking one upper directory + entries, _, _ := testFiler.ListDirectoryEntries(ctx, util.FullPath("/home/chris/this/is/one"), "", false, 100, "", "", "") + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + + // checking one upper directory + entries, _, _ = testFiler.ListDirectoryEntries(ctx, util.FullPath("/"), "", false, 100, "", "", "") + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} + +func TestEmptyRoot(t *testing.T) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test2") + defer os.RemoveAll(dir) + store := &LevelDB2Store{} + store.initialize(dir, 2) + testFiler.SetStore(store) + + ctx := context.Background() + + // checking one upper directory + entries, _, err := testFiler.ListDirectoryEntries(ctx, util.FullPath("/"), "", false, 100, "", "", "") + if err != nil { + t.Errorf("list entries: %v", err) + return + } + if len(entries) != 0 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} diff --git a/weed/filer/leveldb3/leveldb3_store.go b/weed/filer/leveldb3/leveldb3_store.go new file mode 100644 index 000000000..d1cdfbbf6 --- /dev/null +++ b/weed/filer/leveldb3/leveldb3_store.go @@ -0,0 +1,376 @@ +package leveldb + +import ( + "bytes" + "context" + "crypto/md5" + "fmt" + "github.com/syndtr/goleveldb/leveldb" + leveldb_errors "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/opt" + leveldb_util "github.com/syndtr/goleveldb/leveldb/util" + "io" + "os" + "strings" + "sync" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + weed_util "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + DEFAULT = "_main" +) + +func init() { + filer.Stores = append(filer.Stores, &LevelDB3Store{}) +} + +type LevelDB3Store struct { + dir string + dbs map[string]*leveldb.DB + dbsLock sync.RWMutex +} + +func (store *LevelDB3Store) GetName() string { + return "leveldb3" +} + +func (store *LevelDB3Store) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + dir := configuration.GetString(prefix + "dir") + return store.initialize(dir) +} + +func (store *LevelDB3Store) initialize(dir string) (err error) { + glog.Infof("filer store leveldb3 dir: %s", dir) + os.MkdirAll(dir, 0755) + if err := weed_util.TestFolderWritable(dir); err != nil { + return fmt.Errorf("Check Level Folder %s Writable: %s", dir, err) + } + store.dir = dir + + db, loadDbErr := store.loadDB(DEFAULT) + if loadDbErr != 
nil { + return loadDbErr + } + store.dbs = make(map[string]*leveldb.DB) + store.dbs[DEFAULT] = db + + return +} + +func (store *LevelDB3Store) loadDB(name string) (*leveldb.DB, error) { + + opts := &opt.Options{ + BlockCacheCapacity: 32 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 16 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 4, + } + if name != DEFAULT { + opts = &opt.Options{ + BlockCacheCapacity: 4 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 2 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 4, + } + } + + dbFolder := fmt.Sprintf("%s/%s", store.dir, name) + os.MkdirAll(dbFolder, 0755) + db, dbErr := leveldb.OpenFile(dbFolder, opts) + if leveldb_errors.IsCorrupted(dbErr) { + db, dbErr = leveldb.RecoverFile(dbFolder, opts) + } + if dbErr != nil { + glog.Errorf("filer store open dir %s: %v", dbFolder, dbErr) + return nil, dbErr + } + return db, nil +} + +func (store *LevelDB3Store) findDB(fullpath weed_util.FullPath, isForChildren bool) (*leveldb.DB, string, weed_util.FullPath, error) { + + store.dbsLock.RLock() + + defaultDB := store.dbs[DEFAULT] + if !strings.HasPrefix(string(fullpath), "/buckets/") { + store.dbsLock.RUnlock() + return defaultDB, DEFAULT, fullpath, nil + } + + // detect bucket + bucketAndObjectKey := string(fullpath)[len("/buckets/"):] + t := strings.Index(bucketAndObjectKey, "/") + if t < 0 && !isForChildren { + store.dbsLock.RUnlock() + return defaultDB, DEFAULT, fullpath, nil + } + bucket := bucketAndObjectKey + shortPath := weed_util.FullPath("/") + if t > 0 { + bucket = bucketAndObjectKey[:t] + shortPath = weed_util.FullPath(bucketAndObjectKey[t:]) + } + + if db, found := store.dbs[bucket]; found { + store.dbsLock.RUnlock() + return db, bucket, shortPath, nil + } + + store.dbsLock.RUnlock() + // upgrade to write lock + store.dbsLock.Lock() + defer store.dbsLock.Unlock() + + // double check after getting the write lock + if db, found := store.dbs[bucket]; found { + return db, bucket, shortPath, nil + } + + // create db + db, err := store.loadDB(bucket) + if err != nil { + return nil, bucket, shortPath, err + } + store.dbs[bucket] = db + + return db, bucket, shortPath, nil +} + +func (store *LevelDB3Store) closeDB(bucket string) { + + store.dbsLock.Lock() + defer store.dbsLock.Unlock() + + if db, found := store.dbs[bucket]; found { + db.Close() + delete(store.dbs, bucket) + } + +} + +func (store *LevelDB3Store) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *LevelDB3Store) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *LevelDB3Store) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *LevelDB3Store) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + + db, _, shortPath, err := store.findDB(entry.FullPath, false) + if err != nil { + return fmt.Errorf("findDB %s : %v", entry.FullPath, err) + } + + dir, name := shortPath.DirAndName() + key := genKey(dir, name) + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + if len(entry.Chunks) > 50 { + value = weed_util.MaybeGzipData(value) + } + + err = db.Put(key, value, nil) + + if err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + // println("saved", entry.FullPath, "chunks", len(entry.Chunks)) + + return nil +} + +func (store *LevelDB3Store) UpdateEntry(ctx context.Context, entry *filer.Entry) 
(err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *LevelDB3Store) FindEntry(ctx context.Context, fullpath weed_util.FullPath) (entry *filer.Entry, err error) { + + db, _, shortPath, err := store.findDB(fullpath, false) + if err != nil { + return nil, fmt.Errorf("findDB %s : %v", fullpath, err) + } + + dir, name := shortPath.DirAndName() + key := genKey(dir, name) + + data, err := db.Get(key, nil) + + if err == leveldb.ErrNotFound { + return nil, filer_pb.ErrNotFound + } + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(weed_util.MaybeDecompressData(data)) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + // println("read", entry.FullPath, "chunks", len(entry.Chunks), "data", len(data), string(data)) + + return entry, nil +} + +func (store *LevelDB3Store) DeleteEntry(ctx context.Context, fullpath weed_util.FullPath) (err error) { + + db, _, shortPath, err := store.findDB(fullpath, false) + if err != nil { + return fmt.Errorf("findDB %s : %v", fullpath, err) + } + + dir, name := shortPath.DirAndName() + key := genKey(dir, name) + + err = db.Delete(key, nil) + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDB3Store) DeleteFolderChildren(ctx context.Context, fullpath weed_util.FullPath) (err error) { + + db, bucket, shortPath, err := store.findDB(fullpath, true) + if err != nil { + return fmt.Errorf("findDB %s : %v", fullpath, err) + } + + if bucket != DEFAULT && shortPath == "/" { + store.closeDB(bucket) + if bucket != "" { // just to make sure + os.RemoveAll(store.dir + "/" + bucket) + } + return nil + } + + directoryPrefix := genDirectoryKeyPrefix(shortPath, "") + + batch := new(leveldb.Batch) + + iter := db.NewIterator(&leveldb_util.Range{Start: directoryPrefix}, nil) + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + batch.Delete(append(directoryPrefix, []byte(fileName)...)) + } + iter.Release() + + err = db.Write(batch, nil) + + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDB3Store) ListDirectoryEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc) +} + +func (store *LevelDB3Store) ListDirectoryPrefixedEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + db, _, shortPath, err := store.findDB(dirPath, true) + if err != nil { + return lastFileName, fmt.Errorf("findDB %s : %v", dirPath, err) + } + + directoryPrefix := genDirectoryKeyPrefix(shortPath, prefix) + lastFileStart := directoryPrefix + if startFileName != "" { + lastFileStart = genDirectoryKeyPrefix(shortPath, startFileName) + } + + iter := db.NewIterator(&leveldb_util.Range{Start: lastFileStart}, nil) + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + if fileName == startFileName && 
!includeStartFile { + continue + } + limit-- + if limit < 0 { + break + } + lastFileName = fileName + entry := &filer.Entry{ + FullPath: weed_util.NewFullPath(string(dirPath), fileName), + } + + // println("list", entry.FullPath, "chunks", len(entry.Chunks)) + if decodeErr := entry.DecodeAttributesAndChunks(weed_util.MaybeDecompressData(iter.Value())); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + if !eachEntryFunc(entry) { + break + } + } + iter.Release() + + return lastFileName, err +} + +func genKey(dirPath, fileName string) (key []byte) { + key = hashToBytes(dirPath) + key = append(key, []byte(fileName)...) + return key +} + +func genDirectoryKeyPrefix(fullpath weed_util.FullPath, startFileName string) (keyPrefix []byte) { + keyPrefix = hashToBytes(string(fullpath)) + if len(startFileName) > 0 { + keyPrefix = append(keyPrefix, []byte(startFileName)...) + } + return keyPrefix +} + +func getNameFromKey(key []byte) string { + + return string(key[md5.Size:]) + +} + +// hash directory +func hashToBytes(dir string) []byte { + h := md5.New() + io.WriteString(h, dir) + b := h.Sum(nil) + return b +} + +func (store *LevelDB3Store) Shutdown() { + for _, db := range store.dbs { + db.Close() + } +} diff --git a/weed/filer/leveldb3/leveldb3_store_kv.go b/weed/filer/leveldb3/leveldb3_store_kv.go new file mode 100644 index 000000000..18d782b80 --- /dev/null +++ b/weed/filer/leveldb3/leveldb3_store_kv.go @@ -0,0 +1,46 @@ +package leveldb + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/syndtr/goleveldb/leveldb" +) + +func (store *LevelDB3Store) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + err = store.dbs[DEFAULT].Put(key, value, nil) + + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + + return nil +} + +func (store *LevelDB3Store) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + value, err = store.dbs[DEFAULT].Get(key, nil) + + if err == leveldb.ErrNotFound { + return nil, filer.ErrKvNotFound + } + + if err != nil { + return nil, fmt.Errorf("kv get: %v", err) + } + + return +} + +func (store *LevelDB3Store) KvDelete(ctx context.Context, key []byte) (err error) { + + err = store.dbs[DEFAULT].Delete(key, nil) + + if err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} diff --git a/weed/filer/leveldb3/leveldb3_store_test.go b/weed/filer/leveldb3/leveldb3_store_test.go new file mode 100644 index 000000000..0b970a539 --- /dev/null +++ b/weed/filer/leveldb3/leveldb3_store_test.go @@ -0,0 +1,88 @@ +package leveldb + +import ( + "context" + "io/ioutil" + "os" + "testing" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func TestCreateAndFind(t *testing.T) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test") + defer os.RemoveAll(dir) + store := &LevelDB3Store{} + store.initialize(dir) + testFiler.SetStore(store) + + fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg") + + ctx := context.Background() + + entry1 := &filer.Entry{ + FullPath: fullpath, + Attr: filer.Attr{ + Mode: 0440, + Uid: 1234, + Gid: 5678, + }, + } + + if err := testFiler.CreateEntry(ctx, entry1, false, false, nil); err != nil { + t.Errorf("create entry %v: %v", entry1.FullPath, err) + return + } + + entry, err := testFiler.FindEntry(ctx, fullpath) + + if err != nil { + t.Errorf("find entry: %v", err) + return + } + + if 
entry.FullPath != entry1.FullPath { + t.Errorf("find wrong entry: %v", entry.FullPath) + return + } + + // checking one upper directory + entries, _, _ := testFiler.ListDirectoryEntries(ctx, util.FullPath("/home/chris/this/is/one"), "", false, 100, "", "", "") + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + + // checking one upper directory + entries, _, _ = testFiler.ListDirectoryEntries(ctx, util.FullPath("/"), "", false, 100, "", "", "") + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} + +func TestEmptyRoot(t *testing.T) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test2") + defer os.RemoveAll(dir) + store := &LevelDB3Store{} + store.initialize(dir) + testFiler.SetStore(store) + + ctx := context.Background() + + // checking one upper directory + entries, _, err := testFiler.ListDirectoryEntries(ctx, util.FullPath("/"), "", false, 100, "", "", "") + if err != nil { + t.Errorf("list entries: %v", err) + return + } + if len(entries) != 0 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} diff --git a/weed/filer/meta_aggregator.go b/weed/filer/meta_aggregator.go new file mode 100644 index 000000000..5c368a57e --- /dev/null +++ b/weed/filer/meta_aggregator.go @@ -0,0 +1,213 @@ +package filer + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/util" + "io" + "sync" + "time" + + "github.com/golang/protobuf/proto" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util/log_buffer" +) + +type MetaAggregator struct { + filers []string + grpcDialOption grpc.DialOption + MetaLogBuffer *log_buffer.LogBuffer + // notifying clients + ListenersLock sync.Mutex + ListenersCond *sync.Cond +} + +// MetaAggregator only aggregates data "on the fly". The logs are not re-persisted to disk. +// The old data comes from what each LocalMetadata persisted on disk. +func NewMetaAggregator(filers []string, grpcDialOption grpc.DialOption) *MetaAggregator { + t := &MetaAggregator{ + filers: filers, + grpcDialOption: grpcDialOption, + } + t.ListenersCond = sync.NewCond(&t.ListenersLock) + t.MetaLogBuffer = log_buffer.NewLogBuffer(LogFlushInterval, nil, func() { + t.ListenersCond.Broadcast() + }) + return t +} + +func (ma *MetaAggregator) StartLoopSubscribe(f *Filer, self string) { + for _, filer := range ma.filers { + go ma.subscribeToOneFiler(f, self, filer) + } +} + +func (ma *MetaAggregator) subscribeToOneFiler(f *Filer, self string, peer string) { + + /* + Each filer reads the "filer.store.id", which is the store's signature when filer starts. + + When reading from other filers' local meta changes: + * if the received change does not contain signature from self, apply the change to current filer store. + + Upon connecting to other filers, need to remember their signature and their offsets. 
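+
+ Concretely (see readOffset / updateOffset below), the remembered offset is
+ kept in the local filer store under the key MetaOffsetPrefix ("Meta")
+ followed by the peer's 4-byte store signature; the value is the 8-byte
+ nanosecond timestamp of the last replayed event. On reconnect, the
+ subscription resumes from that timestamp instead of replaying from zero.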
+ + */ + + var maybeReplicateMetadataChange func(*filer_pb.SubscribeMetadataResponse) + lastPersistTime := time.Now() + lastTsNs := time.Now().Add(-LogFlushInterval).UnixNano() + + peerSignature, err := ma.readFilerStoreSignature(peer) + for err != nil { + glog.V(0).Infof("connecting to peer filer %s: %v", peer, err) + time.Sleep(1357 * time.Millisecond) + peerSignature, err = ma.readFilerStoreSignature(peer) + } + + // when filer store is not shared by multiple filers + if peerSignature != f.Signature { + if prevTsNs, err := ma.readOffset(f, peer, peerSignature); err == nil { + lastTsNs = prevTsNs + } + + glog.V(0).Infof("follow peer: %v, last %v (%d)", peer, time.Unix(0, lastTsNs), lastTsNs) + var counter int64 + var synced bool + maybeReplicateMetadataChange = func(event *filer_pb.SubscribeMetadataResponse) { + if err := Replay(f.Store, event); err != nil { + glog.Errorf("failed to reply metadata change from %v: %v", peer, err) + return + } + counter++ + if lastPersistTime.Add(time.Minute).Before(time.Now()) { + if err := ma.updateOffset(f, peer, peerSignature, event.TsNs); err == nil { + if event.TsNs < time.Now().Add(-2*time.Minute).UnixNano() { + glog.V(0).Infof("sync with %s progressed to: %v %0.2f/sec", peer, time.Unix(0, event.TsNs), float64(counter)/60.0) + } else if !synced { + synced = true + glog.V(0).Infof("synced with %s", peer) + } + lastPersistTime = time.Now() + counter = 0 + } else { + glog.V(0).Infof("failed to update offset for %v: %v", peer, err) + } + } + } + } + + processEventFn := func(event *filer_pb.SubscribeMetadataResponse) error { + data, err := proto.Marshal(event) + if err != nil { + glog.Errorf("failed to marshal subscribed filer_pb.SubscribeMetadataResponse %+v: %v", event, err) + return err + } + dir := event.Directory + // println("received meta change", dir, "size", len(data)) + ma.MetaLogBuffer.AddToBuffer([]byte(dir), data, 0) + if maybeReplicateMetadataChange != nil { + maybeReplicateMetadataChange(event) + } + return nil + } + + for { + err := pb.WithFilerClient(peer, ma.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + stream, err := client.SubscribeLocalMetadata(ctx, &filer_pb.SubscribeMetadataRequest{ + ClientName: "filer:" + self, + PathPrefix: "/", + SinceNs: lastTsNs, + }) + if err != nil { + return fmt.Errorf("subscribe: %v", err) + } + + for { + resp, listenErr := stream.Recv() + if listenErr == io.EOF { + return nil + } + if listenErr != nil { + return listenErr + } + + if err := processEventFn(resp); err != nil { + return fmt.Errorf("process %v: %v", resp, err) + } + lastTsNs = resp.TsNs + + f.onMetadataChangeEvent(resp) + + } + }) + if err != nil { + glog.V(0).Infof("subscribing remote %s meta change: %v", peer, err) + time.Sleep(1733 * time.Millisecond) + } + } +} + +func (ma *MetaAggregator) readFilerStoreSignature(peer string) (sig int32, err error) { + err = pb.WithFilerClient(peer, ma.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return err + } + sig = resp.Signature + return nil + }) + return +} + +const ( + MetaOffsetPrefix = "Meta" +) + +func (ma *MetaAggregator) readOffset(f *Filer, peer string, peerSignature int32) (lastTsNs int64, err error) { + + key := []byte(MetaOffsetPrefix + "xxxx") + util.Uint32toBytes(key[len(MetaOffsetPrefix):], uint32(peerSignature)) + + value, err := 
f.Store.KvGet(context.Background(), key) + + if err == ErrKvNotFound { + glog.Warningf("readOffset %s not found", peer) + return 0, nil + } + + if err != nil { + return 0, fmt.Errorf("readOffset %s : %v", peer, err) + } + + lastTsNs = int64(util.BytesToUint64(value)) + + glog.V(0).Infof("readOffset %s : %d", peer, lastTsNs) + + return +} + +func (ma *MetaAggregator) updateOffset(f *Filer, peer string, peerSignature int32, lastTsNs int64) (err error) { + + key := []byte(MetaOffsetPrefix + "xxxx") + util.Uint32toBytes(key[len(MetaOffsetPrefix):], uint32(peerSignature)) + + value := make([]byte, 8) + util.Uint64toBytes(value, uint64(lastTsNs)) + + err = f.Store.KvPut(context.Background(), key, value) + + if err != nil { + return fmt.Errorf("updateOffset %s : %v", peer, err) + } + + glog.V(4).Infof("updateOffset %s : %d", peer, lastTsNs) + + return +} diff --git a/weed/filer/meta_replay.go b/weed/filer/meta_replay.go new file mode 100644 index 000000000..feb76278b --- /dev/null +++ b/weed/filer/meta_replay.go @@ -0,0 +1,37 @@ +package filer + +import ( + "context" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func Replay(filerStore FilerStore, resp *filer_pb.SubscribeMetadataResponse) error { + message := resp.EventNotification + var oldPath util.FullPath + var newEntry *Entry + if message.OldEntry != nil { + oldPath = util.NewFullPath(resp.Directory, message.OldEntry.Name) + glog.V(4).Infof("deleting %v", oldPath) + if err := filerStore.DeleteEntry(context.Background(), oldPath); err != nil { + return err + } + } + + if message.NewEntry != nil { + dir := resp.Directory + if message.NewParentPath != "" { + dir = message.NewParentPath + } + key := util.NewFullPath(dir, message.NewEntry.Name) + glog.V(4).Infof("creating %v", key) + newEntry = FromPbEntry(dir, message.NewEntry) + if err := filerStore.InsertEntry(context.Background(), newEntry); err != nil { + return err + } + } + + return nil +} diff --git a/weed/filer/mongodb/mongodb_store.go b/weed/filer/mongodb/mongodb_store.go new file mode 100644 index 000000000..1ef5056f4 --- /dev/null +++ b/weed/filer/mongodb/mongodb_store.go @@ -0,0 +1,229 @@ +package mongodb + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/x/bsonx" + "time" +) + +func init() { + filer.Stores = append(filer.Stores, &MongodbStore{}) +} + +type MongodbStore struct { + connect *mongo.Client + database string + collectionName string +} + +type Model struct { + Directory string `bson:"directory"` + Name string `bson:"name"` + Meta []byte `bson:"meta"` +} + +func (store *MongodbStore) GetName() string { + return "mongodb" +} + +func (store *MongodbStore) Initialize(configuration util.Configuration, prefix string) (err error) { + store.database = configuration.GetString(prefix + "database") + store.collectionName = "filemeta" + poolSize := configuration.GetInt(prefix + "option_pool_size") + return store.connection(configuration.GetString(prefix+"uri"), uint64(poolSize)) +} + +func (store *MongodbStore) connection(uri string, poolSize uint64) (err error) { + ctx, _ := context.WithTimeout(context.Background(), 10*time.Second) + opts := 
options.Client().ApplyURI(uri) + + if poolSize > 0 { + opts.SetMaxPoolSize(poolSize) + } + + client, err := mongo.Connect(ctx, opts) + if err != nil { + return err + } + + c := client.Database(store.database).Collection(store.collectionName) + err = store.indexUnique(c) + store.connect = client + return err +} + +func (store *MongodbStore) createIndex(c *mongo.Collection, index mongo.IndexModel, opts *options.CreateIndexesOptions) error { + _, err := c.Indexes().CreateOne(context.Background(), index, opts) + return err +} + +func (store *MongodbStore) indexUnique(c *mongo.Collection) error { + opts := options.CreateIndexes().SetMaxTime(10 * time.Second) + + unique := new(bool) + *unique = true + + index := mongo.IndexModel{ + Keys: bsonx.Doc{{Key: "directory", Value: bsonx.Int32(1)}, {Key: "name", Value: bsonx.Int32(1)}}, + Options: &options.IndexOptions{ + Unique: unique, + }, + } + + return store.createIndex(c, index, opts) +} + +func (store *MongodbStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} + +func (store *MongodbStore) CommitTransaction(ctx context.Context) error { + return nil +} + +func (store *MongodbStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *MongodbStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + + return store.UpdateEntry(ctx, entry) + +} + +func (store *MongodbStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + dir, name := entry.FullPath.DirAndName() + meta, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encode %s: %s", entry.FullPath, err) + } + + if len(entry.Chunks) > 50 { + meta = util.MaybeGzipData(meta) + } + + c := store.connect.Database(store.database).Collection(store.collectionName) + + opts := options.Update().SetUpsert(true) + filter := bson.D{{"directory", dir}, {"name", name}} + update := bson.D{{"$set", bson.D{{"meta", meta}}}} + + _, err = c.UpdateOne(ctx, filter, update, opts) + + if err != nil { + return fmt.Errorf("UpdateEntry %s: %v", entry.FullPath, err) + } + + return nil +} + +func (store *MongodbStore) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) { + + dir, name := fullpath.DirAndName() + var data Model + + var where = bson.M{"directory": dir, "name": name} + err = store.connect.Database(store.database).Collection(store.collectionName).FindOne(ctx, where).Decode(&data) + if err != mongo.ErrNoDocuments && err != nil { + glog.Errorf("find %s: %v", fullpath, err) + return nil, filer_pb.ErrNotFound + } + + if len(data.Meta) == 0 { + return nil, filer_pb.ErrNotFound + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + + err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data.Meta)) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + return entry, nil +} + +func (store *MongodbStore) DeleteEntry(ctx context.Context, fullpath util.FullPath) error { + + dir, name := fullpath.DirAndName() + + where := bson.M{"directory": dir, "name": name} + _, err := store.connect.Database(store.database).Collection(store.collectionName).DeleteOne(ctx, where) + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *MongodbStore) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) error { + + where := bson.M{"directory": fullpath} + _, err := store.connect.Database(store.database).Collection(store.collectionName).DeleteMany(ctx, where) + if 
err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *MongodbStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return lastFileName, filer.ErrUnsupportedListDirectoryPrefixed +} + +func (store *MongodbStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + var where = bson.M{"directory": string(dirPath), "name": bson.M{"$gt": startFileName}} + if includeStartFile { + where["name"] = bson.M{ + "$gte": startFileName, + } + } + optLimit := int64(limit) + opts := &options.FindOptions{Limit: &optLimit, Sort: bson.M{"name": 1}} + cur, err := store.connect.Database(store.database).Collection(store.collectionName).Find(ctx, where, opts) + for cur.Next(ctx) { + var data Model + err := cur.Decode(&data) + if err != nil && err != mongo.ErrNoDocuments { + return lastFileName, err + } + + entry := &filer.Entry{ + FullPath: util.NewFullPath(string(dirPath), data.Name), + } + lastFileName = data.Name + if decodeErr := entry.DecodeAttributesAndChunks(util.MaybeDecompressData(data.Meta)); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + + if !eachEntryFunc(entry) { + break + } + + } + + if err := cur.Close(ctx); err != nil { + glog.V(0).Infof("list iterator close: %v", err) + } + + return lastFileName, err +} + +func (store *MongodbStore) Shutdown() { + ctx, _ := context.WithTimeout(context.Background(), 10*time.Second) + store.connect.Disconnect(ctx) +} diff --git a/weed/filer/mongodb/mongodb_store_kv.go b/weed/filer/mongodb/mongodb_store_kv.go new file mode 100644 index 000000000..4aa9c3a33 --- /dev/null +++ b/weed/filer/mongodb/mongodb_store_kv.go @@ -0,0 +1,72 @@ +package mongodb + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" +) + +func (store *MongodbStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + dir, name := genDirAndName(key) + + c := store.connect.Database(store.database).Collection(store.collectionName) + + _, err = c.InsertOne(ctx, Model{ + Directory: dir, + Name: name, + Meta: value, + }) + + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + + return nil +} + +func (store *MongodbStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + dir, name := genDirAndName(key) + + var data Model + + var where = bson.M{"directory": dir, "name": name} + err = store.connect.Database(store.database).Collection(store.collectionName).FindOne(ctx, where).Decode(&data) + if err != mongo.ErrNoDocuments && err != nil { + glog.Errorf("kv get: %v", err) + return nil, filer.ErrKvNotFound + } + + if len(data.Meta) == 0 { + return nil, filer.ErrKvNotFound + } + + return data.Meta, nil +} + +func (store *MongodbStore) KvDelete(ctx context.Context, key []byte) (err error) { + + dir, name := genDirAndName(key) + + where := bson.M{"directory": dir, "name": name} + _, err = store.connect.Database(store.database).Collection(store.collectionName).DeleteOne(ctx, where) + if err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} + +func genDirAndName(key []byte) (dir 
string, name string) { + for len(key) < 8 { + key = append(key, 0) + } + + dir = string(key[:8]) + name = string(key[8:]) + + return +} diff --git a/weed/filer/mysql/mysql_sql_gen.go b/weed/filer/mysql/mysql_sql_gen.go new file mode 100644 index 000000000..93d3e3f9e --- /dev/null +++ b/weed/filer/mysql/mysql_sql_gen.go @@ -0,0 +1,58 @@ +package mysql + +import ( + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer/abstract_sql" + _ "github.com/go-sql-driver/mysql" +) + +type SqlGenMysql struct { + CreateTableSqlTemplate string + DropTableSqlTemplate string + UpsertQueryTemplate string +} + +var ( + _ = abstract_sql.SqlGenerator(&SqlGenMysql{}) +) + +func (gen *SqlGenMysql) GetSqlInsert(tableName string) string { + if gen.UpsertQueryTemplate != "" { + return fmt.Sprintf(gen.UpsertQueryTemplate, tableName) + } else { + return fmt.Sprintf("INSERT INTO `%s` (dirhash,name,directory,meta) VALUES(?,?,?,?)", tableName) + } +} + +func (gen *SqlGenMysql) GetSqlUpdate(tableName string) string { + return fmt.Sprintf("UPDATE `%s` SET meta=? WHERE dirhash=? AND name=? AND directory=?", tableName) +} + +func (gen *SqlGenMysql) GetSqlFind(tableName string) string { + return fmt.Sprintf("SELECT meta FROM `%s` WHERE dirhash=? AND name=? AND directory=?", tableName) +} + +func (gen *SqlGenMysql) GetSqlDelete(tableName string) string { + return fmt.Sprintf("DELETE FROM `%s` WHERE dirhash=? AND name=? AND directory=?", tableName) +} + +func (gen *SqlGenMysql) GetSqlDeleteFolderChildren(tableName string) string { + return fmt.Sprintf("DELETE FROM `%s` WHERE dirhash=? AND directory=?", tableName) +} + +func (gen *SqlGenMysql) GetSqlListExclusive(tableName string) string { + return fmt.Sprintf("SELECT NAME, meta FROM `%s` WHERE dirhash=? AND name>? AND directory=? AND name like ? ORDER BY NAME ASC LIMIT ?", tableName) +} + +func (gen *SqlGenMysql) GetSqlListInclusive(tableName string) string { + return fmt.Sprintf("SELECT NAME, meta FROM `%s` WHERE dirhash=? AND name>=? AND directory=? AND name like ? 
ORDER BY NAME ASC LIMIT ?", tableName) +} + +func (gen *SqlGenMysql) GetSqlCreateTable(tableName string) string { + return fmt.Sprintf(gen.CreateTableSqlTemplate, tableName) +} + +func (gen *SqlGenMysql) GetSqlDropTable(tableName string) string { + return fmt.Sprintf(gen.DropTableSqlTemplate, tableName) +} diff --git a/weed/filer/mysql/mysql_store.go b/weed/filer/mysql/mysql_store.go new file mode 100644 index 000000000..fbaa4d5f9 --- /dev/null +++ b/weed/filer/mysql/mysql_store.go @@ -0,0 +1,84 @@ +package mysql + +import ( + "database/sql" + "fmt" + "time" + + "github.com/chrislusf/seaweedfs/weed/filer" + + "github.com/chrislusf/seaweedfs/weed/filer/abstract_sql" + "github.com/chrislusf/seaweedfs/weed/util" + _ "github.com/go-sql-driver/mysql" +) + +const ( + CONNECTION_URL_PATTERN = "%s:%s@tcp(%s:%d)/%s?charset=utf8" +) + +func init() { + filer.Stores = append(filer.Stores, &MysqlStore{}) +} + +type MysqlStore struct { + abstract_sql.AbstractSqlStore +} + +func (store *MysqlStore) GetName() string { + return "mysql" +} + +func (store *MysqlStore) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"upsertQuery"), + configuration.GetBool(prefix+"enableUpsert"), + configuration.GetString(prefix+"username"), + configuration.GetString(prefix+"password"), + configuration.GetString(prefix+"hostname"), + configuration.GetInt(prefix+"port"), + configuration.GetString(prefix+"database"), + configuration.GetInt(prefix+"connection_max_idle"), + configuration.GetInt(prefix+"connection_max_open"), + configuration.GetInt(prefix+"connection_max_lifetime_seconds"), + configuration.GetBool(prefix+"interpolateParams"), + ) +} + +func (store *MysqlStore) initialize(upsertQuery string, enableUpsert bool, user, password, hostname string, port int, database string, maxIdle, maxOpen, + maxLifetimeSeconds int, interpolateParams bool) (err error) { + + store.SupportBucketTable = false + if !enableUpsert { + upsertQuery = "" + } + store.SqlGenerator = &SqlGenMysql{ + CreateTableSqlTemplate: "", + DropTableSqlTemplate: "drop table `%s`", + UpsertQueryTemplate: upsertQuery, + } + + sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, user, password, hostname, port, database) + adaptedSqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, user, "<ADAPTED>", hostname, port, database) + if interpolateParams { + sqlUrl += "&interpolateParams=true" + adaptedSqlUrl += "&interpolateParams=true" + } + + var dbErr error + store.DB, dbErr = sql.Open("mysql", sqlUrl) + if dbErr != nil { + store.DB.Close() + store.DB = nil + return fmt.Errorf("can not connect to %s error:%v", adaptedSqlUrl, err) + } + + store.DB.SetMaxIdleConns(maxIdle) + store.DB.SetMaxOpenConns(maxOpen) + store.DB.SetConnMaxLifetime(time.Duration(maxLifetimeSeconds) * time.Second) + + if err = store.DB.Ping(); err != nil { + return fmt.Errorf("connect to %s error:%v", sqlUrl, err) + } + + return nil +} diff --git a/weed/filer/mysql2/mysql2_store.go b/weed/filer/mysql2/mysql2_store.go new file mode 100644 index 000000000..a1f54455a --- /dev/null +++ b/weed/filer/mysql2/mysql2_store.go @@ -0,0 +1,90 @@ +package mysql2 + +import ( + "context" + "database/sql" + "fmt" + "time" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/filer/abstract_sql" + "github.com/chrislusf/seaweedfs/weed/filer/mysql" + "github.com/chrislusf/seaweedfs/weed/util" + _ "github.com/go-sql-driver/mysql" +) + +const ( + CONNECTION_URL_PATTERN = "%s:%s@tcp(%s:%d)/%s?charset=utf8" +) + 
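+
+// The createTable template is applied per table name (including per-bucket
+// tables, since this store sets SupportBucketTable) through the generator's
+// GetSqlCreateTable. A minimal template, assuming the same filemeta schema
+// used by the other SQL stores in this package, might look like:
+//
+//	CREATE TABLE IF NOT EXISTS `%s` (
+//	  dirhash   BIGINT,
+//	  name      VARCHAR(1000),
+//	  directory TEXT,
+//	  meta      LONGBLOB,
+//	  PRIMARY KEY (dirhash, name)
+//	) DEFAULT CHARSET=utf8
+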
+func init() { + filer.Stores = append(filer.Stores, &MysqlStore2{}) +} + +type MysqlStore2 struct { + abstract_sql.AbstractSqlStore +} + +func (store *MysqlStore2) GetName() string { + return "mysql2" +} + +func (store *MysqlStore2) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"createTable"), + configuration.GetString(prefix+"upsertQuery"), + configuration.GetBool(prefix+"enableUpsert"), + configuration.GetString(prefix+"username"), + configuration.GetString(prefix+"password"), + configuration.GetString(prefix+"hostname"), + configuration.GetInt(prefix+"port"), + configuration.GetString(prefix+"database"), + configuration.GetInt(prefix+"connection_max_idle"), + configuration.GetInt(prefix+"connection_max_open"), + configuration.GetInt(prefix+"connection_max_lifetime_seconds"), + configuration.GetBool(prefix+"interpolateParams"), + ) +} + +func (store *MysqlStore2) initialize(createTable, upsertQuery string, enableUpsert bool, user, password, hostname string, port int, database string, maxIdle, maxOpen, + maxLifetimeSeconds int, interpolateParams bool) (err error) { + + store.SupportBucketTable = true + if !enableUpsert { + upsertQuery = "" + } + store.SqlGenerator = &mysql.SqlGenMysql{ + CreateTableSqlTemplate: createTable, + DropTableSqlTemplate: "drop table `%s`", + UpsertQueryTemplate: upsertQuery, + } + + sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, user, password, hostname, port, database) + adaptedSqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, user, "<ADAPTED>", hostname, port, database) + if interpolateParams { + sqlUrl += "&interpolateParams=true" + adaptedSqlUrl += "&interpolateParams=true" + } + + var dbErr error + store.DB, dbErr = sql.Open("mysql", sqlUrl) + if dbErr != nil { + store.DB.Close() + store.DB = nil + return fmt.Errorf("can not connect to %s error:%v", adaptedSqlUrl, err) + } + + store.DB.SetMaxIdleConns(maxIdle) + store.DB.SetMaxOpenConns(maxOpen) + store.DB.SetConnMaxLifetime(time.Duration(maxLifetimeSeconds) * time.Second) + + if err = store.DB.Ping(); err != nil { + return fmt.Errorf("connect to %s error:%v", sqlUrl, err) + } + + if err = store.CreateTable(context.Background(), abstract_sql.DEFAULT_TABLE); err != nil { + return fmt.Errorf("init table %s: %v", abstract_sql.DEFAULT_TABLE, err) + } + + return nil +} diff --git a/weed/filer/permission.go b/weed/filer/permission.go new file mode 100644 index 000000000..0d8b8292b --- /dev/null +++ b/weed/filer/permission.go @@ -0,0 +1,22 @@ +package filer + +func hasWritePermission(dir *Entry, entry *Entry) bool { + + if dir == nil { + return false + } + + if dir.Uid == entry.Uid && dir.Mode&0200 > 0 { + return true + } + + if dir.Gid == entry.Gid && dir.Mode&0020 > 0 { + return true + } + + if dir.Mode&0002 > 0 { + return true + } + + return false +} diff --git a/weed/filer/postgres/README.txt b/weed/filer/postgres/README.txt new file mode 100644 index 000000000..cb0c99c63 --- /dev/null +++ b/weed/filer/postgres/README.txt @@ -0,0 +1,17 @@ + +1. create "seaweedfs" database + +export PGHOME=/Library/PostgreSQL/10 +$PGHOME/bin/createdb --username=postgres --password seaweedfs + +2. 
create "filemeta" table +$PGHOME/bin/psql --username=postgres --password seaweedfs + +CREATE TABLE IF NOT EXISTS filemeta ( + dirhash BIGINT, + name VARCHAR(65535), + directory VARCHAR(65535), + meta bytea, + PRIMARY KEY (dirhash, name) +); + diff --git a/weed/filer/postgres/postgres_sql_gen.go b/weed/filer/postgres/postgres_sql_gen.go new file mode 100644 index 000000000..6cee3d2da --- /dev/null +++ b/weed/filer/postgres/postgres_sql_gen.go @@ -0,0 +1,58 @@ +package postgres + +import ( + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer/abstract_sql" + _ "github.com/lib/pq" +) + +type SqlGenPostgres struct { + CreateTableSqlTemplate string + DropTableSqlTemplate string + UpsertQueryTemplate string +} + +var ( + _ = abstract_sql.SqlGenerator(&SqlGenPostgres{}) +) + +func (gen *SqlGenPostgres) GetSqlInsert(tableName string) string { + if gen.UpsertQueryTemplate != "" { + return fmt.Sprintf(gen.UpsertQueryTemplate, tableName) + } else { + return fmt.Sprintf(`INSERT INTO "%s" (dirhash,name,directory,meta) VALUES($1,$2,$3,$4)`, tableName) + } +} + +func (gen *SqlGenPostgres) GetSqlUpdate(tableName string) string { + return fmt.Sprintf(`UPDATE "%s" SET meta=$1 WHERE dirhash=$2 AND name=$3 AND directory=$4`, tableName) +} + +func (gen *SqlGenPostgres) GetSqlFind(tableName string) string { + return fmt.Sprintf(`SELECT meta FROM "%s" WHERE dirhash=$1 AND name=$2 AND directory=$3`, tableName) +} + +func (gen *SqlGenPostgres) GetSqlDelete(tableName string) string { + return fmt.Sprintf(`DELETE FROM "%s" WHERE dirhash=$1 AND name=$2 AND directory=$3`, tableName) +} + +func (gen *SqlGenPostgres) GetSqlDeleteFolderChildren(tableName string) string { + return fmt.Sprintf(`DELETE FROM "%s" WHERE dirhash=$1 AND directory=$2`, tableName) +} + +func (gen *SqlGenPostgres) GetSqlListExclusive(tableName string) string { + return fmt.Sprintf(`SELECT NAME, meta FROM "%s" WHERE dirhash=$1 AND name>$2 AND directory=$3 AND name like $4 ORDER BY NAME ASC LIMIT $5`, tableName) +} + +func (gen *SqlGenPostgres) GetSqlListInclusive(tableName string) string { + return fmt.Sprintf(`SELECT NAME, meta FROM "%s" WHERE dirhash=$1 AND name>=$2 AND directory=$3 AND name like $4 ORDER BY NAME ASC LIMIT $5`, tableName) +} + +func (gen *SqlGenPostgres) GetSqlCreateTable(tableName string) string { + return fmt.Sprintf(gen.CreateTableSqlTemplate, tableName) +} + +func (gen *SqlGenPostgres) GetSqlDropTable(tableName string) string { + return fmt.Sprintf(gen.DropTableSqlTemplate, tableName) +} diff --git a/weed/filer/postgres/postgres_store.go b/weed/filer/postgres/postgres_store.go new file mode 100644 index 000000000..a1e16a92a --- /dev/null +++ b/weed/filer/postgres/postgres_store.go @@ -0,0 +1,93 @@ +package postgres + +import ( + "database/sql" + "fmt" + "time" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/filer/abstract_sql" + "github.com/chrislusf/seaweedfs/weed/util" + _ "github.com/lib/pq" +) + +const ( + CONNECTION_URL_PATTERN = "host=%s port=%d sslmode=%s connect_timeout=30" +) + +func init() { + filer.Stores = append(filer.Stores, &PostgresStore{}) +} + +type PostgresStore struct { + abstract_sql.AbstractSqlStore +} + +func (store *PostgresStore) GetName() string { + return "postgres" +} + +func (store *PostgresStore) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"upsertQuery"), + configuration.GetBool(prefix+"enableUpsert"), + configuration.GetString(prefix+"username"), + 
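+		// the keys below are read from the filer configuration
+		// (typically the [postgres] section of filer.toml)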
configuration.GetString(prefix+"password"), + configuration.GetString(prefix+"hostname"), + configuration.GetInt(prefix+"port"), + configuration.GetString(prefix+"database"), + configuration.GetString(prefix+"schema"), + configuration.GetString(prefix+"sslmode"), + configuration.GetInt(prefix+"connection_max_idle"), + configuration.GetInt(prefix+"connection_max_open"), + configuration.GetInt(prefix+"connection_max_lifetime_seconds"), + ) +} + +func (store *PostgresStore) initialize(upsertQuery string, enableUpsert bool, user, password, hostname string, port int, database, schema, sslmode string, maxIdle, maxOpen, maxLifetimeSeconds int) (err error) { + + store.SupportBucketTable = false + if !enableUpsert { + upsertQuery = "" + } + store.SqlGenerator = &SqlGenPostgres{ + CreateTableSqlTemplate: "", + DropTableSqlTemplate: `drop table "%s"`, + UpsertQueryTemplate: upsertQuery, + } + + sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, hostname, port, sslmode) + if user != "" { + sqlUrl += " user=" + user + } + adaptedSqlUrl := sqlUrl + if password != "" { + sqlUrl += " password=" + password + adaptedSqlUrl += " password=ADAPTED" + } + if database != "" { + sqlUrl += " dbname=" + database + adaptedSqlUrl += " dbname=" + database + } + if schema != "" { + sqlUrl += " search_path=" + schema + adaptedSqlUrl += " search_path=" + schema + } + var dbErr error + store.DB, dbErr = sql.Open("postgres", sqlUrl) + if dbErr != nil { + store.DB.Close() + store.DB = nil + return fmt.Errorf("can not connect to %s error:%v", adaptedSqlUrl, err) + } + + store.DB.SetMaxIdleConns(maxIdle) + store.DB.SetMaxOpenConns(maxOpen) + store.DB.SetConnMaxLifetime(time.Duration(maxLifetimeSeconds) * time.Second) + + if err = store.DB.Ping(); err != nil { + return fmt.Errorf("connect to %s error:%v", sqlUrl, err) + } + + return nil +} diff --git a/weed/filer/postgres2/postgres2_store.go b/weed/filer/postgres2/postgres2_store.go new file mode 100644 index 000000000..0f573d8d0 --- /dev/null +++ b/weed/filer/postgres2/postgres2_store.go @@ -0,0 +1,100 @@ +package postgres2 + +import ( + "context" + "database/sql" + "fmt" + "time" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/filer/abstract_sql" + "github.com/chrislusf/seaweedfs/weed/filer/postgres" + "github.com/chrislusf/seaweedfs/weed/util" + _ "github.com/lib/pq" +) + +const ( + CONNECTION_URL_PATTERN = "host=%s port=%d sslmode=%s connect_timeout=30" +) + +func init() { + filer.Stores = append(filer.Stores, &PostgresStore2{}) +} + +type PostgresStore2 struct { + abstract_sql.AbstractSqlStore +} + +func (store *PostgresStore2) GetName() string { + return "postgres2" +} + +func (store *PostgresStore2) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"createTable"), + configuration.GetString(prefix+"upsertQuery"), + configuration.GetBool(prefix+"enableUpsert"), + configuration.GetString(prefix+"username"), + configuration.GetString(prefix+"password"), + configuration.GetString(prefix+"hostname"), + configuration.GetInt(prefix+"port"), + configuration.GetString(prefix+"database"), + configuration.GetString(prefix+"schema"), + configuration.GetString(prefix+"sslmode"), + configuration.GetInt(prefix+"connection_max_idle"), + configuration.GetInt(prefix+"connection_max_open"), + configuration.GetInt(prefix+"connection_max_lifetime_seconds"), + ) +} + +func (store *PostgresStore2) initialize(createTable, upsertQuery string, enableUpsert bool, user, password, 
hostname string, port int, database, schema, sslmode string, maxIdle, maxOpen, maxLifetimeSeconds int) (err error) { + + store.SupportBucketTable = true + if !enableUpsert { + upsertQuery = "" + } + store.SqlGenerator = &postgres.SqlGenPostgres{ + CreateTableSqlTemplate: createTable, + DropTableSqlTemplate: `drop table "%s"`, + UpsertQueryTemplate: upsertQuery, + } + + sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, hostname, port, sslmode) + if user != "" { + sqlUrl += " user=" + user + } + adaptedSqlUrl := sqlUrl + if password != "" { + sqlUrl += " password=" + password + adaptedSqlUrl += " password=ADAPTED" + } + if database != "" { + sqlUrl += " dbname=" + database + adaptedSqlUrl += " dbname=" + database + } + if schema != "" { + sqlUrl += " search_path=" + schema + adaptedSqlUrl += " search_path=" + schema + } + var dbErr error + store.DB, dbErr = sql.Open("postgres", sqlUrl) + if dbErr != nil { + store.DB.Close() + store.DB = nil + return fmt.Errorf("can not connect to %s error:%v", adaptedSqlUrl, err) + } + + store.DB.SetMaxIdleConns(maxIdle) + store.DB.SetMaxOpenConns(maxOpen) + store.DB.SetConnMaxLifetime(time.Duration(maxLifetimeSeconds) * time.Second) + + if err = store.DB.Ping(); err != nil { + return fmt.Errorf("connect to %s error:%v", sqlUrl, err) + } + + if err = store.CreateTable(context.Background(), abstract_sql.DEFAULT_TABLE); err != nil { + return fmt.Errorf("init table %s: %v", abstract_sql.DEFAULT_TABLE, err) + } + + return nil +} diff --git a/weed/filer/read_write.go b/weed/filer/read_write.go new file mode 100644 index 000000000..d92d526d5 --- /dev/null +++ b/weed/filer/read_write.go @@ -0,0 +1,116 @@ +package filer + +import ( + "bytes" + "fmt" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/wdclient" + "io/ioutil" + "math" + "net/http" + "time" +) + +func ReadEntry(masterClient *wdclient.MasterClient, filerClient filer_pb.SeaweedFilerClient, dir, name string, byteBuffer *bytes.Buffer) error { + + request := &filer_pb.LookupDirectoryEntryRequest{ + Directory: dir, + Name: name, + } + respLookupEntry, err := filer_pb.LookupEntry(filerClient, request) + if err != nil { + return err + } + if len(respLookupEntry.Entry.Content) > 0 { + _, err = byteBuffer.Write(respLookupEntry.Entry.Content) + return err + } + + return StreamContent(masterClient, byteBuffer, respLookupEntry.Entry.Chunks, 0, math.MaxInt64, false) + +} + +func ReadContent(filerAddress string, dir, name string) ([]byte, error) { + + target := fmt.Sprintf("http://%s%s/%s", filerAddress, dir, name) + + data, _, err := util.Get(target) + + return data, err +} + +func SaveAs(host string, port int, dir, name string, contentType string, byteBuffer *bytes.Buffer) error { + var target string + if port == 0 { + target = fmt.Sprintf("http://%s%s/%s", host, dir, name) + } else { + target = fmt.Sprintf("http://%s:%d%s/%s", host, port, dir, name) + } + + // set the HTTP method, url, and request body + req, err := http.NewRequest(http.MethodPut, target, byteBuffer) + if err != nil { + return err + } + + // set the request header Content-Type for json + if contentType != "" { + req.Header.Set("Content-Type", contentType) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer util.CloseResponse(resp) + + b, err := ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + + if resp.StatusCode >= 400 { + return fmt.Errorf("%s: %s %v", target, resp.Status, string(b)) + } + + return nil + +} + +func 
SaveInsideFiler(client filer_pb.SeaweedFilerClient, dir, name string, content []byte) error { + + resp, err := filer_pb.LookupEntry(client, &filer_pb.LookupDirectoryEntryRequest{ + Directory: dir, + Name: name, + }) + + if err == filer_pb.ErrNotFound { + err = filer_pb.CreateEntry(client, &filer_pb.CreateEntryRequest{ + Directory: dir, + Entry: &filer_pb.Entry{ + Name: name, + IsDirectory: false, + Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32(0644), + Collection: "", + Replication: "", + FileSize: uint64(len(content)), + }, + Content: content, + }, + }) + } else if err == nil { + entry := resp.Entry + entry.Content = content + entry.Attributes.Mtime = time.Now().Unix() + entry.Attributes.FileSize = uint64(len(content)) + err = filer_pb.UpdateEntry(client, &filer_pb.UpdateEntryRequest{ + Directory: dir, + Entry: entry, + }) + } + + return err +} diff --git a/weed/filer/reader_at.go b/weed/filer/reader_at.go new file mode 100644 index 000000000..a1e989684 --- /dev/null +++ b/weed/filer/reader_at.go @@ -0,0 +1,229 @@ +package filer + +import ( + "context" + "fmt" + "io" + "math/rand" + "sync" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/util/chunk_cache" + "github.com/chrislusf/seaweedfs/weed/wdclient" + "github.com/golang/groupcache/singleflight" +) + +type ChunkReadAt struct { + masterClient *wdclient.MasterClient + chunkViews []*ChunkView + lookupFileId wdclient.LookupFileIdFunctionType + readerLock sync.Mutex + fileSize int64 + + fetchGroup singleflight.Group + chunkCache chunk_cache.ChunkCache + lastChunkFileId string + lastChunkData []byte +} + +var _ = io.ReaderAt(&ChunkReadAt{}) +var _ = io.Closer(&ChunkReadAt{}) + +func LookupFn(filerClient filer_pb.FilerClient) wdclient.LookupFileIdFunctionType { + + vidCache := make(map[string]*filer_pb.Locations) + var vicCacheLock sync.RWMutex + return func(fileId string) (targetUrls []string, err error) { + vid := VolumeId(fileId) + vicCacheLock.RLock() + locations, found := vidCache[vid] + vicCacheLock.RUnlock() + + if !found { + util.Retry("lookup volume "+vid, func() error { + err = filerClient.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.LookupVolume(context.Background(), &filer_pb.LookupVolumeRequest{ + VolumeIds: []string{vid}, + }) + if err != nil { + return err + } + + locations = resp.LocationsMap[vid] + if locations == nil || len(locations.Locations) == 0 { + glog.V(0).Infof("failed to locate %s", fileId) + return fmt.Errorf("failed to locate %s", fileId) + } + vicCacheLock.Lock() + vidCache[vid] = locations + vicCacheLock.Unlock() + + return nil + }) + return err + }) + } + + if err != nil { + return nil, err + } + + for _, loc := range locations.Locations { + volumeServerAddress := filerClient.AdjustedUrl(loc) + targetUrl := fmt.Sprintf("http://%s/%s", volumeServerAddress, fileId) + targetUrls = append(targetUrls, targetUrl) + } + + for i := len(targetUrls) - 1; i > 0; i-- { + j := rand.Intn(i + 1) + targetUrls[i], targetUrls[j] = targetUrls[j], targetUrls[i] + } + + return + } +} + +func NewChunkReaderAtFromClient(lookupFn wdclient.LookupFileIdFunctionType, chunkViews []*ChunkView, chunkCache chunk_cache.ChunkCache, fileSize int64) *ChunkReadAt { + + return &ChunkReadAt{ + chunkViews: chunkViews, + lookupFileId: lookupFn, + chunkCache: chunkCache, + fileSize: fileSize, + } +} + +func (c 
*ChunkReadAt) Close() error { + c.lastChunkData = nil + c.lastChunkFileId = "" + return nil +} + +func (c *ChunkReadAt) ReadAt(p []byte, offset int64) (n int, err error) { + + c.readerLock.Lock() + defer c.readerLock.Unlock() + + glog.V(4).Infof("ReadAt [%d,%d) of total file size %d bytes %d chunk views", offset, offset+int64(len(p)), c.fileSize, len(c.chunkViews)) + return c.doReadAt(p, offset) +} + +func (c *ChunkReadAt) doReadAt(p []byte, offset int64) (n int, err error) { + + startOffset, remaining := offset, int64(len(p)) + var nextChunk *ChunkView + for i, chunk := range c.chunkViews { + if remaining <= 0 { + break + } + if i+1 < len(c.chunkViews) { + nextChunk = c.chunkViews[i+1] + } else { + nextChunk = nil + } + if startOffset < chunk.LogicOffset { + gap := int(chunk.LogicOffset - startOffset) + glog.V(4).Infof("zero [%d,%d)", startOffset, startOffset+int64(gap)) + n += int(min(int64(gap), remaining)) + startOffset, remaining = chunk.LogicOffset, remaining-int64(gap) + if remaining <= 0 { + break + } + } + // fmt.Printf(">>> doReadAt [%d,%d), chunk[%d,%d)\n", offset, offset+int64(len(p)), chunk.LogicOffset, chunk.LogicOffset+int64(chunk.Size)) + chunkStart, chunkStop := max(chunk.LogicOffset, startOffset), min(chunk.LogicOffset+int64(chunk.Size), startOffset+remaining) + if chunkStart >= chunkStop { + continue + } + glog.V(4).Infof("read [%d,%d), %d/%d chunk %s [%d,%d)", chunkStart, chunkStop, i, len(c.chunkViews), chunk.FileId, chunk.LogicOffset-chunk.Offset, chunk.LogicOffset-chunk.Offset+int64(chunk.Size)) + var buffer []byte + buffer, err = c.readFromWholeChunkData(chunk, nextChunk) + if err != nil { + glog.Errorf("fetching chunk %+v: %v\n", chunk, err) + return + } + bufferOffset := chunkStart - chunk.LogicOffset + chunk.Offset + copied := copy(p[startOffset-offset:chunkStop-chunkStart+startOffset-offset], buffer[bufferOffset:bufferOffset+chunkStop-chunkStart]) + n += copied + startOffset, remaining = startOffset+int64(copied), remaining-int64(copied) + } + + glog.V(4).Infof("doReadAt [%d,%d), n:%v, err:%v", offset, offset+int64(len(p)), n, err) + + if err == nil && remaining > 0 && c.fileSize > startOffset { + delta := int(min(remaining, c.fileSize-startOffset)) + glog.V(4).Infof("zero2 [%d,%d) of file size %d bytes", startOffset, startOffset+int64(delta), c.fileSize) + n += delta + } + + if err == nil && offset+int64(len(p)) >= c.fileSize { + err = io.EOF + } + // fmt.Printf("~~~ filled %d, err: %v\n\n", n, err) + + return + +} + +func (c *ChunkReadAt) readFromWholeChunkData(chunkView *ChunkView, nextChunkViews ...*ChunkView) (chunkData []byte, err error) { + + if c.lastChunkFileId == chunkView.FileId { + return c.lastChunkData, nil + } + + v, doErr := c.readOneWholeChunk(chunkView) + + if doErr != nil { + return nil, doErr + } + + chunkData = v.([]byte) + + c.lastChunkData = chunkData + c.lastChunkFileId = chunkView.FileId + + for _, nextChunkView := range nextChunkViews { + if c.chunkCache != nil && nextChunkView != nil { + go c.readOneWholeChunk(nextChunkView) + } + } + + return +} + +func (c *ChunkReadAt) readOneWholeChunk(chunkView *ChunkView) (interface{}, error) { + + var err error + + return c.fetchGroup.Do(chunkView.FileId, func() (interface{}, error) { + + glog.V(4).Infof("readFromWholeChunkData %s offset %d [%d,%d) size at least %d", chunkView.FileId, chunkView.Offset, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size), chunkView.ChunkSize) + + data := c.chunkCache.GetChunk(chunkView.FileId, chunkView.ChunkSize) + if data != nil { + 
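// cache hit: the local chunk cache already holds this chunk, so no volume server read is needed +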
glog.V(4).Infof("cache hit %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset-chunkView.Offset, chunkView.LogicOffset-chunkView.Offset+int64(len(data))) + } else { + var err error + data, err = c.doFetchFullChunkData(chunkView) + if err != nil { + return data, err + } + c.chunkCache.SetChunk(chunkView.FileId, data) + } + return data, err + }) +} + +func (c *ChunkReadAt) doFetchFullChunkData(chunkView *ChunkView) ([]byte, error) { + + glog.V(4).Infof("+ doFetchFullChunkData %s", chunkView.FileId) + + data, err := fetchChunk(c.lookupFileId, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped) + + glog.V(4).Infof("- doFetchFullChunkData %s", chunkView.FileId) + + return data, err + +} diff --git a/weed/filer/reader_at_test.go b/weed/filer/reader_at_test.go new file mode 100644 index 000000000..37a34f4ea --- /dev/null +++ b/weed/filer/reader_at_test.go @@ -0,0 +1,156 @@ +package filer + +import ( + "fmt" + "io" + "math" + "strconv" + "sync" + "testing" +) + +type mockChunkCache struct { +} + +func (m *mockChunkCache) GetChunk(fileId string, minSize uint64) (data []byte) { + x, _ := strconv.Atoi(fileId) + data = make([]byte, minSize) + for i := 0; i < int(minSize); i++ { + data[i] = byte(x) + } + return data +} +func (m *mockChunkCache) SetChunk(fileId string, data []byte) { +} + +func TestReaderAt(t *testing.T) { + + visibles := []VisibleInterval{ + { + start: 1, + stop: 2, + fileId: "1", + chunkSize: 9, + }, + { + start: 3, + stop: 4, + fileId: "3", + chunkSize: 1, + }, + { + start: 5, + stop: 6, + fileId: "5", + chunkSize: 2, + }, + { + start: 7, + stop: 9, + fileId: "7", + chunkSize: 2, + }, + { + start: 9, + stop: 10, + fileId: "9", + chunkSize: 2, + }, + } + + readerAt := &ChunkReadAt{ + chunkViews: ViewFromVisibleIntervals(visibles, 0, math.MaxInt64), + lookupFileId: nil, + readerLock: sync.Mutex{}, + fileSize: 10, + chunkCache: &mockChunkCache{}, + } + + testReadAt(t, readerAt, 0, 10, 10, io.EOF) + testReadAt(t, readerAt, 0, 12, 10, io.EOF) + testReadAt(t, readerAt, 2, 8, 8, io.EOF) + testReadAt(t, readerAt, 3, 6, 6, nil) + +} + +func testReadAt(t *testing.T, readerAt *ChunkReadAt, offset int64, size int, expected int, expectedErr error) { + data := make([]byte, size) + n, err := readerAt.ReadAt(data, offset) + + for _, d := range data { + fmt.Printf("%x", d) + } + fmt.Println() + + if expected != n { + t.Errorf("unexpected read size: %d, expect: %d", n, expected) + } + if err != expectedErr { + t.Errorf("unexpected read error: %v, expect: %v", err, expectedErr) + } + +} + +func TestReaderAt0(t *testing.T) { + + visibles := []VisibleInterval{ + { + start: 2, + stop: 5, + fileId: "1", + chunkSize: 9, + }, + { + start: 7, + stop: 9, + fileId: "2", + chunkSize: 9, + }, + } + + readerAt := &ChunkReadAt{ + chunkViews: ViewFromVisibleIntervals(visibles, 0, math.MaxInt64), + lookupFileId: nil, + readerLock: sync.Mutex{}, + fileSize: 10, + chunkCache: &mockChunkCache{}, + } + + testReadAt(t, readerAt, 0, 10, 10, io.EOF) + testReadAt(t, readerAt, 3, 16, 7, io.EOF) + testReadAt(t, readerAt, 3, 5, 5, nil) + + testReadAt(t, readerAt, 11, 5, 0, io.EOF) + testReadAt(t, readerAt, 10, 5, 0, io.EOF) + +} + +func TestReaderAt1(t *testing.T) { + + visibles := []VisibleInterval{ + { + start: 2, + stop: 5, + fileId: "1", + chunkSize: 9, + }, + } + + readerAt := &ChunkReadAt{ + chunkViews: ViewFromVisibleIntervals(visibles, 0, math.MaxInt64), + lookupFileId: nil, + readerLock: sync.Mutex{}, + fileSize: 20, + chunkCache: &mockChunkCache{}, + } + + testReadAt(t, readerAt, 0, 20, 20, io.EOF) + 
testReadAt(t, readerAt, 1, 7, 7, nil) + testReadAt(t, readerAt, 0, 1, 1, nil) + testReadAt(t, readerAt, 18, 4, 2, io.EOF) + testReadAt(t, readerAt, 12, 4, 4, nil) + testReadAt(t, readerAt, 4, 20, 16, io.EOF) + testReadAt(t, readerAt, 4, 10, 10, nil) + testReadAt(t, readerAt, 1, 10, 10, nil) + +} diff --git a/weed/filer/redis/redis_cluster_store.go b/weed/filer/redis/redis_cluster_store.go new file mode 100644 index 000000000..9572058a8 --- /dev/null +++ b/weed/filer/redis/redis_cluster_store.go @@ -0,0 +1,42 @@ +package redis + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/go-redis/redis/v8" +) + +func init() { + filer.Stores = append(filer.Stores, &RedisClusterStore{}) +} + +type RedisClusterStore struct { + UniversalRedisStore +} + +func (store *RedisClusterStore) GetName() string { + return "redis_cluster" +} + +func (store *RedisClusterStore) Initialize(configuration util.Configuration, prefix string) (err error) { + + configuration.SetDefault(prefix+"useReadOnly", false) + configuration.SetDefault(prefix+"routeByLatency", false) + + return store.initialize( + configuration.GetStringSlice(prefix+"addresses"), + configuration.GetString(prefix+"password"), + configuration.GetBool(prefix+"useReadOnly"), + configuration.GetBool(prefix+"routeByLatency"), + ) +} + +func (store *RedisClusterStore) initialize(addresses []string, password string, readOnly, routeByLatency bool) (err error) { + store.Client = redis.NewClusterClient(&redis.ClusterOptions{ + Addrs: addresses, + Password: password, + ReadOnly: readOnly, + RouteByLatency: routeByLatency, + }) + return +} diff --git a/weed/filer/redis/redis_store.go b/weed/filer/redis/redis_store.go new file mode 100644 index 000000000..665352a63 --- /dev/null +++ b/weed/filer/redis/redis_store.go @@ -0,0 +1,36 @@ +package redis + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/go-redis/redis/v8" +) + +func init() { + filer.Stores = append(filer.Stores, &RedisStore{}) +} + +type RedisStore struct { + UniversalRedisStore +} + +func (store *RedisStore) GetName() string { + return "redis" +} + +func (store *RedisStore) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"address"), + configuration.GetString(prefix+"password"), + configuration.GetInt(prefix+"database"), + ) +} + +func (store *RedisStore) initialize(hostPort string, password string, database int) (err error) { + store.Client = redis.NewClient(&redis.Options{ + Addr: hostPort, + Password: password, + DB: database, + }) + return +} diff --git a/weed/filer/redis/universal_redis_store.go b/weed/filer/redis/universal_redis_store.go new file mode 100644 index 000000000..30d11a7f4 --- /dev/null +++ b/weed/filer/redis/universal_redis_store.go @@ -0,0 +1,200 @@ +package redis + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "github.com/go-redis/redis/v8" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + DIR_LIST_MARKER = "\x00" +) + +type UniversalRedisStore struct { + Client redis.UniversalClient +} + +func (store *UniversalRedisStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *UniversalRedisStore) CommitTransaction(ctx context.Context) error { + return nil 
+} +func (store *UniversalRedisStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *UniversalRedisStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + if len(entry.Chunks) > 50 { + value = util.MaybeGzipData(value) + } + + _, err = store.Client.Set(ctx, string(entry.FullPath), value, time.Duration(entry.TtlSec)*time.Second).Result() + + if err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + dir, name := entry.FullPath.DirAndName() + if name != "" { + _, err = store.Client.SAdd(ctx, genDirectoryListKey(dir), name).Result() + if err != nil { + return fmt.Errorf("persisting %s in parent dir: %v", entry.FullPath, err) + } + } + + return nil +} + +func (store *UniversalRedisStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *UniversalRedisStore) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) { + + data, err := store.Client.Get(ctx, string(fullpath)).Result() + if err == redis.Nil { + return nil, filer_pb.ErrNotFound + } + + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData([]byte(data))) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + return entry, nil +} + +func (store *UniversalRedisStore) DeleteEntry(ctx context.Context, fullpath util.FullPath) (err error) { + + _, err = store.Client.Del(ctx, string(fullpath)).Result() + + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + dir, name := fullpath.DirAndName() + if name != "" { + _, err = store.Client.SRem(ctx, genDirectoryListKey(dir), name).Result() + if err != nil { + return fmt.Errorf("delete %s in parent dir: %v", fullpath, err) + } + } + + return nil +} + +func (store *UniversalRedisStore) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) (err error) { + + members, err := store.Client.SMembers(ctx, genDirectoryListKey(string(fullpath))).Result() + if err != nil { + return fmt.Errorf("delete folder %s : %v", fullpath, err) + } + + for _, fileName := range members { + path := util.NewFullPath(string(fullpath), fileName) + _, err = store.Client.Del(ctx, string(path)).Result() + if err != nil { + return fmt.Errorf("delete %s in parent dir: %v", fullpath, err) + } + } + + return nil +} + +func (store *UniversalRedisStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return lastFileName, filer.ErrUnsupportedListDirectoryPrefixed +} + +func (store *UniversalRedisStore) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + dirListKey := genDirectoryListKey(string(dirPath)) + members, err := store.Client.SMembers(ctx, dirListKey).Result() + if err != nil { + return lastFileName, fmt.Errorf("list %s : %v", dirPath, err) + } + + // skip + if startFileName != "" { + var t []string + for _, m := range members { + if strings.Compare(m, startFileName) >= 
0 { + if m == startFileName { + if includeStartFile { + t = append(t, m) + } + } else { + t = append(t, m) + } + } + } + members = t + } + + // sort + sort.Slice(members, func(i, j int) bool { + return strings.Compare(members[i], members[j]) < 0 + }) + + // limit + if limit < int64(len(members)) { + members = members[:limit] + } + + // fetch entry meta + for _, fileName := range members { + path := util.NewFullPath(string(dirPath), fileName) + entry, err := store.FindEntry(ctx, path) + lastFileName = fileName + if err != nil { + glog.V(0).Infof("list %s : %v", path, err) + if err == filer_pb.ErrNotFound { + continue + } + } else { + if entry.TtlSec > 0 { + if entry.Attr.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { + store.Client.Del(ctx, string(path)).Result() + store.Client.SRem(ctx, dirListKey, fileName).Result() + continue + } + } + if !eachEntryFunc(entry) { + break + } + } + } + + return lastFileName, err +} + +func genDirectoryListKey(dir string) (dirList string) { + return dir + DIR_LIST_MARKER +} + +func (store *UniversalRedisStore) Shutdown() { + store.Client.Close() +} diff --git a/weed/filer/redis/universal_redis_store_kv.go b/weed/filer/redis/universal_redis_store_kv.go new file mode 100644 index 000000000..ad6e389ed --- /dev/null +++ b/weed/filer/redis/universal_redis_store_kv.go @@ -0,0 +1,42 @@ +package redis + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/go-redis/redis/v8" +) + +func (store *UniversalRedisStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + _, err = store.Client.Set(ctx, string(key), value, 0).Result() + + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + + return nil +} + +func (store *UniversalRedisStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + data, err := store.Client.Get(ctx, string(key)).Result() + + if err == redis.Nil { + return nil, filer.ErrKvNotFound + } + + return []byte(data), err +} + +func (store *UniversalRedisStore) KvDelete(ctx context.Context, key []byte) (err error) { + + _, err = store.Client.Del(ctx, string(key)).Result() + + if err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} diff --git a/weed/filer/redis2/redis_cluster_store.go b/weed/filer/redis2/redis_cluster_store.go new file mode 100644 index 000000000..22d09da25 --- /dev/null +++ b/weed/filer/redis2/redis_cluster_store.go @@ -0,0 +1,44 @@ +package redis2 + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/go-redis/redis/v8" +) + +func init() { + filer.Stores = append(filer.Stores, &RedisCluster2Store{}) +} + +type RedisCluster2Store struct { + UniversalRedis2Store +} + +func (store *RedisCluster2Store) GetName() string { + return "redis_cluster2" +} + +func (store *RedisCluster2Store) Initialize(configuration util.Configuration, prefix string) (err error) { + + configuration.SetDefault(prefix+"useReadOnly", false) + configuration.SetDefault(prefix+"routeByLatency", false) + + return store.initialize( + configuration.GetStringSlice(prefix+"addresses"), + configuration.GetString(prefix+"password"), + configuration.GetBool(prefix+"useReadOnly"), + configuration.GetBool(prefix+"routeByLatency"), + configuration.GetStringSlice(prefix+"superLargeDirectories"), + ) +} + +func (store *RedisCluster2Store) initialize(addresses []string, password string, readOnly, routeByLatency bool, superLargeDirectories []string) (err error) { + store.Client = 
redis.NewClusterClient(&redis.ClusterOptions{ + Addrs: addresses, + Password: password, + ReadOnly: readOnly, + RouteByLatency: routeByLatency, + }) + store.loadSuperLargeDirectories(superLargeDirectories) + return +} diff --git a/weed/filer/redis2/redis_store.go b/weed/filer/redis2/redis_store.go new file mode 100644 index 000000000..8eb97e374 --- /dev/null +++ b/weed/filer/redis2/redis_store.go @@ -0,0 +1,38 @@ +package redis2 + +import ( + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/go-redis/redis/v8" +) + +func init() { + filer.Stores = append(filer.Stores, &Redis2Store{}) +} + +type Redis2Store struct { + UniversalRedis2Store +} + +func (store *Redis2Store) GetName() string { + return "redis2" +} + +func (store *Redis2Store) Initialize(configuration util.Configuration, prefix string) (err error) { + return store.initialize( + configuration.GetString(prefix+"address"), + configuration.GetString(prefix+"password"), + configuration.GetInt(prefix+"database"), + configuration.GetStringSlice(prefix+"superLargeDirectories"), + ) +} + +func (store *Redis2Store) initialize(hostPort string, password string, database int, superLargeDirectories []string) (err error) { + store.Client = redis.NewClient(&redis.Options{ + Addr: hostPort, + Password: password, + DB: database, + }) + store.loadSuperLargeDirectories(superLargeDirectories) + return +} diff --git a/weed/filer/redis2/universal_redis_store.go b/weed/filer/redis2/universal_redis_store.go new file mode 100644 index 000000000..aab3d1f4a --- /dev/null +++ b/weed/filer/redis2/universal_redis_store.go @@ -0,0 +1,204 @@ +package redis2 + +import ( + "context" + "fmt" + "time" + + "github.com/go-redis/redis/v8" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + DIR_LIST_MARKER = "\x00" +) + +type UniversalRedis2Store struct { + Client redis.UniversalClient + superLargeDirectoryHash map[string]bool +} + +func (store *UniversalRedis2Store) isSuperLargeDirectory(dir string) (isSuperLargeDirectory bool) { + _, isSuperLargeDirectory = store.superLargeDirectoryHash[dir] + return +} + +func (store *UniversalRedis2Store) loadSuperLargeDirectories(superLargeDirectories []string) { + // set directory hash + store.superLargeDirectoryHash = make(map[string]bool) + for _, dir := range superLargeDirectories { + store.superLargeDirectoryHash[dir] = true + } +} + +func (store *UniversalRedis2Store) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *UniversalRedis2Store) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *UniversalRedis2Store) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *UniversalRedis2Store) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + if len(entry.Chunks) > 50 { + value = util.MaybeGzipData(value) + } + + if err = store.Client.Set(ctx, string(entry.FullPath), value, time.Duration(entry.TtlSec)*time.Second).Err(); err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + dir, name := entry.FullPath.DirAndName() + if store.isSuperLargeDirectory(dir) { + return nil + } + + if name != "" { + if err = store.Client.ZAddNX(ctx, 
genDirectoryListKey(dir), &redis.Z{Score: 0, Member: name}).Err(); err != nil { + return fmt.Errorf("persisting %s in parent dir: %v", entry.FullPath, err) + } + } + + return nil +} + +func (store *UniversalRedis2Store) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *UniversalRedis2Store) FindEntry(ctx context.Context, fullpath util.FullPath) (entry *filer.Entry, err error) { + + data, err := store.Client.Get(ctx, string(fullpath)).Result() + if err == redis.Nil { + return nil, filer_pb.ErrNotFound + } + + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(util.MaybeDecompressData([]byte(data))) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + return entry, nil +} + +func (store *UniversalRedis2Store) DeleteEntry(ctx context.Context, fullpath util.FullPath) (err error) { + + _, err = store.Client.Del(ctx, genDirectoryListKey(string(fullpath))).Result() + if err != nil { + return fmt.Errorf("delete dir list %s : %v", fullpath, err) + } + + _, err = store.Client.Del(ctx, string(fullpath)).Result() + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + dir, name := fullpath.DirAndName() + if store.isSuperLargeDirectory(dir) { + return nil + } + if name != "" { + _, err = store.Client.ZRem(ctx, genDirectoryListKey(dir), name).Result() + if err != nil { + return fmt.Errorf("DeleteEntry %s in parent dir: %v", fullpath, err) + } + } + + return nil +} + +func (store *UniversalRedis2Store) DeleteFolderChildren(ctx context.Context, fullpath util.FullPath) (err error) { + + if store.isSuperLargeDirectory(string(fullpath)) { + return nil + } + + members, err := store.Client.ZRange(ctx, genDirectoryListKey(string(fullpath)), 0, -1).Result() + if err != nil { + return fmt.Errorf("DeleteFolderChildren %s : %v", fullpath, err) + } + + for _, fileName := range members { + path := util.NewFullPath(string(fullpath), fileName) + _, err = store.Client.Del(ctx, string(path)).Result() + if err != nil { + return fmt.Errorf("DeleteFolderChildren %s in parent dir: %v", fullpath, err) + } + } + + return nil +} + +func (store *UniversalRedis2Store) ListDirectoryPrefixedEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return lastFileName, filer.ErrUnsupportedListDirectoryPrefixed +} + +func (store *UniversalRedis2Store) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + dirListKey := genDirectoryListKey(string(dirPath)) + start := int64(0) + if startFileName != "" { + start, _ = store.Client.ZRank(ctx, dirListKey, startFileName).Result() + if !includeStartFile { + start++ + } + } + members, err := store.Client.ZRange(ctx, dirListKey, start, start+int64(limit)-1).Result() + if err != nil { + return lastFileName, fmt.Errorf("list %s : %v", dirPath, err) + } + + // fetch entry meta + for _, fileName := range members { + path := util.NewFullPath(string(dirPath), fileName) + entry, err := store.FindEntry(ctx, path) + lastFileName = fileName + if err != nil { + glog.V(0).Infof("list %s : %v", path, err) + if err == filer_pb.ErrNotFound { + continue + } + } else { + if 
entry.TtlSec > 0 { + if entry.Attr.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { + store.Client.Del(ctx, string(path)).Result() + store.Client.ZRem(ctx, dirListKey, fileName).Result() + continue + } + } + if !eachEntryFunc(entry) { + break + } + } + } + + return lastFileName, err +} + +func genDirectoryListKey(dir string) (dirList string) { + return dir + DIR_LIST_MARKER +} + +func (store *UniversalRedis2Store) Shutdown() { + store.Client.Close() +} diff --git a/weed/filer/redis2/universal_redis_store_kv.go b/weed/filer/redis2/universal_redis_store_kv.go new file mode 100644 index 000000000..bde994dc9 --- /dev/null +++ b/weed/filer/redis2/universal_redis_store_kv.go @@ -0,0 +1,42 @@ +package redis2 + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/go-redis/redis/v8" +) + +func (store *UniversalRedis2Store) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + _, err = store.Client.Set(ctx, string(key), value, 0).Result() + + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + + return nil +} + +func (store *UniversalRedis2Store) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + data, err := store.Client.Get(ctx, string(key)).Result() + + if err == redis.Nil { + return nil, filer.ErrKvNotFound + } + + return []byte(data), err +} + +func (store *UniversalRedis2Store) KvDelete(ctx context.Context, key []byte) (err error) { + + _, err = store.Client.Del(ctx, string(key)).Result() + + if err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} diff --git a/weed/filer/rocksdb/README.md b/weed/filer/rocksdb/README.md new file mode 100644 index 000000000..6bae6d34e --- /dev/null +++ b/weed/filer/rocksdb/README.md @@ -0,0 +1,41 @@ +# Prepare the compilation environment on Linux +- sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test +- sudo apt-get update -qq +- sudo apt-get install gcc-6 g++-6 libsnappy-dev zlib1g-dev libbz2-dev -qq +- export CXX="g++-6" CC="gcc-6" + +- wget https://launchpad.net/ubuntu/+archive/primary/+files/libgflags2_2.0-1.1ubuntu1_amd64.deb +- sudo dpkg -i libgflags2_2.0-1.1ubuntu1_amd64.deb +- wget https://launchpad.net/ubuntu/+archive/primary/+files/libgflags-dev_2.0-1.1ubuntu1_amd64.deb +- sudo dpkg -i libgflags-dev_2.0-1.1ubuntu1_amd64.deb + +# Prepare the compilation environment on macOS +``` +brew install snappy +``` + +# Install RocksDB +``` + export ROCKSDB_HOME=/Users/chris/dev/rocksdb + + git clone https://github.com/facebook/rocksdb.git $ROCKSDB_HOME + pushd $ROCKSDB_HOME + make clean + make install-static + popd +``` + +# Install gorocksdb + +``` +export CGO_CFLAGS="-I$ROCKSDB_HOME/include" +export CGO_LDFLAGS="-L$ROCKSDB_HOME -lrocksdb -lstdc++ -lm -lz -lbz2 -lsnappy -llz4 -lzstd" + +go get github.com/tecbot/gorocksdb +``` +# Compile with RocksDB + +``` +cd ~/go/src/github.com/chrislusf/seaweedfs/weed +go install -tags rocksdb +``` diff --git a/weed/filer/rocksdb/rocksdb_store.go b/weed/filer/rocksdb/rocksdb_store.go new file mode 100644 index 000000000..379a18c62 --- /dev/null +++ b/weed/filer/rocksdb/rocksdb_store.go @@ -0,0 +1,304 @@ +// +build rocksdb + +package rocksdb + +import ( + "bytes" + "context" + "crypto/md5" + "fmt" + "io" + "os" + + "github.com/tecbot/gorocksdb" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + weed_util "github.com/chrislusf/seaweedfs/weed/util" +) + +func init() { + filer.Stores = 
append(filer.Stores, &RocksDBStore{}) +} + +type options struct { + opt *gorocksdb.Options + ro *gorocksdb.ReadOptions + wo *gorocksdb.WriteOptions +} + +func (opt *options) init() { + opt.opt = gorocksdb.NewDefaultOptions() + opt.ro = gorocksdb.NewDefaultReadOptions() + opt.wo = gorocksdb.NewDefaultWriteOptions() +} + +func (opt *options) close() { + opt.opt.Destroy() + opt.ro.Destroy() + opt.wo.Destroy() +} + +type RocksDBStore struct { + path string + db *gorocksdb.DB + options +} + +func (store *RocksDBStore) GetName() string { + return "rocksdb" +} + +func (store *RocksDBStore) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + dir := configuration.GetString(prefix + "dir") + return store.initialize(dir) +} + +func (store *RocksDBStore) initialize(dir string) (err error) { + glog.Infof("filer store rocksdb dir: %s", dir) + os.MkdirAll(dir, 0755) + if err := weed_util.TestFolderWritable(dir); err != nil { + return fmt.Errorf("Check Level Folder %s Writable: %s", dir, err) + } + store.options.init() + store.opt.SetCreateIfMissing(true) + // reduce write amplification + // also avoid expired data stored in highest level never get compacted + store.opt.SetLevelCompactionDynamicLevelBytes(true) + store.opt.SetCompactionFilter(NewTTLFilter()) + // store.opt.SetMaxBackgroundCompactions(2) + + store.db, err = gorocksdb.OpenDb(store.opt, dir) + + return +} + +func (store *RocksDBStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *RocksDBStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *RocksDBStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *RocksDBStore) InsertEntry(ctx context.Context, entry *filer.Entry) (err error) { + dir, name := entry.DirAndName() + key := genKey(dir, name) + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + err = store.db.Put(store.wo, key, value) + + if err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + // println("saved", entry.FullPath, "chunks", len(entry.Chunks)) + + return nil +} + +func (store *RocksDBStore) UpdateEntry(ctx context.Context, entry *filer.Entry) (err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *RocksDBStore) FindEntry(ctx context.Context, fullpath weed_util.FullPath) (entry *filer.Entry, err error) { + dir, name := fullpath.DirAndName() + key := genKey(dir, name) + data, err := store.db.Get(store.ro, key) + + if data == nil { + return nil, filer_pb.ErrNotFound + } + defer data.Free() + + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + entry = &filer.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(data.Data()) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + // println("read", entry.FullPath, "chunks", len(entry.Chunks), "data", len(data), string(data)) + + return entry, nil +} + +func (store *RocksDBStore) DeleteEntry(ctx context.Context, fullpath weed_util.FullPath) (err error) { + dir, name := fullpath.DirAndName() + key := genKey(dir, name) + + err = store.db.Delete(store.wo, key) + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *RocksDBStore) DeleteFolderChildren(ctx context.Context, fullpath weed_util.FullPath) (err error) { + directoryPrefix := genDirectoryKeyPrefix(fullpath, 
"") + + batch := gorocksdb.NewWriteBatch() + defer batch.Destroy() + + ro := gorocksdb.NewDefaultReadOptions() + defer ro.Destroy() + ro.SetFillCache(false) + + iter := store.db.NewIterator(ro) + defer iter.Close() + err = enumerate(iter, directoryPrefix, nil, false, -1, func(key, value []byte) bool { + batch.Delete(key) + return true + }) + if err != nil { + return fmt.Errorf("delete list %s : %v", fullpath, err) + } + + err = store.db.Write(store.wo, batch) + + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func enumerate(iter *gorocksdb.Iterator, prefix, lastKey []byte, includeLastKey bool, limit int64, fn func(key, value []byte) bool) (err error) { + + if len(lastKey) == 0 { + iter.Seek(prefix) + } else { + iter.Seek(lastKey) + if !includeLastKey { + if iter.Valid() { + if bytes.Equal(iter.Key().Data(), lastKey) { + iter.Next() + } + } + } + } + + i := int64(0) + for ; iter.Valid(); iter.Next() { + + if limit > 0 { + i++ + if i > limit { + break + } + } + + key := iter.Key().Data() + + if !bytes.HasPrefix(key, prefix) { + break + } + + ret := fn(key, iter.Value().Data()) + + if !ret { + break + } + + } + + if err := iter.Err(); err != nil { + return fmt.Errorf("prefix scan iterator: %v", err) + } + return nil +} + +func (store *RocksDBStore) ListDirectoryEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + return store.ListDirectoryPrefixedEntries(ctx, dirPath, startFileName, includeStartFile, limit, "", eachEntryFunc) +} + +func (store *RocksDBStore) ListDirectoryPrefixedEntries(ctx context.Context, dirPath weed_util.FullPath, startFileName string, includeStartFile bool, limit int64, prefix string, eachEntryFunc filer.ListEachEntryFunc) (lastFileName string, err error) { + + directoryPrefix := genDirectoryKeyPrefix(dirPath, prefix) + lastFileStart := directoryPrefix + if startFileName != "" { + lastFileStart = genDirectoryKeyPrefix(dirPath, startFileName) + } + + ro := gorocksdb.NewDefaultReadOptions() + defer ro.Destroy() + ro.SetFillCache(false) + + iter := store.db.NewIterator(ro) + defer iter.Close() + err = enumerate(iter, directoryPrefix, lastFileStart, includeStartFile, limit, func(key, value []byte) bool { + fileName := getNameFromKey(key) + if fileName == "" { + return true + } + entry := &filer.Entry{ + FullPath: weed_util.NewFullPath(string(dirPath), fileName), + } + lastFileName = fileName + + // println("list", entry.FullPath, "chunks", len(entry.Chunks)) + if decodeErr := entry.DecodeAttributesAndChunks(value); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + return false + } + if !eachEntryFunc(entry) { + return false + } + return true + }) + if err != nil { + return lastFileName, fmt.Errorf("prefix list %s : %v", dirPath, err) + } + + return lastFileName, err +} + +func genKey(dirPath, fileName string) (key []byte) { + key = hashToBytes(dirPath) + key = append(key, []byte(fileName)...) + return key +} + +func genDirectoryKeyPrefix(fullpath weed_util.FullPath, startFileName string) (keyPrefix []byte) { + keyPrefix = hashToBytes(string(fullpath)) + if len(startFileName) > 0 { + keyPrefix = append(keyPrefix, []byte(startFileName)...) 
+ } + return keyPrefix +} + +func getNameFromKey(key []byte) string { + + return string(key[md5.Size:]) + +} + +// hash directory, and use last byte for partitioning +func hashToBytes(dir string) []byte { + h := md5.New() + io.WriteString(h, dir) + + b := h.Sum(nil) + + return b +} + +func (store *RocksDBStore) Shutdown() { + store.db.Close() + store.options.close() +} diff --git a/weed/filer/rocksdb/rocksdb_store_kv.go b/weed/filer/rocksdb/rocksdb_store_kv.go new file mode 100644 index 000000000..cf1214d5b --- /dev/null +++ b/weed/filer/rocksdb/rocksdb_store_kv.go @@ -0,0 +1,47 @@ +// +build rocksdb + +package rocksdb + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/filer" +) + +func (store *RocksDBStore) KvPut(ctx context.Context, key []byte, value []byte) (err error) { + + err = store.db.Put(store.wo, key, value) + + if err != nil { + return fmt.Errorf("kv put: %v", err) + } + + return nil +} + +func (store *RocksDBStore) KvGet(ctx context.Context, key []byte) (value []byte, err error) { + + value, err = store.db.GetBytes(store.ro, key) + + if value == nil { + return nil, filer.ErrKvNotFound + } + + if err != nil { + return nil, fmt.Errorf("kv get: %v", err) + } + + return +} + +func (store *RocksDBStore) KvDelete(ctx context.Context, key []byte) (err error) { + + err = store.db.Delete(store.wo, key) + + if err != nil { + return fmt.Errorf("kv delete: %v", err) + } + + return nil +} diff --git a/weed/filer/rocksdb/rocksdb_store_test.go b/weed/filer/rocksdb/rocksdb_store_test.go new file mode 100644 index 000000000..f6e755b4b --- /dev/null +++ b/weed/filer/rocksdb/rocksdb_store_test.go @@ -0,0 +1,117 @@ +// +build rocksdb + +package rocksdb + +import ( + "context" + "fmt" + "io/ioutil" + "os" + "testing" + "time" + + "github.com/chrislusf/seaweedfs/weed/filer" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func TestCreateAndFind(t *testing.T) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test") + defer os.RemoveAll(dir) + store := &RocksDBStore{} + store.initialize(dir) + testFiler.SetStore(store) + + fullpath := util.FullPath("/home/chris/this/is/one/file1.jpg") + + ctx := context.Background() + + entry1 := &filer.Entry{ + FullPath: fullpath, + Attr: filer.Attr{ + Mode: 0440, + Uid: 1234, + Gid: 5678, + }, + } + + if err := testFiler.CreateEntry(ctx, entry1, false, false, nil); err != nil { + t.Errorf("create entry %v: %v", entry1.FullPath, err) + return + } + + entry, err := testFiler.FindEntry(ctx, fullpath) + + if err != nil { + t.Errorf("find entry: %v", err) + return + } + + if entry.FullPath != entry1.FullPath { + t.Errorf("find wrong entry: %v", entry.FullPath) + return + } + + // checking one upper directory + entries, _, _ := testFiler.ListDirectoryEntries(ctx, util.FullPath("/home/chris/this/is/one"), "", false, 100, "", "", "") + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + + // checking one upper directory + entries, _, _ = testFiler.ListDirectoryEntries(ctx, util.FullPath("/"), "", false, 100, "", "", "") + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} + +func TestEmptyRoot(t *testing.T) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test2") + defer os.RemoveAll(dir) + store := &RocksDBStore{} + store.initialize(dir) + testFiler.SetStore(store) + + ctx := context.Background() + + // checking one upper directory + entries, _, 
err := testFiler.ListDirectoryEntries(ctx, util.FullPath("/"), "", false, 100, "", "", "") + if err != nil { + t.Errorf("list entries: %v", err) + return + } + if len(entries) != 0 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} + +func BenchmarkInsertEntry(b *testing.B) { + testFiler := filer.NewFiler(nil, nil, "", 0, "", "", "", nil) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_bench") + defer os.RemoveAll(dir) + store := &RocksDBStore{} + store.initialize(dir) + testFiler.SetStore(store) + + ctx := context.Background() + + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + entry := &filer.Entry{ + FullPath: util.FullPath(fmt.Sprintf("/file%d.txt", i)), + Attr: filer.Attr{ + Crtime: time.Now(), + Mtime: time.Now(), + Mode: os.FileMode(0644), + }, + } + store.InsertEntry(ctx, entry) + } +} diff --git a/weed/filer/rocksdb/rocksdb_ttl.go b/weed/filer/rocksdb/rocksdb_ttl.go new file mode 100644 index 000000000..faed22310 --- /dev/null +++ b/weed/filer/rocksdb/rocksdb_ttl.go @@ -0,0 +1,40 @@ +//+build rocksdb + +package rocksdb + +import ( + "time" + + "github.com/tecbot/gorocksdb" + + "github.com/chrislusf/seaweedfs/weed/filer" +) + +type TTLFilter struct { + skipLevel0 bool +} + +func NewTTLFilter() gorocksdb.CompactionFilter { + return &TTLFilter{ + skipLevel0: true, + } +} + +func (t *TTLFilter) Filter(level int, key, val []byte) (remove bool, newVal []byte) { + // decode could be slow, causing write stall + // level >0 sst can run compaction in parallel + if !t.skipLevel0 || level > 0 { + entry := filer.Entry{} + if err := entry.DecodeAttributesAndChunks(val); err == nil { + if entry.TtlSec > 0 && + entry.Crtime.Add(time.Duration(entry.TtlSec)*time.Second).Before(time.Now()) { + return true, nil + } + } + } + return false, val +} + +func (t *TTLFilter) Name() string { + return "TTLFilter" +} diff --git a/weed/filer/s3iam_conf.go b/weed/filer/s3iam_conf.go new file mode 100644 index 000000000..92387fb09 --- /dev/null +++ b/weed/filer/s3iam_conf.go @@ -0,0 +1,25 @@ +package filer + +import ( + "bytes" + "github.com/chrislusf/seaweedfs/weed/pb/iam_pb" + "github.com/golang/protobuf/jsonpb" + "io" +) + +func ParseS3ConfigurationFromBytes(content []byte, config *iam_pb.S3ApiConfiguration) error { + if err := jsonpb.Unmarshal(bytes.NewBuffer(content), config); err != nil { + return err + } + return nil +} + +func S3ConfigurationToText(writer io.Writer, config *iam_pb.S3ApiConfiguration) error { + + m := jsonpb.Marshaler{ + EmitDefaults: false, + Indent: " ", + } + + return m.Marshal(writer, config) +} diff --git a/weed/filer/s3iam_conf_test.go b/weed/filer/s3iam_conf_test.go new file mode 100644 index 000000000..65cc49840 --- /dev/null +++ b/weed/filer/s3iam_conf_test.go @@ -0,0 +1,57 @@ +package filer + +import ( + "bytes" + . 
"github.com/chrislusf/seaweedfs/weed/s3api/s3_constants" + "testing" + + "github.com/chrislusf/seaweedfs/weed/pb/iam_pb" + + "github.com/stretchr/testify/assert" +) + +func TestS3Conf(t *testing.T) { + s3Conf := &iam_pb.S3ApiConfiguration{ + Identities: []*iam_pb.Identity{ + { + Name: "some_name", + Credentials: []*iam_pb.Credential{ + { + AccessKey: "some_access_key1", + SecretKey: "some_secret_key1", + }, + }, + Actions: []string{ + ACTION_ADMIN, + ACTION_READ, + ACTION_WRITE, + }, + }, + { + Name: "some_read_only_user", + Credentials: []*iam_pb.Credential{ + { + AccessKey: "some_access_key2", + SecretKey: "some_secret_key2", + }, + }, + Actions: []string{ + ACTION_READ, + ACTION_TAGGING, + ACTION_LIST, + }, + }, + }, + } + var buf bytes.Buffer + err := S3ConfigurationToText(&buf, s3Conf) + assert.Equal(t, err, nil) + s3ConfSaved := &iam_pb.S3ApiConfiguration{} + err = ParseS3ConfigurationFromBytes(buf.Bytes(), s3ConfSaved) + assert.Equal(t, err, nil) + + assert.Equal(t, "some_name", s3ConfSaved.Identities[0].Name) + assert.Equal(t, "some_read_only_user", s3ConfSaved.Identities[1].Name) + assert.Equal(t, "some_access_key1", s3ConfSaved.Identities[0].Credentials[0].AccessKey) + assert.Equal(t, "some_secret_key2", s3ConfSaved.Identities[1].Credentials[0].SecretKey) +} diff --git a/weed/filer/stream.go b/weed/filer/stream.go new file mode 100644 index 000000000..661a210ea --- /dev/null +++ b/weed/filer/stream.go @@ -0,0 +1,245 @@ +package filer + +import ( + "bytes" + "fmt" + "golang.org/x/sync/errgroup" + "io" + "math" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/wdclient" +) + +func StreamContent(masterClient wdclient.HasLookupFileIdFunction, w io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int64, isCheck bool) error { + + glog.V(9).Infof("start to stream content for chunks: %+v\n", chunks) + chunkViews := ViewFromChunks(masterClient.GetLookupFileIdFunction(), chunks, offset, size) + + fileId2Url := make(map[string][]string) + + for _, chunkView := range chunkViews { + + urlStrings, err := masterClient.GetLookupFileIdFunction()(chunkView.FileId) + if err != nil { + glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err) + return err + } else if len(urlStrings) == 0 { + glog.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId) + return fmt.Errorf("operation LookupFileId %s failed, err: urls not found", chunkView.FileId) + } + fileId2Url[chunkView.FileId] = urlStrings + } + + if isCheck { + // Pre-check all chunkViews urls + gErr := new(errgroup.Group) + CheckAllChunkViews(chunkViews, &fileId2Url, gErr) + if err := gErr.Wait(); err != nil { + glog.Errorf("check all chunks: %v", err) + return fmt.Errorf("check all chunks: %v", err) + } + return nil + } + + for _, chunkView := range chunkViews { + + urlStrings := fileId2Url[chunkView.FileId] + data, err := retriedFetchChunkData(urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size)) + if err != nil { + glog.Errorf("read chunk: %v", err) + return fmt.Errorf("read chunk: %v", err) + } + + _, err = w.Write(data) + if err != nil { + glog.Errorf("write chunk: %v", err) + return fmt.Errorf("write chunk: %v", err) + } + } + + return nil + +} + +func CheckAllChunkViews(chunkViews []*ChunkView, fileId2Url *map[string][]string, gErr *errgroup.Group) { + for _, chunkView := 
range chunkViews { + chunkView := chunkView // pin the loop variable so each goroutine checks its own chunk + urlStrings := (*fileId2Url)[chunkView.FileId] + glog.V(9).Infof("Check chunk: %+v\n url: %v", chunkView, urlStrings) + gErr.Go(func() error { + _, err := retriedFetchChunkData(urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size)) + return err + }) + } +} + +// ---------------- ReadAllReader ---------------------------------- + +func ReadAll(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) ([]byte, error) { + + buffer := bytes.Buffer{} + + lookupFileIdFn := func(fileId string) (targetUrls []string, err error) { + return masterClient.LookupFileId(fileId) + } + + chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64) + + for _, chunkView := range chunkViews { + urlStrings, err := lookupFileIdFn(chunkView.FileId) + if err != nil { + glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err) + return nil, err + } + + data, err := retriedFetchChunkData(urlStrings, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size)) + if err != nil { + return nil, err + } + buffer.Write(data) + } + return buffer.Bytes(), nil +} + +// ---------------- ChunkStreamReader ---------------------------------- +type ChunkStreamReader struct { + chunkViews []*ChunkView + logicOffset int64 + buffer []byte + bufferOffset int64 + bufferPos int + chunkIndex int + lookupFileId wdclient.LookupFileIdFunctionType +} + +var _ = io.ReadSeeker(&ChunkStreamReader{}) + +func NewChunkStreamReaderFromFiler(masterClient *wdclient.MasterClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader { + + lookupFileIdFn := func(fileId string) (targetUrl []string, err error) { + return masterClient.LookupFileId(fileId) + } + + chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64) + + return &ChunkStreamReader{ + chunkViews: chunkViews, + lookupFileId: lookupFileIdFn, + } +} + +func NewChunkStreamReader(filerClient filer_pb.FilerClient, chunks []*filer_pb.FileChunk) *ChunkStreamReader { + + lookupFileIdFn := LookupFn(filerClient) + + chunkViews := ViewFromChunks(lookupFileIdFn, chunks, 0, math.MaxInt64) + + return &ChunkStreamReader{ + chunkViews: chunkViews, + lookupFileId: lookupFileIdFn, + } +} + +func (c *ChunkStreamReader) Read(p []byte) (n int, err error) { + for n < len(p) { + if c.isBufferEmpty() { + if c.chunkIndex >= len(c.chunkViews) { + return n, io.EOF + } + chunkView := c.chunkViews[c.chunkIndex] + if err = c.fetchChunkToBuffer(chunkView); err != nil { + return + } + c.chunkIndex++ + } + t := copy(p[n:], c.buffer[c.bufferPos:]) + c.bufferPos += t + n += t + } + return +} + +func (c *ChunkStreamReader) isBufferEmpty() bool { + return len(c.buffer) <= c.bufferPos +} + +func (c *ChunkStreamReader) Seek(offset int64, whence int) (int64, error) { + + var totalSize int64 + for _, chunk := range c.chunkViews { + totalSize += int64(chunk.Size) + } + + var err error + switch whence { + case io.SeekStart: + case io.SeekCurrent: + offset += c.bufferOffset + int64(c.bufferPos) + case io.SeekEnd: + offset = totalSize + offset + } + if offset > totalSize { + err = io.ErrUnexpectedEOF + } + + for i, chunk := range c.chunkViews { + if chunk.LogicOffset <= offset && offset < chunk.LogicOffset+int64(chunk.Size) { + if c.isBufferEmpty() || c.bufferOffset != chunk.LogicOffset { + c.fetchChunkToBuffer(chunk) + c.chunkIndex = i + 1 + break + } + } + } + c.bufferPos = int(offset - c.bufferOffset) + + return offset, err + +} + +func (c *ChunkStreamReader) 
fetchChunkToBuffer(chunkView *ChunkView) error { + urlStrings, err := c.lookupFileId(chunkView.FileId) + if err != nil { + glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err) + return err + } + var buffer bytes.Buffer + var shouldRetry bool + for _, urlString := range urlStrings { + shouldRetry, err = util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.Offset, int(chunkView.Size), func(data []byte) { + buffer.Write(data) + }) + if !shouldRetry { + break + } + if err != nil { + glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err) + buffer.Reset() + } else { + break + } + } + if err != nil { + return err + } + c.buffer = buffer.Bytes() + c.bufferPos = 0 + c.bufferOffset = chunkView.LogicOffset + + // glog.V(0).Infof("read %s [%d,%d)", chunkView.FileId, chunkView.LogicOffset, chunkView.LogicOffset+int64(chunkView.Size)) + + return nil +} + +func (c *ChunkStreamReader) Close() { + // TODO try to release and reuse buffer +} + +func VolumeId(fileId string) string { + lastCommaIndex := strings.LastIndex(fileId, ",") + if lastCommaIndex > 0 { + return fileId[:lastCommaIndex] + } + return fileId +} diff --git a/weed/filer/topics.go b/weed/filer/topics.go new file mode 100644 index 000000000..3a2fde8c4 --- /dev/null +++ b/weed/filer/topics.go @@ -0,0 +1,6 @@ +package filer + +const ( + TopicsDir = "/topics" + SystemLogDir = TopicsDir + "/.system/log" +) |
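The redis and redis2 stores above share one layout: the encoded entry lives at its full-path key, and a directory's children are tracked under the directory path suffixed with the NUL DIR_LIST_MARKER (a plain set in redis, a sorted set in redis2). Below is a minimal sketch of the redis2 variant with go-redis v8; the Redis address, the demo paths, and the placeholder value standing in for real encoded attributes are assumptions for illustration only.

```
package main

import (
	"context"
	"fmt"

	"github.com/go-redis/redis/v8"
)

// Mirrors DIR_LIST_MARKER above: directory listings live at dir + "\x00",
// a key that cannot collide with any real child path.
const dirListMarker = "\x00"

func dirListKey(dir string) string { return dir + dirListMarker }

func main() {
	ctx := context.Background()
	client := redis.NewClient(&redis.Options{Addr: "localhost:6379"}) // assumed local instance

	// InsertEntry's two writes: entry bytes at the full path, and the
	// child name into the parent's sorted set. Score is 0 throughout,
	// so Redis falls back to lexicographic member ordering.
	for _, name := range []string{"b.txt", "a.txt"} {
		if err := client.Set(ctx, "/buckets/demo/"+name, []byte("encoded-attrs"), 0).Err(); err != nil {
			panic(err)
		}
		if err := client.ZAddNX(ctx, dirListKey("/buckets/demo"), &redis.Z{Score: 0, Member: name}).Err(); err != nil {
			panic(err)
		}
	}

	// ListDirectoryEntries pages by rank with ZRANGE; with equal scores
	// the members come back sorted, so no client-side sort is needed.
	names, err := client.ZRange(ctx, dirListKey("/buckets/demo"), 0, 99).Result()
	if err != nil {
		panic(err)
	}
	fmt.Println(names) // [a.txt b.txt]
}
```

The equal-score trick is also why redis2 can paginate server-side with ZRank and ZRange, while the older redis store has to fetch SMembers and sort client-side.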

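Similarly, the RocksDB store's key scheme can be exercised on its own. This standalone sketch adapts hashToBytes and genKey from rocksdb_store.go above; the main function is only an illustrative harness.

```
package main

import (
	"crypto/md5"
	"fmt"
	"io"
)

// hashToBytes, as in rocksdb_store.go: hash the directory path so that
// every entry of one directory shares the same fixed 16-byte key prefix.
func hashToBytes(dir string) []byte {
	h := md5.New()
	io.WriteString(h, dir)
	return h.Sum(nil)
}

// genKey, as in rocksdb_store.go: key = md5(dir) + fileName. Prefix
// iteration over md5(dir) is what ListDirectoryPrefixedEntries relies on.
func genKey(dirPath, fileName string) []byte {
	return append(hashToBytes(dirPath), []byte(fileName)...)
}

func main() {
	key := genKey("/home/chris", "file1.jpg")
	// getNameFromKey reverses this by stripping the fixed md5.Size prefix.
	fmt.Printf("prefix=%x name=%s\n", key[:md5.Size], key[md5.Size:])
}
```

Because the prefix is a hash, the children of one directory are contiguous in RocksDB while sibling directories land at unrelated key ranges, which is why folder deletion iterates each directory's own prefix rather than deleting one big path range.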