480 files changed, 47881 insertions, 8049 deletions
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 985bd3781..111f9aa6e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,16 +1,27 @@ --- name: Bug report about: Create a report to help us improve +title: '' +labels: '' +assignees: '' --- Sponsors SeaweedFS via Patreon https://www.patreon.com/seaweedfs +example of a good issue report: +https://github.com/chrislusf/seaweedfs/issues/1005 +example of a bad issue report: +https://github.com/chrislusf/seaweedfs/issues/1008 + **Describe the bug** A clear and concise description of what the bug is. **System Setup** -List the command line to start "weed master", "weed volume", "weed filer", "weed s3", "weed mount". +- List the command line to start "weed master", "weed volume", "weed filer", "weed s3", "weed mount". +- OS version +- output of `weed version` +- if using filer, show the content of `filer.toml` **Expected behavior** A clear and concise description of what you expected to happen. diff --git a/.gitignore b/.gitignore index 5ddd14bd2..671b01051 100644 --- a/.gitignore +++ b/.gitignore @@ -77,4 +77,9 @@ crashlytics-build.properties test_data build +target *.class +other/java/hdfs/dependency-reduced-pom.xml + +# binary file +weed/weed diff --git a/.project b/.project deleted file mode 100644 index 88730cff3..000000000 --- a/.project +++ /dev/null @@ -1,13 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<projectDescription> - <name>seaweedfs</name> - <comment></comment> - <projects> - </projects> - <buildSpec> - </buildSpec> - <natures> - <nature>goclipse.goNature</nature> - <nature>org.eclipse.wst.common.project.facet.core.nature</nature> - </natures> -</projectDescription> diff --git a/.travis.yml b/.travis.yml index f445bff68..bad4a77f1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,18 +1,19 @@ sudo: false language: go go: -- 1.10.x -- 1.11.x -- tip + - 1.12.x + - 1.13.x + - 1.14.x before_install: -- export PATH=/home/travis/gopath/bin:$PATH + - export PATH=/home/travis/gopath/bin:$PATH install: -- go get ./weed/... + - export CGO_ENABLED="0" + - go env script: -- go test ./weed/... + - env GO111MODULE=on go test ./weed/... 
before_deploy: - make release @@ -22,23 +23,26 @@ deploy: api_key: secure: ERL986+ncQ8lwAJUYDrQ8s2/FxF/cyNIwJIFCqspnWxQgGNNyokET9HapmlPSxjpFRF0q6L2WCg9OY3mSVRq4oI6hg1igOQ12KlLyN71XSJ3c8w0Ay5ho48TQ9l3f3Iu97mntBCe9l0R9pnT8wj1VI8YJxloXwUMG2yeTjA9aBI= file: - - build/linux_arm.tar.gz - - build/linux_arm64.tar.gz - - build/linux_386.tar.gz - - build/linux_amd64.tar.gz - - build/darwin_amd64.tar.gz - - build/windows_386.zip - - build/windows_amd64.zip - - build/freebsd_arm.tar.gz - - build/freebsd_amd64.tar.gz - - build/freebsd_386.tar.gz - - build/netbsd_arm.tar.gz - - build/netbsd_amd64.tar.gz - - build/netbsd_386.tar.gz - - build/openbsd_arm.tar.gz - - build/openbsd_amd64.tar.gz - - build/openbsd_386.tar.gz + - build/linux_arm.tar.gz + - build/linux_arm64.tar.gz + - build/linux_386.tar.gz + - build/linux_amd64.tar.gz + - build/linux_amd64_large_disk.tar.gz + - build/darwin_amd64.tar.gz + - build/darwin_amd64_large_disk.tar.gz + - build/windows_386.zip + - build/windows_amd64.zip + - build/windows_amd64_large_disk.zip + - build/freebsd_arm.tar.gz + - build/freebsd_amd64.tar.gz + - build/freebsd_386.tar.gz + - build/netbsd_arm.tar.gz + - build/netbsd_amd64.tar.gz + - build/netbsd_386.tar.gz + - build/openbsd_arm.tar.gz + - build/openbsd_amd64.tar.gz + - build/openbsd_386.tar.gz on: tags: true repo: chrislusf/seaweedfs - go: tip + go: 1.14.x diff --git a/Dockerfile.go_build b/Dockerfile.go_build deleted file mode 100644 index d0bc21c91..000000000 --- a/Dockerfile.go_build +++ /dev/null @@ -1,6 +0,0 @@ -FROM cydev/go -RUN go get github.com/chrislusf/seaweedfs/weed -EXPOSE 8080 -EXPOSE 9333 -VOLUME /data -ENTRYPOINT ["weed"]
\ No newline at end of file @@ -8,10 +8,13 @@ appname := weed sources := $(wildcard *.go) -build = GOOS=$(1) GOARCH=$(2) go build -o build/$(appname)$(3) $(SOURCE_DIR) +build = CGO_ENABLED=0 GOOS=$(1) GOARCH=$(2) go build -ldflags "-extldflags -static" -o build/$(appname)$(3) $(SOURCE_DIR) tar = cd build && tar -cvzf $(1)_$(2).tar.gz $(appname)$(3) && rm $(appname)$(3) zip = cd build && zip $(1)_$(2).zip $(appname)$(3) && rm $(appname)$(3) +build_large = CGO_ENABLED=0 GOOS=$(1) GOARCH=$(2) go build -tags 5BytesOffset -ldflags "-extldflags -static" -o build/$(appname)$(3) $(SOURCE_DIR) +tar_large = cd build && tar -cvzf $(1)_$(2)_large_disk.tar.gz $(appname)$(3) && rm $(appname)$(3) +zip_large = cd build && zip $(1)_$(2)_large_disk.zip $(appname)$(3) && rm $(appname)$(3) all: build @@ -24,6 +27,8 @@ clean: deps: go get $(GO_FLAGS) -d $(SOURCE_DIR) + rm -rf /home/travis/gopath/src/github.com/coreos/etcd/vendor/golang.org/x/net/trace + rm -rf /home/travis/gopath/src/go.etcd.io/etcd/vendor/golang.org/x/net/trace build: deps go build $(GO_FLAGS) -o $(BINARY) $(SOURCE_DIR) @@ -32,9 +37,21 @@ linux: deps mkdir -p linux GOOS=linux GOARCH=amd64 go build $(GO_FLAGS) -o linux/$(BINARY) $(SOURCE_DIR) -release: deps windows_build darwin_build linux_build bsd_build +release: deps windows_build darwin_build linux_build bsd_build 5_byte_linux_build 5_byte_darwin_build 5_byte_windows_build ##### LINUX BUILDS ##### +5_byte_linux_build: + $(call build_large,linux,amd64,) + $(call tar_large,linux,amd64) + +5_byte_darwin_build: + $(call build_large,darwin,amd64,) + $(call tar_large,darwin,amd64) + +5_byte_windows_build: + $(call build_large,windows,amd64,.exe) + $(call zip_large,windows,amd64,.exe) + linux_build: build/linux_arm.tar.gz build/linux_arm64.tar.gz build/linux_386.tar.gz build/linux_amd64.tar.gz build/linux_386.tar.gz: $(sources) @@ -1,25 +1,25 @@ # SeaweedFS + [](https://travis-ci.org/chrislusf/seaweedfs) [](https://godoc.org/github.com/chrislusf/seaweedfs/weed) [](https://github.com/chrislusf/seaweedfs/wiki) +[](https://hub.docker.com/r/chrislusf/seaweedfs/)  <h2 align="center">Supporting SeaweedFS</h2> -SeaweedFS is Apache-licensed open source project, independent project with its ongoing development made -possible entirely thanks to the support by these awesome [backers](https://github.com/chrislusf/seaweedfs/blob/master/backers.md). -If you'd like to grow SeaweedFS even stronger, please consider to -<a href="https://www.patreon.com/seaweedfs">Sponsor SeaweedFS via Patreon</a>. - -Platinum ($2500/month), Gold ($500/month): put your company logo on the SeaweedFS github page -Generous Backer($50/month), Backer($10/month): put your name on the SeaweedFS backer page. +SeaweedFS is an independent Apache-licensed open source project with its ongoing development made +possible entirely thanks to the support of these awesome [backers](https://github.com/chrislusf/seaweedfs/blob/master/backers.md). +If you'd like to grow SeaweedFS even stronger, please consider joining our +<a href="https://www.patreon.com/seaweedfs">sponsors on Patreon</a>. Your support will be really appreciated by me and other supporters! -<h3 align="center"><a href="https://www.patreon.com/seaweedfs">Sponsors SeaweedFS via Patreon</a></h3> +<h3 align="center"><a href="https://www.patreon.com/seaweedfs">Sponsor SeaweedFS via Patreon</a></h3> +<!-- <h4 align="center">Platinum</h4> <p align="center"> @@ -43,58 +43,99 @@ Your support will be really appreciated by me and other supporters! 
</tbody> </table> +--> + --- - [Download Binaries for different platforms](https://github.com/chrislusf/seaweedfs/releases/latest) -- [SeaweedFS on Slack](https://join.slack.com/t/seaweedfs/shared_invite/enQtMzI4MTMwMjU2MzA3LTc4MmVlYmFlNjBmZTgzZmJlYmI1MDE1YzkyNWYyZjkwZDFiM2RlMDdjNjVlNjdjYzc4NGFhZGIyYzEyMzJkYTA) +- [SeaweedFS on Slack](https://join.slack.com/t/seaweedfs/shared_invite/enQtMzI4MTMwMjU2MzA3LTEyYzZmZWYzOGQ3MDJlZWMzYmI0OTE4OTJiZjJjODBmMzUxNmYwODg0YjY3MTNlMjBmZDQ1NzQ5NDJhZWI2ZmY) - [SeaweedFS Mailing List](https://groups.google.com/d/forum/seaweedfs) - [Wiki Documentation](https://github.com/chrislusf/seaweedfs/wiki) - - -## Introduction +- [SeaweedFS Introduction Slides](https://www.slideshare.net/chrislusf/seaweedfs-introduction) + +Table of Contents +================= + +* [Introduction](#introduction) +* [Features](#features) + * [Additional Features](#additional-features) + * [Filer Features](#filer-features) +* [Example Usage](#example-usage) +* [Architecture](#architecture) +* [Compared to Other File Systems](#compared-to-other-file-systems) + * [Compared to HDFS](#compared-to-hdfs) + * [Compared to GlusterFS, Ceph](#compared-to-glusterfs-ceph) + * [Compared to GlusterFS](#compared-to-glusterfs) + * [Compared to Ceph](#compared-to-ceph) +* [Dev Plan](#dev-plan) +* [Installation Guide](#installation-guide) +* [Disk Related Topics](#disk-related-topics) +* [Benchmark](#Benchmark) +* [License](#license) + +## Introduction ## SeaweedFS is a simple and highly scalable distributed file system. There are two objectives: 1. to store billions of files! 2. to serve the files fast! -SeaweedFS started as an Object Store to handle small files efficiently. Instead of managing all file metadata in a central master, the central master only manages file volumes, and it lets these volume servers manage files and their metadata. This relieves concurrency pressure from the central master and spreads file metadata into volume servers, allowing faster file access (just one disk read operation). +SeaweedFS started as an Object Store to handle small files efficiently. Instead of managing all file metadata in a central master, the central master only manages file volumes, and it lets these volume servers manage files and their metadata. This relieves concurrency pressure from the central master and spreads file metadata into volume servers, allowing faster file access (O(1), usually just one disk read operation). + +SeaweedFS can transparently integrate with the cloud. With hot data on local cluster, and warm data on the cloud with O(1) access time, SeaweedFS can achieve both fast local access time and elastic cloud storage capacity, without any client side changes. -There is only a 40 bytes disk storage overhead for each file's metadata. It is so simple with O(1) disk read that you are welcome to challenge the performance with your actual use cases. +There is only 40 bytes of disk storage overhead for each file's metadata. It is so simple with O(1) disk reads that you are welcome to challenge the performance with your actual use cases. -SeaweedFS started by implementing [Facebook's Haystack design paper](http://www.usenix.org/event/osdi10/tech/full_papers/Beaver.pdf). +SeaweedFS started by implementing [Facebook's Haystack design paper](http://www.usenix.org/event/osdi10/tech/full_papers/Beaver.pdf). 
Also, SeaweedFS implements erasure coding with ideas from [f4: Facebook’s Warm BLOB Storage System](https://www.usenix.org/system/files/conference/osdi14/osdi14-paper-muralidhar.pdf) -SeaweedFS can work very well with just the object store. [[Filer]] is added later to support directories and POSIX attributes. Filer is a separate linearly-scalable stateless server with customizable metadata stores, e.g., MySql/Postgres/Redis/Cassandra/LevelDB. +On top of the object store, optional [Filer] can support directories and POSIX attributes. Filer is a separate linearly-scalable stateless server with customizable metadata stores, e.g., MySql, Postgres, Redis, Etcd, Cassandra, LevelDB, MemSql, TiDB, TiKV, CockroachDB, etc. -## Additional Features -* Can choose no replication or different replication level, rack and data center aware -* Automatic master servers failover - no single point of failure (SPOF) -* Automatic Gzip compression depending on file mime type -* Automatic compaction to reclaimed disk spaces after deletion or update +[Back to TOC](#table-of-contents) + +## Additional Features ## +* Can choose no replication or different replication levels, rack and data center aware. +* Automatic master servers failover - no single point of failure (SPOF). +* Automatic Gzip compression depending on file mime type. +* Automatic compaction to reclaim disk space after deletion or update. * Servers in the same cluster can have different disk spaces, file systems, OS etc. -* Adding/Removing servers does **not** cause any data re-balancing -* Optionally fix the orientation for jpeg pictures -* Support Etag, Accept-Range, Last-Modified, etc. -* Support in-memory/leveldb/boltdb/btree mode tuning for memory/performance balance. -## Filer Features +* Adding/Removing servers does **not** cause any data re-balancing. +* Optionally fix the orientation for jpeg pictures. +* Support ETag, Accept-Range, Last-Modified, etc. +* Support in-memory/leveldb/readonly mode tuning for memory/performance balance. +* Support rebalancing the writable and readonly volumes. +* [Transparent cloud integration][CloudTier]: unlimited capacity via tiered cloud storage for warm data. +* [Erasure Coding for warm storage][ErasureCoding] Rack-Aware 10.4 erasure coding reduces storage cost and increases availability. + +[Back to TOC](#table-of-contents) + +## Filer Features ## * [filer server][Filer] provide "normal" directories and files via http. * [mount filer][Mount] to read and write files directly as a local directory via FUSE. * [Amazon S3 compatible API][AmazonS3API] to access files with S3 tooling. -* [Async Backup To Cloud][BackupToCloud] can enjoy extreme fast local access and backup to Amazon S3, Google Cloud Storage, Azure, BackBlaze. +* [Hadoop Compatible File System][Hadoop] to access files from Hadoop/Spark/Flink/etc jobs. +* [Async Backup To Cloud][BackupToCloud] has extremely fast local access and backups to Amazon S3, Google Cloud Storage, Azure, BackBlaze. +* [WebDAV] access as a mapped drive on Mac and Windows, or from mobile devices. 
[Filer]: https://github.com/chrislusf/seaweedfs/wiki/Directories-and-Files [Mount]: https://github.com/chrislusf/seaweedfs/wiki/Mount [AmazonS3API]: https://github.com/chrislusf/seaweedfs/wiki/Amazon-S3-API [BackupToCloud]: https://github.com/chrislusf/seaweedfs/wiki/Backup-to-Cloud +[Hadoop]: https://github.com/chrislusf/seaweedfs/wiki/Hadoop-Compatible-File-System +[WebDAV]: https://github.com/chrislusf/seaweedfs/wiki/WebDAV +[ErasureCoding]: https://github.com/chrislusf/seaweedfs/wiki/Erasure-coding-for-warm-storage +[CloudTier]: https://github.com/chrislusf/seaweedfs/wiki/Cloud-Tier + +[Back to TOC](#table-of-contents) + +## Example Usage ## -## Example Usage By default, the master node runs on port 9333, and the volume nodes run on port 8080. -Here I will start one master node, and two volume nodes on port 8080 and 8081. Ideally, they should be started from different machines. I just use localhost as an example. +Let's start one master node, and two volume nodes on port 8080 and 8081. Ideally, they should be started from different machines. We'll use localhost as an example. -SeaweedFS uses HTTP REST operations to write, read, delete. The responses are in JSON or JSONP format. +SeaweedFS uses HTTP REST operations to read, write, and delete. The responses are in JSON or JSONP format. -### Start Master Server +### Start Master Server ### ``` > ./weed master @@ -107,7 +148,6 @@ SeaweedFS uses HTTP REST operations to write, read, delete. The responses are in > weed volume -dir="/tmp/data2" -max=10 -mserver="localhost:9333" -port=8081 & ``` - ### Write File ### To upload a file: first, send an HTTP POST, PUT, or GET request to `/dir/assign` to get an `fid` and a volume server url: @@ -121,7 +161,7 @@ Second, to store the file content, send a HTTP multi-part POST request to `url + ``` > curl -F file=@/home/chris/myphoto.jpg http://127.0.0.1:8080/3,01637037d6 -{"size": 43234} +{"name":"myphoto.jpg","size":43234,"eTag":"1cc0118e"} ``` To update, send another POST request with updated file content. @@ -131,19 +171,20 @@ For deletion, send an HTTP DELETE request to the same `url + '/' + fid` URL: ``` > curl -X DELETE http://127.0.0.1:8080/3,01637037d6 ```
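The same two-step write, sketched as a minimal Go client for illustration. This is not an official SeaweedFS client: it relies only on the `/dir/assign` and volume server endpoints shown in the curl examples above, the struct fields mirror the JSON responses, and error handling is trimmed:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
	"path/filepath"
)

// assignResult mirrors the JSON returned by GET /dir/assign on the master.
type assignResult struct {
	Fid string `json:"fid"`
	Url string `json:"url"`
}

func upload(master, path string) (string, error) {
	// Step 1: ask the master for a file id and a volume server url.
	resp, err := http.Get("http://" + master + "/dir/assign")
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	var a assignResult
	if err := json.NewDecoder(resp.Body).Decode(&a); err != nil {
		return "", err
	}

	// Step 2: multipart-POST the file content to url + "/" + fid.
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()
	var body bytes.Buffer
	w := multipart.NewWriter(&body)
	part, _ := w.CreateFormFile("file", filepath.Base(path))
	if _, err := io.Copy(part, f); err != nil {
		return "", err
	}
	w.Close()
	resp2, err := http.Post("http://"+a.Url+"/"+a.Fid, w.FormDataContentType(), &body)
	if err != nil {
		return "", err
	}
	defer resp2.Body.Close()
	return a.Fid, nil // save this fid, e.g. in a database field
}

func main() {
	fid, err := upload("localhost:9333", "/home/chris/myphoto.jpg")
	if err != nil {
		panic(err)
	}
	fmt.Println("stored as fid", fid)
}
```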
+ ### Save File Id ### -Now you can save the `fid`, 3,01637037d6 in this case, to a database field. +Now, you can save the `fid`, 3,01637037d6 in this case, to a database field. The number 3 at the start represents a volume id. After the comma, it's one file key, 01, and a file cookie, 637037d6. The volume id is an unsigned 32-bit integer. The file key is an unsigned 64-bit integer. The file cookie is an unsigned 32-bit integer, used to prevent URL guessing. -The file key and file cookie are both coded in hex. You can store the <volume id, file key, file cookie> tuple in your own format, or simply store the `fid` as string. +The file key and file cookie are both coded in hex. You can store the <volume id, file key, file cookie> tuple in your own format, or simply store the `fid` as a string. -If stored as a string, in theory, you would need 8+1+16+8=33 bytes. A char(33) would be enough, if not more than enough, since most usage would not need 2^32 volumes. +If stored as a string, in theory, you would need 8+1+16+8=33 bytes. A char(33) would be enough, if not more than enough, since most uses will not need 2^32 volumes. -If space is really a concern, you can store the file id in your own format. You would need one 4-byte integer for volume id, 8-byte long number for file key, 4-byte integer for file cookie. So 16 bytes are more than enough. +If space is really a concern, you can store the file id in your own format. You would need one 4-byte integer for volume id, 8-byte long number for file key, and a 4-byte integer for the file cookie. So 16 bytes are more than enough.
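As a sketch of that compact encoding, here is one way to parse and pack a fid in Go. The hex split follows the description above (decimal volume id, then the file key, with the last 8 hex digits being the cookie); the big-endian 16-byte layout is an illustrative assumption, since "your own format" is left up to you:

```go
package main

import (
	"encoding/binary"
	"fmt"
	"strconv"
	"strings"
)

// parseFid splits a fid like "3,01637037d6" into volume id, file key
// and file cookie. The last 8 hex digits are the 32-bit cookie; the
// hex digits before them are the file key.
func parseFid(fid string) (vid uint32, key uint64, cookie uint32, err error) {
	parts := strings.SplitN(fid, ",", 2)
	if len(parts) != 2 || len(parts[1]) <= 8 {
		return 0, 0, 0, fmt.Errorf("malformed fid %q", fid)
	}
	v, err := strconv.ParseUint(parts[0], 10, 32)
	if err != nil {
		return 0, 0, 0, err
	}
	k, err := strconv.ParseUint(parts[1][:len(parts[1])-8], 16, 64)
	if err != nil {
		return 0, 0, 0, err
	}
	c, err := strconv.ParseUint(parts[1][len(parts[1])-8:], 16, 32)
	if err != nil {
		return 0, 0, 0, err
	}
	return uint32(v), k, uint32(c), nil
}

// packFid stores the <volume id, file key, file cookie> tuple in
// exactly 16 bytes: 4 + 8 + 4 (byte order chosen arbitrarily here).
func packFid(vid uint32, key uint64, cookie uint32) [16]byte {
	var b [16]byte
	binary.BigEndian.PutUint32(b[0:4], vid)
	binary.BigEndian.PutUint64(b[4:12], key)
	binary.BigEndian.PutUint32(b[12:16], cookie)
	return b
}

func main() {
	vid, key, cookie, err := parseFid("3,01637037d6")
	if err != nil {
		panic(err)
	}
	fmt.Printf("volume=%d key=%d cookie=%08x packed=%x\n",
		vid, key, cookie, packFid(vid, key, cookie))
}
```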
### Read File ### @@ -153,7 +194,7 @@ First look up the volume server's URLs by the file's volumeId: ``` > curl http://localhost:9333/dir/lookup?volumeId=3 -{"locations":[{"publicUrl":"localhost:8080","url":"localhost:8080"}]} +{"volumeId":"3","locations":[{"publicUrl":"localhost:8080","url":"localhost:8080"}]} ``` Since (usually) there are not too many volume servers, and volumes don't move often, you can cache the results most of the time. Depending on the replication type, one volume can have multiple replica locations. Just randomly pick one location to read. @@ -209,9 +250,9 @@ More details about replication can be found [on the wiki][Replication]. You can also set the default replication strategy when starting the master server. -### Allocate File Key on specific data center ### +### Allocate File Key on Specific Data Center ### -Volume servers can start with a specific data center name: +Volume servers can be started with a specific data center name: ``` weed volume -dir=/tmp/1 -port=8080 -dataCenter=dc1 @@ -235,13 +276,15 @@ When requesting a file key, an optional "dataCenter" parameter can limit the ass [feat-3]: https://github.com/chrislusf/seaweedfs/wiki/Optimization#upload-large-files [feat-4]: https://github.com/chrislusf/seaweedfs/wiki/Optimization#collection-as-a-simple-name-space +[Back to TOC](#table-of-contents) + ## Architecture ## Usually distributed file systems split each file into chunks, a central master keeps a mapping of filenames, chunk indices to chunk handles, and also which chunks each chunk server has. -The main drawback is that the central master can't handle many small files efficiently, and since all read requests need to go through the chunk master, might not scale well for many concurrent users. +The main drawback is that the central master can't handle many small files efficiently, and since all read requests need to go through the chunk master, it might not scale well for many concurrent users. -Instead of managing chunks, SeaweedFS manages data volumes in the master server. Each data volume is size 32GB, and can hold a lot of files. And each storage node can have many data volumes. So the master node only needs to store the metadata about the volumes, which is fairly small amount of data and is generally stable. +Instead of managing chunks, SeaweedFS manages data volumes in the master server. Each data volume is 32GB in size, and can hold a lot of files. And each storage node can have many data volumes. So the master node only needs to store the metadata about the volumes, which is a fairly small amount of data and is generally stable. The actual file metadata is stored in each volume on volume servers. Since each volume server only manages metadata of files on its own disk, with only 16 bytes for each file, all file access can read file metadata just from memory and only needs one disk operation to actually read file data. @@ -251,7 +294,7 @@ For comparison, consider that an xfs inode structure in Linux is 536 bytes. The architecture is fairly simple. The actual data is stored in volumes on storage nodes. One volume server can have multiple volumes, and can both support read and write access with basic authentication. -All volumes are managed by a master server. The master server contains volume id to volume server mapping. This is fairly static information, and could be cached easily. +All volumes are managed by a master server. The master server contains the volume id to volume server mapping. This is fairly static information, and can be easily cached. On each write request, the master server also generates a file key, which is a growing 64-bit unsigned integer. Since write requests are not generally as frequent as read requests, one master server should be able to handle the concurrency well. @@ -259,28 +302,41 @@ On each write request, the master server also generates a file key, which is a g When a client sends a write request, the master server returns (volume id, file key, file cookie, volume node url) for the file. The client then contacts the volume node and POSTs the file content. -When a client needs to read a file based on (volume id, file key, file cookie), it can ask the master server by the volume id for the (volume node url, volume node public url), or retrieve this from a cache. Then the client can GET the content, or just render the URL on web pages and let browsers fetch the content. +When a client needs to read a file based on (volume id, file key, file cookie), it asks the master server by the volume id for the (volume node url, volume node public url), or retrieves this from a cache. Then the client can GET the content, or just render the URL on web pages and let browsers fetch the content. Please see the example for details on the write-read process.
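To make the write-read process concrete, here is the read side as a minimal Go sketch — again an illustration against the documented `/dir/lookup` endpoint, not an official client. A production client would cache the volume-to-location mapping and pick a random replica:

```go
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
)

// lookupResult mirrors the JSON returned by /dir/lookup?volumeId=...
type lookupResult struct {
	Locations []struct {
		PublicUrl string `json:"publicUrl"`
		Url       string `json:"url"`
	} `json:"locations"`
}

// readFile resolves a fid like "3,01637037d6" to a volume server,
// then fetches the content with a plain GET of url + "/" + fid.
func readFile(master, fid string, out io.Writer) error {
	volumeID := strings.SplitN(fid, ",", 2)[0]
	resp, err := http.Get("http://" + master + "/dir/lookup?volumeId=" + volumeID)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	var lr lookupResult
	if err := json.NewDecoder(resp.Body).Decode(&lr); err != nil {
		return err
	}
	if len(lr.Locations) == 0 {
		return fmt.Errorf("volume %s not found", volumeID)
	}
	// Any replica will do; here we just take the first one.
	fileResp, err := http.Get("http://" + lr.Locations[0].Url + "/" + fid)
	if err != nil {
		return err
	}
	defer fileResp.Body.Close()
	_, err = io.Copy(out, fileResp.Body)
	return err
}

func main() {
	if err := readFile("localhost:9333", "3,01637037d6", os.Stdout); err != nil {
		panic(err)
	}
}
```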
### Storage Size ### -In the current implementation, each volume can be 8x2^32 bytes (32GiB). This is because of we align content to 8 bytes. We can easily increase this to 64G, or 128G, or more, by changing 2 lines of code, at the cost of some wasted padding space due to alignment. +In the current implementation, each volume can hold 32 gibibytes (32GiB or 8x2^32 bytes). This is because we align content to 8 bytes. We can easily increase this to 64GiB, or 128GiB, or more, by changing 2 lines of code, at the cost of some wasted padding space due to alignment. -There can be 2^32 volumes. So total system size is 8 x 2^32 bytes x 2^32 = 8 x 4GiB x 4Gi = 128EiB (2^67 bytes, or 128 exbibytes). +There can be 4 gibi (2^32) volumes. So the total system size is 8 bytes x 4Gi x 4Gi, which is 128 exbibytes (128EiB or 2^67 bytes). Each individual file size is limited to the volume size. ### Saving memory ### -All file meta information on volume server is readable from memory without disk access. Each file just takes an 16-byte map entry of <64bit key, 32bit offset, 32bit size>. Of course, each map entry has its own the space cost for the map. But usually the disk runs out before the memory does. +All file meta information stored on a volume server is readable from memory without disk access. Each file takes just a 16-byte map entry of <64bit key, 32bit offset, 32bit size>. Of course, each map entry has its own space cost for the map. But usually the disk space runs out before the memory does. + +### Tiered Storage to the cloud ### +The local volume servers are much faster, while cloud storages have elastic capacity and are actually more cost-efficient if not accessed often (usually free to upload, but relatively costly to access). With the append-only structure and O(1) access time, SeaweedFS can take advantage of both local and cloud storage by offloading the warm data to the cloud. + +Usually hot data are fresh and warm data are old. SeaweedFS puts the newly created volumes on local servers, and optionally uploads the older volumes to the cloud. If the older data are accessed less often, this literally gives you unlimited capacity with limited local servers, while staying fast for new data. + +With the O(1) access time, the network latency cost is kept to a minimum. + +If the hot~warm data is split as 20~80, with 20 servers, you can achieve the storage capacity of 100 servers. That's a cost saving of 80%! Or you can repurpose the 80 servers to store new data also, and get 5X storage throughput. + +[Back to TOC](#table-of-contents)
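A quick back-of-the-envelope check of the numbers above, as a runnable Go sketch; the constants are the figures quoted in this README, not values read from the source code:

```go
package main

import "fmt"

func main() {
	const GiB = int64(1) << 30

	// Needle offsets are 32-bit counts of 8-byte units, so one volume
	// can address 8 x 2^32 bytes = 32GiB.
	volumeSize := 8 * (int64(1) << 32)
	fmt.Println(volumeSize/GiB, "GiB per volume") // 32

	// With 2^32 possible volume ids, the addressable total is
	// 32GiB x 2^32 = 2^67 bytes = 128EiB (too large for int64, so just stated).
	fmt.Println("addressable total: 2^67 bytes = 128 EiB")

	// Each in-memory needle map entry is 16 bytes:
	// <64-bit key, 32-bit offset, 32-bit size>, so a billion small
	// files on one volume server cost roughly 16 GB of RAM.
	fmt.Println(int64(1_000_000_000)*16, "bytes of map entries for 1e9 files")

	// The 20~80 hot/warm split: keep 20 servers local, tier the rest,
	// and 20 machines present the capacity of 100.
	fmt.Println(100/20, "x effective capacity with an 80% warm tier")
}
```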
## Compared to Other File Systems ## Most other distributed file systems seem more complicated than necessary. -SeaweedFS is meant to be fast and simple, both during usage and during setup. If you do not understand how it works when you reach here, we failed! Please raise an issue with any questions or update this file with clarifications. +SeaweedFS is meant to be fast and simple, in both setup and operation. If you do not understand how it works when you reach here, we've failed! Please raise an issue with any questions or update this file with clarifications. + +[Back to TOC](#table-of-contents) ### Compared to HDFS ### @@ -290,6 +346,7 @@ SeaweedFS is ideal for serving relatively smaller files quickly and concurrently SeaweedFS can also store extra large files by splitting them into manageable data chunks, and store the file ids of the data chunks into a meta chunk. This is managed by "weed upload/download" tool, and the weed master or volume servers are agnostic about it. +[Back to TOC](#table-of-contents) ### Compared to GlusterFS, Ceph ### @@ -297,7 +354,7 @@ The architectures are mostly the same. SeaweedFS aims to store and read files fa * SeaweedFS optimizes for small files, ensuring O(1) disk seek operation, and can also handle large files. * SeaweedFS statically assigns a volume id for a file. Locating file content becomes just a lookup of the volume id, which can be easily cached. -* SeaweedFS Filer metadata store can be any well-known and proven data stores, e.g., Cassandra, Redis, MySql, Postgres, etc, and is easy to customized. +* SeaweedFS Filer metadata store can be any well-known and proven data store, e.g., Cassandra, Redis, Etcd, MySql, Postgres, MemSql, TiDB, CockroachDB, etc, and is easy to customize. +* SeaweedFS Volume server also communicates directly with clients via HTTP, supporting range queries, direct uploads, etc. | System | File Meta | File Content Read| POSIX | REST API | Optimized for small files | @@ -307,25 +364,29 @@ The architectures are mostly the same. SeaweedFS aims to store and read files fa | GlusterFS | hashing | | FUSE, NFS | | | | Ceph | hashing + rules | | FUSE | Yes | | +[Back to TOC](#table-of-contents) + ### Compared to GlusterFS ### GlusterFS stores files, both directories and content, in configurable volumes called "bricks". GlusterFS hashes the path and filename into ids, assigns them to virtual volumes, which are then mapped to "bricks". +[Back to TOC](#table-of-contents) + ### Compared to Ceph ### Ceph can be set up similarly to SeaweedFS as a key->blob store. It is much more complicated, with the need to support layers on top of it. [Here is a more detailed comparison](https://github.com/chrislusf/seaweedfs/issues/120) -SeaweedFS has a centralized master group to look up free volumes, while Ceph uses hashing and metadata servers to locate its objects. Having a centralized master makes it easy to code and manage. +SeaweedFS has a centralized master group to look up free volumes, while Ceph uses hashing and metadata servers to locate its objects. Having a centralized master makes it easy to code and manage. -Same as SeaweedFS, Ceph is also based on a object store RADOS. Ceph is rather complicated with mixed reviews. +Same as SeaweedFS, Ceph is also based on the object store RADOS. Ceph is rather complicated with mixed reviews. Ceph uses CRUSH hashing to automatically manage the data placement. SeaweedFS places data by assigned volumes. SeaweedFS is optimized for small files. Small files are stored as one continuous block of content, with at most 8 unused bytes between files. Small file access is O(1) disk read. -SeaweedFS Filer uses off-the-shelf stores, such as MySql, Postgres, Redis, Cassandra, to manage file directories. There are proven, scalable, and easier to manage. +SeaweedFS Filer uses off-the-shelf stores, such as MySql, Postgres, Redis, Etcd, Cassandra, MemSql, TiDB, CockroachDB, to manage file directories. They are proven, scalable, and easier to manage. | SeaweedFS | comparable to Ceph | advantage | | ------------- | ------------- | ---------------- | @@ -333,107 +394,121 @@ SeaweedFS Filer uses off-the-shelf stores, such as MySql, Postgres, Redis, Cassa | Volume | OSD | optimized for small files | | Filer | Ceph FS | linearly scalable, Customizable, O(1) or O(logN) | +[Back to TOC](#table-of-contents) -## Dev plan ## +## Dev Plan ## More tools and documentation, on how to maintain and scale the system. For example, how to move volumes, automatically balancing data, how to grow volumes, how to check system status, etc. Other key features include: Erasure Encoding, JWT security. -This is a super exciting project! And I need helpers and [support](https://www.patreon.com/seaweedfs)! +This is a super exciting project! And we need helpers and [support](https://www.patreon.com/seaweedfs)!
+By the way, we suggest running the code style check script `util/gostd` before you push your branch to the remote; it will keep SeaweedFS easy to review, maintain, and develop: -## Installation guide for users who are not familiar with golang +``` +$ ./util/gostd +``` + +[Back to TOC](#table-of-contents) + +## Installation Guide ## + +> Installation guide for users who are not familiar with golang -step 1: install go on your machine and setup the environment by following the instructions from the following link: +Step 1: install go on your machine and set up the environment by following the instructions at: https://golang.org/doc/install make sure you set up your $GOPATH -step 2: also you may need to install Mercurial by following the instructions below +Step 2: also you may need to install Mercurial by following the instructions at: http://mercurial.selenic.com/downloads -step 3: download, compile, and install the project by executing the following command +Step 3: download, compile, and install the project by executing the following command ```bash go get github.com/chrislusf/seaweedfs/weed ``` -once this is done, you should see the executable "weed" under `$GOPATH/bin` +Once this is done, you will find the executable "weed" in your `$GOPATH/bin` directory -step 4: after you modify your code locally, you could start a local build by calling `go install` under +Step 4: after you modify your code locally, you can start a local build by calling `go install` under ``` $GOPATH/src/github.com/chrislusf/seaweedfs/weed ``` -## Disk Related topics ## +[Back to TOC](#table-of-contents) + +## Disk Related Topics ## ### Hard Drive Performance ### -When testing read performance on SeaweedFS, it basically becomes performance test for your hard drive's random read speed. Hard Drive usually get 100MB/s~200MB/s. +When testing read performance on SeaweedFS, it basically becomes a performance test of your hard drive's random read speed. Hard drives usually get 100MB/s~200MB/s. -### Solid State Disk +### Solid State Disk ### To modify or delete small files, SSD must delete a whole block at a time, and move content in existing blocks to a new block. SSD is fast when brand new, but will get fragmented over time and you have to garbage collect, compacting blocks. SeaweedFS is friendly to SSD since it is append-only. Deletion and compaction are done on volume level in the background, not slowing reading and not causing fragmentation. -## Benchmark +[Back to TOC](#table-of-contents) + +## Benchmark ## My Own Unscientific Single Machine Results on MacBook with Solid State Disk, CPU: 1 Intel Core i7 2.6GHz.
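(For context: output of this shape comes from the bundled benchmark tool, e.g. `weed benchmark -server=localhost:9333`, whose defaults — 16 concurrent clients and roughly one million 1KB files — appear to match the runs below; check `weed benchmark -h` for the exact flags.)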
Write 1 million 1KB file: ``` Concurrency Level: 16 -Time taken for tests: 88.796 seconds +Time taken for tests: 66.753 seconds Complete requests: 1048576 Failed requests: 0 -Total transferred: 1106764659 bytes -Requests per second: 11808.87 [#/sec] -Transfer rate: 12172.05 [Kbytes/sec] +Total transferred: 1106789009 bytes +Requests per second: 15708.23 [#/sec] +Transfer rate: 16191.69 [Kbytes/sec] Connection Times (ms) min avg max std -Total: 0.2 1.3 44.8 0.9 +Total: 0.3 1.0 84.3 0.9 Percentage of the requests served within a certain time (ms) - 50% 1.1 ms - 66% 1.3 ms - 75% 1.5 ms - 80% 1.7 ms - 90% 2.1 ms - 95% 2.6 ms - 98% 3.7 ms - 99% 4.6 ms - 100% 44.8 ms + 50% 0.8 ms + 66% 1.0 ms + 75% 1.1 ms + 80% 1.2 ms + 90% 1.4 ms + 95% 1.7 ms + 98% 2.1 ms + 99% 2.6 ms + 100% 84.3 ms ``` Randomly read 1 million files: ``` Concurrency Level: 16 -Time taken for tests: 34.263 seconds +Time taken for tests: 22.301 seconds Complete requests: 1048576 Failed requests: 0 -Total transferred: 1106762945 bytes -Requests per second: 30603.34 [#/sec] -Transfer rate: 31544.49 [Kbytes/sec] +Total transferred: 1106812873 bytes +Requests per second: 47019.38 [#/sec] +Transfer rate: 48467.57 [Kbytes/sec] Connection Times (ms) min avg max std -Total: 0.0 0.5 20.7 0.7 +Total: 0.0 0.3 54.1 0.2 Percentage of the requests served within a certain time (ms) - 50% 0.4 ms - 75% 0.5 ms - 95% 0.6 ms - 98% 0.8 ms - 99% 1.2 ms - 100% 20.7 ms + 50% 0.3 ms + 90% 0.4 ms + 98% 0.6 ms + 99% 0.7 ms + 100% 54.1 ms ``` +[Back to TOC](#table-of-contents) -## License +## License ## Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -447,7 +522,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. +The text of this page is available for modification and reuse under the terms of the Creative Commons Attribution-Sharealike 3.0 Unported License and the GNU Free Documentation License (unversioned, with no invariant sections, front-cover texts, or back-cover texts). 
+ +[Back to TOC](#table-of-contents) -## Stargazers over time +## Stargazers over time ## [](https://starcharts.herokuapp.com/chrislusf/seaweedfs) diff --git a/backers.md b/backers.md index 98767524e..97a5e9081 100644 --- a/backers.md +++ b/backers.md @@ -4,6 +4,7 @@ <h2 align="center">Generous Backers ($50+)</h2> +- [4Sight Imaging](https://www.4sightimaging.com/) - [Evercam Camera Management Software](https://evercam.io/) <h2 align="center">Backers</h2> diff --git a/docker/Dockerfile b/docker/Dockerfile index 2c0c0f047..38117a3dc 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,9 +1,20 @@ -FROM frolvlad/alpine-glibc:alpine-3.5 +FROM frolvlad/alpine-glibc +# Supercronic install settings +ENV SUPERCRONIC_URL=https://github.com/aptible/supercronic/releases/download/v0.1.8/supercronic-linux-amd64 \ + SUPERCRONIC=supercronic-linux-amd64 \ + SUPERCRONIC_SHA1SUM=be43e64c45acd6ec4fce5831e03759c89676a0ea + +# Install SeaweedFS and Supercronic ( for cron job mode ) # Tried to use curl only (curl -o /tmp/linux_amd64.tar.gz ...), however it turned out that the following tar command failed with "gzip: stdin: not in gzip format" RUN apk add --no-cache --virtual build-dependencies --update wget curl ca-certificates && \ wget -P /tmp https://github.com/$(curl -s -L https://github.com/chrislusf/seaweedfs/releases/latest | egrep -o 'chrislusf/seaweedfs/releases/download/.*/linux_amd64.tar.gz') && \ tar -C /usr/bin/ -xzvf /tmp/linux_amd64.tar.gz && \ + curl -fsSLO "$SUPERCRONIC_URL" && \ + echo "${SUPERCRONIC_SHA1SUM} ${SUPERCRONIC}" | sha1sum -c - && \ + chmod +x "$SUPERCRONIC" && \ + mv "$SUPERCRONIC" "/usr/local/bin/${SUPERCRONIC}" && \ + ln -s "/usr/local/bin/${SUPERCRONIC}" /usr/local/bin/supercronic && \ apk del build-dependencies && \ rm -rf /tmp/* @@ -15,8 +26,14 @@ EXPOSE 8080 EXPOSE 18888 # filer server http port EXPOSE 8888 -# master server shared gprc+http port +# master server shared grpc port +EXPOSE 19333 +# master server shared http port EXPOSE 9333 +# s3 server http port +EXPOSE 8333 + +RUN mkdir -p /data/filerldb2 VOLUME /data diff --git a/docker/Dockerfile.go_build b/docker/Dockerfile.go_build new file mode 100644 index 000000000..306ce3aa1 --- /dev/null +++ b/docker/Dockerfile.go_build @@ -0,0 +1,35 @@ +FROM frolvlad/alpine-glibc as builder +RUN apk add git go g++ +RUN mkdir -p /go/src/github.com/chrislusf/ +RUN git clone https://github.com/chrislusf/seaweedfs /go/src/github.com/chrislusf/seaweedfs +RUN cd /go/src/github.com/chrislusf/seaweedfs/weed && go install + +FROM alpine AS final +LABEL author="Chris Lu" +COPY --from=builder /root/go/bin/weed /usr/bin/ +RUN mkdir -p /etc/seaweedfs +COPY --from=builder /go/src/github.com/chrislusf/seaweedfs/docker/filer.toml /etc/seaweedfs/filer.toml +COPY --from=builder /go/src/github.com/chrislusf/seaweedfs/docker/entrypoint.sh /entrypoint.sh + +# volume server grpc port +EXPOSE 18080 +# volume server http port +EXPOSE 8080 +# filer server grpc port +EXPOSE 18888 +# filer server http port +EXPOSE 8888 +# master server shared grpc port +EXPOSE 19333 +# master server shared http port +EXPOSE 9333 +# s3 server http port +EXPOSE 8333 + +RUN mkdir -p /data/filerldb2 + +VOLUME /data + +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/Dockerfile.local b/docker/Dockerfile.local new file mode 100644 index 000000000..b4a7b6504 --- /dev/null +++ b/docker/Dockerfile.local @@ -0,0 +1,29 @@ +FROM alpine AS final +LABEL author="Chris Lu" +COPY ./weed /usr/bin/ +RUN mkdir -p /etc/seaweedfs +COPY ./filer.toml /etc/seaweedfs/filer.toml
+COPY ./entrypoint.sh /entrypoint.sh + +# volume server grpc port +EXPOSE 18080 +# volume server http port +EXPOSE 8080 +# filer server grpc port +EXPOSE 18888 +# filer server http port +EXPOSE 8888 +# master server shared grpc port +EXPOSE 19333 +# master server shared http port +EXPOSE 9333 +# s3 server http port +EXPOSE 8333 + +RUN mkdir -p /data/filerldb2 + +VOLUME /data + +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/Makefile b/docker/Makefile new file mode 100644 index 000000000..166188bc3 --- /dev/null +++ b/docker/Makefile @@ -0,0 +1,19 @@ +all: gen + +.PHONY : gen + +gen: dev + +build: + cd ../weed; GOOS=linux go build; mv weed ../docker/ + docker build --no-cache -t chrislusf/seaweedfs:local -f Dockerfile.local . + rm ./weed + +dev: build + docker-compose -f local-dev-compose.yml -p seaweedfs up + +cluster: build + docker-compose -f local-cluster-compose.yml -p seaweedfs up + +clean: + rm ./weed diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 000000000..65241b517 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,29 @@ +# Docker + + +## Try it out + +```bash + +wget https://raw.githubusercontent.com/chrislusf/seaweedfs/master/docker/seaweedfs-compose.yml + +docker-compose -f seaweedfs-compose.yml -p seaweedfs up + +``` + +## Try latest tip + +```bash + +wget https://raw.githubusercontent.com/chrislusf/seaweedfs/master/docker/seaweedfs-dev-compose.yml + +docker-compose -f seaweedfs-dev-compose.yml -p seaweedfs up + +``` + +## Local Development + +```bash +cd $GOPATH/src/github.com/chrislusf/seaweedfs/docker +make +``` diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml deleted file mode 100644 index 5e7d2e8b1..000000000 --- a/docker/docker-compose.yml +++ /dev/null @@ -1,40 +0,0 @@ -version: '2' - -services: - master: - #image: chrislusf/seaweedfs # use a remote image - build: . # build our container from the local Dockerfile - ports: - - 9333:9333 - command: "master" - networks: - default: - aliases: - - seaweed_master - volume: - #image: chrislusf/seaweedfs # use a remote image - build: . # build our container from the local Dockerfile - ports: - - 8080:8080 - - 18080:18080 - command: 'volume -max=5 -mserver="master:9333" -port=8080' - depends_on: - - master - networks: - default: - aliases: - - seaweed_volume - filer: - #image: chrislusf/seaweedfs # use a remote image - build: . # build our container from the local Dockerfile - ports: - - 8888:8888 - - 18888:18888 - command: 'filer -master="master:9333"' - depends_on: - - master - - volume - networks: - default: - aliases: - - seaweed_filer diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 6fd97ad9b..791527d3a 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -3,7 +3,7 @@ case "$1" in 'master') - ARGS="-ip `hostname -i` -mdir /data" + ARGS="-mdir /data" # Is this instance linked with another master?
(Docker commandline "--link master1:master") if [ -n "$MASTER_PORT_9333_TCP_ADDR" ] ; then ARGS="$ARGS -peers=$MASTER_PORT_9333_TCP_ADDR:$MASTER_PORT_9333_TCP_PORT" @@ -29,13 +29,31 @@ case "$1" in ;; 'filer') - ARGS="-ip `hostname -i`" + ARGS="" if [ -n "$MASTER_PORT_9333_TCP_ADDR" ] ; then ARGS="$ARGS -master=$MASTER_PORT_9333_TCP_ADDR:$MASTER_PORT_9333_TCP_PORT" fi exec /usr/bin/weed $@ $ARGS ;; + 's3') + ARGS="-domainName=$S3_DOMAIN_NAME -key.file=$S3_KEY_FILE -cert.file=$S3_CERT_FILE" + if [ -n "$FILER_PORT_8888_TCP_ADDR" ] ; then + ARGS="$ARGS -filer=$FILER_PORT_8888_TCP_ADDR:$FILER_PORT_8888_TCP_PORT" + fi + exec /usr/bin/weed $@ $ARGS + ;; + + 'cronjob') + MASTER=${WEED_MASTER-localhost:9333} + FIX_REPLICATION_CRON_SCHEDULE=${CRON_SCHEDULE-*/7 * * * * *} + echo "$FIX_REPLICATION_CRON_SCHEDULE" 'echo "volume.fix.replication" | weed shell -master='$MASTER > /crontab + BALANCING_CRON_SCHEDULE=${CRON_SCHEDULE-25 * * * * *} + echo "$BALANCING_CRON_SCHEDULE" 'echo "volume.balance -c ALL -force" | weed shell -master='$MASTER >> /crontab + echo "Running Crontab:" + cat /crontab + exec supercronic /crontab + ;; *) exec /usr/bin/weed $@ ;; diff --git a/docker/filer.toml b/docker/filer.toml index 35db3d012..a11e5de2b 100644 --- a/docker/filer.toml +++ b/docker/filer.toml @@ -1,3 +1,3 @@ -[leveldb] +[leveldb2] enabled = true -dir = "." +dir = "/data/filerldb2" diff --git a/docker/local-cluster-compose.yml b/docker/local-cluster-compose.yml new file mode 100644 index 000000000..0b6860fa1 --- /dev/null +++ b/docker/local-cluster-compose.yml @@ -0,0 +1,53 @@ +version: '2' + +services: + master0: + image: chrislusf/seaweedfs:local + ports: + - 9333:9333 + - 19333:19333 + command: "master -ip=master0 -port=9333 -peers=master0:9333,master1:9334,master2:9335" + master1: + image: chrislusf/seaweedfs:local + ports: + - 9334:9334 + - 19334:19334 + command: "master -ip=master1 -port=9334 -peers=master0:9333,master1:9334,master2:9335" + master2: + image: chrislusf/seaweedfs:local + ports: + - 9335:9335 + - 19335:19335 + command: "master -ip=master2 -port=9335 -peers=master0:9333,master1:9334,master2:9335" + volume: + image: chrislusf/seaweedfs:local + ports: + - 8080:8080 + - 18080:18080 + command: '-v=2 volume -max=5 -mserver="master0:9333,master1:9334,master2:9335" -port=8080 -ip=volume' + depends_on: + - master0 + - master1 + - master2 + filer: + image: chrislusf/seaweedfs:local + ports: + - 8888:8888 + - 18888:18888 + command: '-v=4 filer -master="master0:9333,master1:9334,master2:9335"' + depends_on: + - master0 + - master1 + - master2 + - volume + s3: + image: chrislusf/seaweedfs:local + ports: + - 8333:8333 + command: '-v=4 s3 -filer="filer:8888"' + depends_on: + - master0 + - master1 + - master2 + - volume + - filer diff --git a/docker/local-dev-compose.yml b/docker/local-dev-compose.yml new file mode 100644 index 000000000..5ff42ed28 --- /dev/null +++ b/docker/local-dev-compose.yml @@ -0,0 +1,35 @@ +version: '2' + +services: + master: + image: chrislusf/seaweedfs:local + ports: + - 9333:9333 + - 19333:19333 + command: "master -ip=master" + volume: + image: chrislusf/seaweedfs:local + ports: + - 8080:8080 + - 18080:18080 + command: '-v=2 volume -max=5 -mserver="master:9333" -port=8080 -ip=volume' + depends_on: + - master + filer: + image: chrislusf/seaweedfs:local + ports: + - 8888:8888 + - 18888:18888 + command: '-v=4 filer -master="master:9333"' + depends_on: + - master + - volume + s3: + image: chrislusf/seaweedfs:local + ports: + - 8333:8333 + command: '-v=4 s3 -filer="filer:8888"' + 
depends_on: + - master + - volume + - filer diff --git a/docker/seaweedfs-compose.yml b/docker/seaweedfs-compose.yml new file mode 100644 index 000000000..35509c541 --- /dev/null +++ b/docker/seaweedfs-compose.yml @@ -0,0 +1,47 @@ +version: '2' + +services: + master: + image: chrislusf/seaweedfs # use a remote image + ports: + - 9333:9333 + - 19333:19333 + command: "master -ip=master" + volume: + image: chrislusf/seaweedfs # use a remote image + ports: + - 8080:8080 + - 18080:18080 + command: 'volume -max=15 -mserver="master:9333" -port=8080' + depends_on: + - master + filer: + image: chrislusf/seaweedfs # use a remote image + ports: + - 8888:8888 + - 18888:18888 + command: 'filer -master="master:9333"' + tty: true + stdin_open: true + depends_on: + - master + - volume + cronjob: + image: chrislusf/seaweedfs # use a remote image + command: 'cronjob' + environment: + # Run re-replication every 2 minutes + CRON_SCHEDULE: '*/2 * * * * *' # Default: '*/5 * * * * *' + WEED_MASTER: master:9333 # Default: localhost:9333 + depends_on: + - master + - volume + s3: + image: chrislusf/seaweedfs # use a remote image + ports: + - 8333:8333 + command: 's3 -filer="filer:8888"' + depends_on: + - master + - volume + - filer diff --git a/docker/seaweedfs-dev-compose.yml b/docker/seaweedfs-dev-compose.yml new file mode 100644 index 000000000..197510a9f --- /dev/null +++ b/docker/seaweedfs-dev-compose.yml @@ -0,0 +1,35 @@ +version: '2' + +services: + master: + image: chrislusf/seaweedfs:dev # use a remote dev image + ports: + - 9333:9333 + - 19333:19333 + command: "master -ip=master" + volume: + image: chrislusf/seaweedfs:dev # use a remote dev image + ports: + - 8080:8080 + - 18080:18080 + command: '-v=2 volume -max=5 -mserver="master:9333" -port=8080 -ip=volume' + depends_on: + - master + filer: + image: chrislusf/seaweedfs:dev # use a remote dev image + ports: + - 8888:8888 + - 18888:18888 + command: '-v=4 filer -master="master:9333"' + depends_on: + - master + - volume + s3: + image: chrislusf/seaweedfs:dev # use a remote dev image + ports: + - 8333:8333 + command: '-v=4 s3 -filer="filer:8888"' + depends_on: + - master + - volume + - filer @@ -0,0 +1,101 @@ +module github.com/chrislusf/seaweedfs + +go 1.12 + +require ( + cloud.google.com/go v0.44.3 + github.com/Azure/azure-pipeline-go v0.2.2 // indirect + github.com/Azure/azure-storage-blob-go v0.8.0 + github.com/DataDog/zstd v1.4.1 // indirect + github.com/Shopify/sarama v1.23.1 + github.com/aws/aws-sdk-go v1.23.13 + github.com/chrislusf/raft v0.0.0-20190225081310-10d6e2182d92 + github.com/coreos/bbolt v1.3.3 // indirect + github.com/coreos/etcd v3.3.15+incompatible // indirect + github.com/coreos/go-semver v0.3.0 // indirect + github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f // indirect + github.com/dgrijalva/jwt-go v3.2.0+incompatible + github.com/disintegration/imaging v1.6.1 + github.com/dustin/go-humanize v1.0.0 + github.com/eapache/go-resiliency v1.2.0 // indirect + github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a + github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c // indirect + github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect + github.com/facebookgo/stats v0.0.0-20151006221625-1b76add642e4 + github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 // indirect + github.com/frankban/quicktest v1.7.2 // indirect + github.com/gabriel-vasile/mimetype v1.0.0 + github.com/go-redis/redis v6.15.2+incompatible + github.com/go-sql-driver/mysql v1.4.1 + github.com/gocql/gocql 
v0.0.0-20190829130954-e163eff7a8c6 + github.com/gogo/protobuf v1.2.2-0.20190730201129-28a6bbf47e48 // indirect + github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 // indirect + github.com/golang/protobuf v1.3.2 + github.com/google/btree v1.0.0 + github.com/google/uuid v1.1.1 + github.com/gorilla/mux v1.7.3 + github.com/gorilla/websocket v1.4.1 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect + github.com/grpc-ecosystem/grpc-gateway v1.11.0 // indirect + github.com/hashicorp/golang-lru v0.5.3 // indirect + github.com/jacobsa/daemonize v0.0.0-20160101105449-e460293e890f + github.com/jcmturner/gofork v1.0.0 // indirect + github.com/karlseguin/ccache v2.0.3+incompatible + github.com/karlseguin/expect v1.0.1 // indirect + github.com/klauspost/cpuid v1.2.1 // indirect + github.com/klauspost/crc32 v1.2.0 + github.com/klauspost/reedsolomon v1.9.2 + github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect + github.com/kurin/blazer v0.5.3 + github.com/lib/pq v1.2.0 + github.com/magiconair/properties v1.8.1 // indirect + github.com/mattn/go-ieproxy v0.0.0-20190805055040-f9202b1cfdeb // indirect + github.com/mattn/go-runewidth v0.0.4 // indirect + github.com/nats-io/nats-server/v2 v2.0.4 // indirect + github.com/onsi/ginkgo v1.10.1 // indirect + github.com/onsi/gomega v1.7.0 // indirect + github.com/pelletier/go-toml v1.4.0 // indirect + github.com/peterh/liner v1.1.0 + github.com/pierrec/lz4 v2.2.7+incompatible // indirect + github.com/prometheus/client_golang v1.1.0 + github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 // indirect + github.com/prometheus/procfs v0.0.4 // indirect + github.com/rakyll/statik v0.1.6 + github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 // indirect + github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd + github.com/seaweedfs/fuse v0.0.0-20190510212405-310228904eff + github.com/sirupsen/logrus v1.4.2 // indirect + github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/spf13/afero v1.2.2 // indirect + github.com/spf13/jwalterweatherman v1.1.0 // indirect + github.com/spf13/viper v1.4.0 + github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271 // indirect + github.com/syndtr/goleveldb v1.0.0 + github.com/tidwall/gjson v1.3.2 + github.com/tidwall/match v1.0.1 + github.com/willf/bitset v1.1.10 // indirect + github.com/willf/bloom v2.0.3+incompatible + github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 // indirect + go.etcd.io/bbolt v1.3.3 // indirect + go.etcd.io/etcd v3.3.15+incompatible + go.uber.org/multierr v1.2.0 // indirect + gocloud.dev v0.16.0 + gocloud.dev/pubsub/natspubsub v0.16.0 + gocloud.dev/pubsub/rabbitpubsub v0.16.0 + golang.org/x/crypto v0.0.0-20190911031432-227b76d455e7 // indirect + golang.org/x/image v0.0.0-20190829233526-b3c06291d021 // indirect + golang.org/x/net v0.0.0-20190909003024-a7b16738d86b + golang.org/x/sys v0.0.0-20190910064555-bbd175535a8b + golang.org/x/tools v0.0.0-20190911022129-16c5e0f7d110 + google.golang.org/api v0.9.0 + google.golang.org/appengine v1.6.2 // indirect + google.golang.org/genproto v0.0.0-20190905072037-92dd089d5514 // indirect + google.golang.org/grpc v1.23.0 + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect + gopkg.in/jcmturner/goidentity.v3 v3.0.0 // indirect + gopkg.in/jcmturner/gokrb5.v7 v7.3.0 // indirect + gopkg.in/karlseguin/expect.v1 v1.0.1 // indirect + sigs.k8s.io/yaml v1.1.0 // indirect +) + +replace github.com/satori/go.uuid v1.2.0 => 
github.com/satori/go.uuid v0.0.0-20181028125025-b2ce2384e17b @@ -0,0 +1,616 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.39.0/go.mod h1:rVLT6fkc8chs9sfPtFc1SBH6em7n+ZoXaG+87tDISts= +cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= +cloud.google.com/go v0.44.3 h1:0sMegbmn/8uTwpNkB0q9cLEpZ2W5a6kl+wtBQgPWBJQ= +cloud.google.com/go v0.44.3/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= +contrib.go.opencensus.io/exporter/aws v0.0.0-20181029163544-2befc13012d0/go.mod h1:uu1P0UCM/6RbsMrgPa98ll8ZcHM858i/AD06a9aLRCA= +contrib.go.opencensus.io/exporter/ocagent v0.5.0 h1:TKXjQSRS0/cCDrP7KvkgU6SmILtF/yV2TOs/02K/WZQ= +contrib.go.opencensus.io/exporter/ocagent v0.5.0/go.mod h1:ImxhfLRpxoYiSq891pBrLVhN+qmP8BTVvdH2YLs7Gl0= +contrib.go.opencensus.io/exporter/stackdriver v0.12.1/go.mod h1:iwB6wGarfphGGe/e5CWqyUk/cLzKnWsOKPVW3no6OTw= +contrib.go.opencensus.io/integrations/ocsql v0.1.4/go.mod h1:8DsSdjz3F+APR+0z0WkU1aRorQCFfRxvqjUUPMbF3fE= +contrib.go.opencensus.io/resource v0.1.1/go.mod h1:F361eGI91LCmW1I/Saf+rX0+OFcigGlFvXwEGEnkRLA= +github.com/Azure/azure-amqp-common-go/v2 v2.1.0/go.mod h1:R8rea+gJRuJR6QxTir/XuEd+YuKoUiazDC/N96FiDEU= +github.com/Azure/azure-pipeline-go v0.1.8/go.mod h1:XA1kFWRVhSK+KNFiOhfv83Fv8L9achrP7OxIzeTn1Yg= +github.com/Azure/azure-pipeline-go v0.1.9/go.mod h1:XA1kFWRVhSK+KNFiOhfv83Fv8L9achrP7OxIzeTn1Yg= +github.com/Azure/azure-pipeline-go v0.2.1 h1:OLBdZJ3yvOn2MezlWvbrBMTEUQC72zAftRZOMdj5HYo= +github.com/Azure/azure-pipeline-go v0.2.1/go.mod h1:UGSo8XybXnIGZ3epmeBw7Jdz+HiUVpqIlpz/HKHylF4= +github.com/Azure/azure-pipeline-go v0.2.2 h1:6oiIS9yaG6XCCzhgAgKFfIWyo4LLCiDhZot6ltoThhY= +github.com/Azure/azure-pipeline-go v0.2.2/go.mod h1:4rQ/NZncSvGqNkkOsNpOU1tgoNuIlp9AfUH5G1tvCHc= +github.com/Azure/azure-sdk-for-go v29.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/azure-sdk-for-go v30.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/azure-service-bus-go v0.9.1/go.mod h1:yzBx6/BUGfjfeqbRZny9AQIbIe3AcV9WZbAdpkoXOa0= +github.com/Azure/azure-storage-blob-go v0.6.0/go.mod h1:oGfmITT1V6x//CswqY2gtAHND+xIP64/qL7a5QJix0Y= +github.com/Azure/azure-storage-blob-go v0.8.0 h1:53qhf0Oxa0nOjgbDeeYPUeyiNmafAFEY95rZLK0Tj6o= +github.com/Azure/azure-storage-blob-go v0.8.0/go.mod h1:lPI3aLPpuLTeUwh1sViKXFxwl2B6teiRqI0deQUvsw0= +github.com/Azure/go-autorest v12.0.0+incompatible h1:N+VqClcomLGD/sHb3smbSYYtNMgKpVV3Cd5r5i8z6bQ= +github.com/Azure/go-autorest v12.0.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DataDog/zstd v1.3.6-0.20190409195224-796139022798 h1:2T/jmrHeTezcCM58lvEQXs0UpQJCo5SoGAcg+mbSTIg= +github.com/DataDog/zstd v1.3.6-0.20190409195224-796139022798/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= +github.com/DataDog/zstd v1.4.1 h1:3oxKN3wbHibqx897utPC2LTQU4J+IHWWJO+glkAkpFM= +github.com/DataDog/zstd v1.4.1/go.mod 
h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= +github.com/GoogleCloudPlatform/cloudsql-proxy v0.0.0-20190605020000-c4ba1fdf4d36/go.mod h1:aJ4qN3TfrelA6NZ6AXsXRfmEVaYin3EDbSPJrKS8OXo= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/Shopify/sarama v1.23.1 h1:XxJBCZEoWJtoWjf/xRbmGUpAmTZGnuuF0ON0EvxxBrs= +github.com/Shopify/sarama v1.23.1/go.mod h1:XLH1GYJnLVE0XCr6KdJGVJRTwY30moWNJ4sERjXX6fs= +github.com/Shopify/toxiproxy v2.1.4+incompatible h1:TKdv8HiTLgE5wdJuEML90aBgNWsokNbMijUGhmcoBJc= +github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/aws/aws-sdk-go v1.15.27/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0= +github.com/aws/aws-sdk-go v1.19.18/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= +github.com/aws/aws-sdk-go v1.19.45/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= +github.com/aws/aws-sdk-go v1.23.13 h1:l/NG+mgQFRGG3dsFzEj0jw9JIs/zYdtU6MXhY1WIDmM= +github.com/aws/aws-sdk-go v1.23.13/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY= +github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= +github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= +github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= +github.com/census-instrumentation/opencensus-proto v0.2.0 h1:LzQXZOgg4CQfE6bFvXGM30YZL1WW/M337pXml+GrcZ4= +github.com/census-instrumentation/opencensus-proto v0.2.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/chrislusf/raft v0.0.0-20190225081310-10d6e2182d92 h1:lM9SFsh0EPXkyJyrTJqLZPAIJBtNFP6LNkYXu2MnSZI= +github.com/chrislusf/raft v0.0.0-20190225081310-10d6e2182d92/go.mod h1:4jyiUCD5y548+yKW+oiHtccBiMaLCCbFBpK2t7X4eUo= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/coreos/bbolt v1.3.2 h1:wZwiHHUieZCquLkDL0B8UhzreNWsPHooDAG3q34zk0s= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/bbolt v1.3.3 h1:n6AiVyVRKQFNb6mJlwESEvvLoDyiTzXX7ORAUlkeBdY= +github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/etcd v3.3.10+incompatible h1:jFneRYjIvLMLhDLCzuTuU4rSJUjRplcJQ7pD7MnhC04= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/etcd 
v3.3.15+incompatible h1:+9RjdC18gMxNQVvSiXvObLu29mOFmkgdsB4cRTlV+EE= +github.com/coreos/etcd v3.3.15+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-semver v0.2.0 h1:3Jm3tLmsgAYcjC+4Up7hJrFBPr+n7rAqYeSw/SZazuY= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= +github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e h1:Wf6HqHfScWJN9/ZjdUKyjop4mf3Qdd+1TvvltAvM3m8= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f h1:JOrtw2xFKzlg+cbHpyrpLDmnN1HqhBfnX7WDiW7eG2c= +github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/devigned/tab v0.1.1/go.mod h1:XG9mPq0dFghrYvoBF3xdRrJzSTX1b7IQrvaL9mzjeJY= +github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= +github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8= +github.com/disintegration/imaging v1.6.1 h1:JnBbK6ECIZb1NsWIikP9pd8gIlTIRx7fuDNpU9fsxOE= +github.com/disintegration/imaging v1.6.1/go.mod h1:xuIt+sRxDFrHS0drzXUlCJthkJ8k7lkkUojDSR247MQ= +github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/eapache/go-resiliency v1.1.0 h1:1NtRmCAqadE2FN4ZcN6g90TP3uk8cg9rn9eNK2197aU= +github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= +github.com/eapache/go-resiliency v1.2.0 h1:v7g92e/KSN71Rq7vSThKaWIq68fL4YHvWyiUKorFR1Q= +github.com/eapache/go-resiliency v1.2.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= +github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 h1:YEetp8/yCZMuEPMUDHG0CW/brkkEp8mzqk2+ODEitlw= +github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= +github.com/eapache/queue v1.1.0 h1:YOEu7KNc61ntiQlcEeUIoDTJ2o8mQznoNvUhiigpIqc= +github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= +github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a h1:yDWHCSQ40h88yih2JAcL6Ls/kVkSE8GFACTGVnMPruw= +github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a/go.mod h1:7Ga40egUymuWXxAe151lTNnCv97MddSOVsjpPPkityA= +github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c h1:8ISkoahWXwZR41ois5lSJBSVw4D0OV19Ht/JSTzvSv0= +github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c/go.mod 
h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64= +github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 h1:JWuenKqqX8nojtoVVWjGfOF9635RETekkoH6Cc9SX0A= +github.com/facebookgo/stack v0.0.0-20160209184415-751773369052/go.mod h1:UbMTZqLaRiH3MsBH8va0n7s1pQYcu3uTb8G4tygF4Zg= +github.com/facebookgo/stats v0.0.0-20151006221625-1b76add642e4 h1:0YtRCqIZs2+Tz49QuH6cJVw/IFqzo39gEqZ0iYLxD2M= +github.com/facebookgo/stats v0.0.0-20151006221625-1b76add642e4/go.mod h1:vsJz7uE339KUCpBXx3JAJzSRH7Uk4iGGyJzR529qDIA= +github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 h1:7HZCaLC5+BZpmbhCOZJ293Lz68O7PYrF2EzeiFMwCLk= +github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4/go.mod h1:5tD+neXqOorC30/tWg0LCSkrqj/AR6gu8yY8/fpw1q0= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fortytw2/leaktest v1.2.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= +github.com/frankban/quicktest v1.7.2 h1:2QxQoC1TS09S7fhCPsrvqYdvP1H5M1P1ih5ABm3BTYk= +github.com/frankban/quicktest v1.7.2/go.mod h1:jaStnuzAqU1AJdCO0l53JDCJrVDKcS03DbaAcR7Ks/o= +github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/gabriel-vasile/mimetype v1.0.0 h1:0QKnAQQhG6oOsb4GK7iPlet7RtjHi9us8RF/nXoTxhI= +github.com/gabriel-vasile/mimetype v1.0.0/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To= +github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-redis/redis v6.15.2+incompatible h1:9SpNVG76gr6InJGxoZ6IuuxaCOQwDAhzyXg+Bs+0Sb4= +github.com/go-redis/redis v6.15.2+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= +github.com/go-sql-driver/mysql v1.4.1 h1:g24URVg0OFbNUTx9qqY1IRZ9D9z3iPyi5zKhQZpNwpA= +github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/gocql/gocql v0.0.0-20190829130954-e163eff7a8c6 h1:P66kRWyEoIx6URKgAC3ijx9jo9gEid7bEhLQ/Z0G65A= +github.com/gocql/gocql v0.0.0-20190829130954-e163eff7a8c6/go.mod h1:Q7Sru5153KG8D9zwueuQJB3ccJf9/bIwF/x8b3oKgT8= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE= +github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.2.2-0.20190730201129-28a6bbf47e48 h1:X+zN6RZXsvnrSJaAIQhZezPfAfvsqihKKR8oiLHid34= +github.com/gogo/protobuf v1.2.2-0.20190730201129-28a6bbf47e48/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef 
h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 h1:ZgQEtGgCBiWRM39fZuwSd1LwSqqSW0hOdXCYYDX0R3I= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-replayers/grpcreplay v0.1.0 h1:eNb1y9rZFmY4ax45uEEECSa8fsxGRU+8Bil52ASAwic= +github.com/google/go-replayers/grpcreplay v0.1.0/go.mod h1:8Ig2Idjpr6gifRd6pNVggX6TC1Zw6Jx74AKp7QNH2QE= +github.com/google/go-replayers/httpreplay v0.1.0 h1:AX7FUb4BjrrzNvblr/OlgwrmFiep6soj5K2QSDW7BGk= +github.com/google/go-replayers/httpreplay v0.1.0/go.mod h1:YKZViNhiGgqdBlUbI2MwGpq4pXxNmhJLPHQ7cv2b5no= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible h1:xmapqc1AyLoB+ddYT6r04bD9lIjlOqGaREovi0SzFaE= +github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/subcommands v1.0.1/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk= +github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/wire v0.3.0 h1:imGQZGEVEHpje5056+K+cgdO72p0LQv2xIIFXNGUf60= 
+github.com/google/wire v0.3.0/go.mod h1:i1DMg/Lu8Sz5yYl25iOdmc5CT5qusaa+zmRWs16741s= +github.com/googleapis/gax-go v2.0.2+incompatible h1:silFMLAnr330+NRuag/VjIGF7TLp/LBrV2CJKFLWEww= +github.com/googleapis/gax-go v2.0.2+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/googleapis/gax-go/v2 v2.0.5 h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM= +github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw= +github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q= +github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvKCM= +github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0 h1:Iju5GlWwrvL6UBg4zJJt3btmonfrMlCDdsejg4CZE7c= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 h1:z53tR0945TRRQO/fLEVPI6SMv7ZflF0TEaTAoU7tOzg= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.9.0 h1:bM6ZAFZmc/wPFaRDi0d5L7hGEZEx/2u+Tmr2evNHDiI= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.9.2/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.11.0 h1:aT5ISUniaOTErogCQ+4pGoYNBB6rm6Fq3g1v8QwYGas= +github.com/grpc-ecosystem/grpc-gateway v1.11.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= +github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= +github.com/hashicorp/go-uuid v1.0.1 h1:fv1ep09latC32wFoVwnqcnKJGnMSdBanPczbHAYm1BE= +github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.3 h1:YPkqC67at8FYaadspW/6uE0COsBxS2656RLEr8Bppgk= +github.com/hashicorp/golang-lru v0.5.3/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= +github.com/hpcloud/tail v1.0.0/go.mod 
h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/jacobsa/daemonize v0.0.0-20160101105449-e460293e890f h1:X+tnaqoCcBgAwSTJtoYW6p0qKiuPyMfofEHEFUf2kdU= +github.com/jacobsa/daemonize v0.0.0-20160101105449-e460293e890f/go.mod h1:Ip4fOwzCrnDVuluHBd7FXIMb7SHOKfkt9/UDrYSZvqI= +github.com/jcmturner/gofork v0.0.0-20190328161633-dc7c13fece03 h1:FUwcHNlEqkqLjLBdCp5PRlCFijNjvcYANOZXzCfXwCM= +github.com/jcmturner/gofork v0.0.0-20190328161633-dc7c13fece03/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= +github.com/jcmturner/gofork v1.0.0 h1:J7uCkflzTEhUZ64xqKnkDxq3kzc96ajM1Gli5ktUem8= +github.com/jcmturner/gofork v1.0.0/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o= +github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= +github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM= +github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= +github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= +github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo= +github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.7 h1:KfgG9LzI+pYjr4xvmz/5H4FXjokeP+rlHLhv3iH62Fo= +github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/karlseguin/ccache v2.0.3+incompatible h1:j68C9tWOROiOLWTS/kCGg9IcJG+ACqn5+0+t8Oh83UU= +github.com/karlseguin/ccache v2.0.3+incompatible/go.mod h1:CM9tNPzT6EdRh14+jiW8mEF9mkNZuuE51qmgGYUB93w= +github.com/karlseguin/expect v1.0.1 h1:z4wy4npwwHSWKjGWH85WNJO42VQhovxTCZDSzhjo8hY= +github.com/karlseguin/expect v1.0.1/go.mod h1:zNBxMY8P21owkeogJELCLeHIt+voOSduHYTFUbwRAV8= +github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/cpuid v1.2.1 h1:vJi+O/nMdFt0vqm8NZBI6wzALWdA2X+egi0ogNyrC/w= +github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= +github.com/klauspost/crc32 v1.2.0 h1:0VuyqOCruD33/lJ/ojXNvzVyl8Zr5zdTmj9l9qLZ86I= +github.com/klauspost/crc32 v1.2.0/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg= +github.com/klauspost/reedsolomon v1.9.2 h1:E9CMS2Pqbv+C7tsrYad4YC9MfhnMVWhMRsTi7U0UB18= +github.com/klauspost/reedsolomon v1.9.2/go.mod h1:CwCi+NUr9pqSVktrkN+Ondf06rkhYZ/pcNv7fu+8Un4= +github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s= +github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= 
+github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kurin/blazer v0.5.3 h1:SAgYv0TKU0kN/ETfO5ExjNAPyMt2FocO2s/UlCHfjAk= +github.com/kurin/blazer v0.5.3/go.mod h1:4FCXMUWo9DllR2Do4TtBd377ezyAJ51vB5uTBjt0pGU= +github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0= +github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= +github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-ieproxy v0.0.0-20190610004146-91bb50d98149 h1:HfxbT6/JcvIljmERptWhwa8XzP7H3T+Z2N26gTsaDaA= +github.com/mattn/go-ieproxy v0.0.0-20190610004146-91bb50d98149/go.mod h1:31jz6HNzdxOmlERGGEc4v/dMssOfmp2p5bT/okiKFFc= +github.com/mattn/go-ieproxy v0.0.0-20190702010315-6dee0af9227d/go.mod h1:31jz6HNzdxOmlERGGEc4v/dMssOfmp2p5bT/okiKFFc= +github.com/mattn/go-ieproxy v0.0.0-20190805055040-f9202b1cfdeb h1:hXqqXzQtJbENrsb+rsIqkVqcg4FUJL0SQFGw08Dgivw= +github.com/mattn/go-ieproxy v0.0.0-20190805055040-f9202b1cfdeb/go.mod h1:31jz6HNzdxOmlERGGEc4v/dMssOfmp2p5bT/okiKFFc= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-runewidth v0.0.3 h1:a+kO+98RDGEfo6asOGMmpodZq4FNtnGP54yps8BzLR4= +github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= +github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y= +github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= +github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod 
h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nats-io/jwt v0.2.6/go.mod h1:mQxQ0uHQ9FhEVPIcTSKwx2lqZEpXWWcCgA7R6NrWvvY= +github.com/nats-io/jwt v0.2.14 h1:wA50KvFz/JXGXMHRygTWsRGh/ixxgC5E3kHvmtGLNf4= +github.com/nats-io/jwt v0.2.14/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= +github.com/nats-io/nats-server/v2 v2.0.0/go.mod h1:RyVdsHHvY4B6c9pWG+uRLpZ0h0XsqiuKp2XCTurP5LI= +github.com/nats-io/nats-server/v2 v2.0.4 h1:XOMeQRbhl1lGNTIctPhih6pTa15NGif54Uas6ZW5q7g= +github.com/nats-io/nats-server/v2 v2.0.4/go.mod h1:AWdGEVbjKRS9ZIx4DSP5eKW48nfFm7q3uiSkP/1KD7M= +github.com/nats-io/nats.go v1.8.1 h1:6lF/f1/NN6kzUDBz6pyvQDEXO39jqXcWRLu/tKjtOUQ= +github.com/nats-io/nats.go v1.8.1/go.mod h1:BrFz9vVn0fU3AcH9Vn4Kd7W0NpJ651tD5omQ3M8LwxM= +github.com/nats-io/nkeys v0.0.2 h1:+qM7QpgXnvDDixitZtQUBDY9w/s9mu1ghS+JIbsrx6M= +github.com/nats-io/nkeys v0.0.2/go.mod h1:dab7URMsZm6Z/jp9Z5UGa87Uutgc2mVpXLC4B7TDb/4= +github.com/nats-io/nkeys v0.1.0 h1:qMd4+pRHgdr1nAClu+2h/2a5F2TmKcCzjCDazVgRoX4= +github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= +github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= +github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= +github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.10.1 h1:q/mM8GF/n0shIN8SaAZ0V+jnLPzen6WIVZdiwrRlMlo= +github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU= +github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/onsi/gomega v1.7.0 h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME= +github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pelletier/go-toml v1.4.0 h1:u3Z1r+oOXJIkxqw34zVhyPgjBsm6X2wn21NWs/HfSeg= +github.com/pelletier/go-toml v1.4.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= +github.com/peterh/liner v1.1.0 h1:f+aAedNJA6uk7+6rXsYBnhdo4Xux7ESLe+kcuVUF5os= +github.com/peterh/liner v1.1.0/go.mod h1:CRroGNssyjTd/qIG2FyxByd2S8JEAZXBl4qUrZf8GS0= +github.com/pierrec/lz4 v0.0.0-20190327172049-315a67e90e41/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= +github.com/pierrec/lz4 v2.2.7+incompatible h1:Eerk9aiqeZo2QzsbWOAsELUf9ddvAxEdMY9LYze/DEc= +github.com/pierrec/lz4 v2.2.7+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= 
+github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= +github.com/prometheus/client_golang v1.0.0 h1:vrDKnkGzuGvhNAL56c7DBz29ZL+KxnoR0x7enabFceM= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.1.0 h1:BQ53HtBmfOitExawJ6LokA4x8ov/z0SYYb0+HxJfRI8= +github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 h1:S/YWwWx/RA8rT8tKFRuGUZhuA90OyIBpPCXkcbwU8DE= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.4.1 h1:K0MGApIoQvMw27RTdJkPbr3JZ7DNbtxQNyi5STVM6Kw= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.6.0 h1:kRhiuYSXR3+uv2IbVbZhUxK5zVD/2pp3Gd2PpvPkpEo= +github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= +github.com/prometheus/procfs v0.0.4 h1:w8DjqFMJDjuVwdZBQoOozr4MVWOnwF7RcL/7uxBjY78= +github.com/prometheus/procfs v0.0.4/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/rakyll/statik v0.1.6 h1:uICcfUXpgqtw2VopbIncslhAmE5hwc4g20TEyEENBNs= +github.com/rakyll/statik v0.1.6/go.mod h1:OEi9wJV/fMUAGx1eNjq75DKDsJVuEv1U0oYdX6GX8Zs= +github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhDYGoxY8uLVpewe1GDZ2vu2Tr/vTdVAkFQ= +github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563 h1:dY6ETXrvDG7Sa4vE8ZQG4yqWg6UnOcbqTAahkV813vQ= +github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= +github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd h1:CmH9+J6ZSsIjUK3dcGsnCnO41eRBOnY12zwkn5qVwgc= +github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= +github.com/seaweedfs/fuse v0.0.0-20190510212405-310228904eff h1:uLd5zBvf5OA67wcVRePHrFt60bR4LSskaVhgVwyk0Jg= 
+github.com/seaweedfs/fuse v0.0.0-20190510212405-310228904eff/go.mod h1:cubdLmQFqEUZ9vNJrznhgc3m3VMAJi/nY2Ix2axXkG0= +github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= +github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc= +github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= +github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= +github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU= +github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= +github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94 h1:0ngsPmuP6XIjiFRNFYlvKwSr5zff2v+uPHaffZ6/M4k= +github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= +github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271 h1:WhxRHzgeVGETMlmVfqhRn8RIeeNoPr2Czh33I4Zdccw= +github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1 h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= +github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= +github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= +github.com/tidwall/gjson v1.3.2 h1:+7p3qQFaH3fOMXAJSrdZwGKcOO/lYdGS0HqGhPqDdTI= 
+github.com/tidwall/gjson v1.3.2/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls= +github.com/tidwall/match v1.0.1 h1:PnKP62LPNxHKTwvHHZZzdOAOCtsJTjo6dZLCwpKm5xc= +github.com/tidwall/match v1.0.1/go.mod h1:LujAq0jyVjBy028G1WhWfIzbpQfMO8bBZ6Tyb0+pL9E= +github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= +github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 h1:LnC5Kc/wtumK+WB441p7ynQJzVuNRJiqddSIE3IlSEQ= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/ugorji/go v1.1.4 h1:j4s+tAvLfL3bZyefP2SEWmhBzmuIlH/eqNuPdFPgngw= +github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc= +github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= +github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA= +github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8= +github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 h1:3UeQBvD0TFrlVjOeLOBz+CPAI8dnbqNSVwUwRrkp7vQ= +github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0/go.mod h1:IXCdmsXIht47RaVFLEdVnh1t+pgYtTAhQGj73kz+2DM= +github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= +github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +go.etcd.io/bbolt v1.3.2 h1:Z/90sZLPOeCy2PwprqkFa25PdkusRzaj9P8zm/KNyvk= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/bbolt v1.3.3 h1:MUGmc65QhB3pIlaQ5bB4LwqSj6GIonVJXpZiaKNyaKk= +go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/etcd v3.3.15+incompatible h1:0VpOVCF6EFnJptt8Jh0EWEHO4j2fepyV1fpu9xz/UoQ= +go.etcd.io/etcd v3.3.15+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI= +go.opencensus.io v0.15.0/go.mod h1:UffZAU+4sDEINUGP/B7UfBBkq4fqLu9zXAX7ke6CHW0= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opencensus.io v0.22.0 h1:C9hSCOW830chIVkdja34wa6Ky+IzWllkUinR+BtRZd4= +go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= +go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/multierr v1.2.0 h1:6I+W7f5VwC5SV9dNrZ3qXrDB9mD0dyGOi/ZJmYw03T4= +go.uber.org/multierr v1.2.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +gocloud.dev v0.16.0 h1:hWeaQWxamGerwsU7B9xSWvUjx0p7TwG8fcHro2TzbbM= +gocloud.dev v0.16.0/go.mod 
h1:xWGXD8t7bEhqPIuyAUFyXV9qHn+PvEY2F2GKPh7i/O0= +gocloud.dev/pubsub/natspubsub v0.16.0 h1:MoBGXULDzb1fVaZsGWO5cUCgr6yoI/DHhau8OPGaGEI= +gocloud.dev/pubsub/natspubsub v0.16.0/go.mod h1:0n7pT7PkLMClBUHDrOkHfOFVr/o/6kawNMwsyAbwadI= +gocloud.dev/pubsub/rabbitpubsub v0.16.0 h1:Bkv2njMSl2tmT3tGbvbwpiIDAXBIpqzP9dmts+rhD4E= +gocloud.dev/pubsub/rabbitpubsub v0.16.0/go.mod h1:JJVdUUIqwgaaMJg/1xHQza0g4sI/4KHHSNiGE+pn4JM= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= +golang.org/x/crypto v0.0.0-20190530122614-20be4c3c3ed5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 h1:58fnuSXlxZmFdJyvtTFVmVhcMLU6v5fEb/ok4wyqtNU= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190911031432-227b76d455e7 h1:0hQKqeLdqlt5iIwVOBErRisrHJAN57yOiPRQItI20fU= +golang.org/x/crypto v0.0.0-20190911031432-227b76d455e7/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= +golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067 h1:KYGJGHOQy8oSi1fDlSpcZF0+juKwk/hEMv5SiwHogR0= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190829233526-b3c06291d021 h1:j6QOxNFMpEL1wIQX6TUdBPNfGZKmBOJS/vfSm8a7tdM= +golang.org/x/image v0.0.0-20190829233526-b3c06291d021/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190619014844-b5b0513f8c1b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190909003024-a7b16738d86b h1:XfVGCX+0T4WOStkaOsJRllbsiImhB2jgVBGc9L0lPGc= +golang.org/x/net v0.0.0-20190909003024-a7b16738d86b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190620070143-6f217b454f45/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0 h1:HyfiK1WMnHj5FXFXatD+Qs1A/xC2Run6RzeW1SyHxpc= +golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190910064555-bbd175535a8b h1:3S2h5FadpNr0zUUCVZjlKIEYF+KaX/OBplTGo89CYHI= +golang.org/x/sys v0.0.0-20190910064555-bbd175535a8b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190422233926-fe54fb35175b/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190911022129-16c5e0f7d110 
h1:6S6bidS7O4yAwA5ORRbRIjvNQ9tGbLd5e+LRIaTeVDQ= +golang.org/x/tools v0.0.0-20190911022129-16c5e0f7d110/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+yaf3eVbP7djjWp/dXAppNCc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.5.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.6.0/go.mod h1:btoxGiFvQNVUZQ8W08zLtrVS08CNpINPEfxXxgJL1Q4= +google.golang.org/api v0.7.0 h1:9sdfJOzWlkqPltHAuzT2Cp+yrBeY1KRVYgms8soxMwM= +google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= +google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.9.0 h1:jbyannxz0XFD3zdjgrSUsaJbgpH4eTrkdhRChkHPfO8= +google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine v1.6.2 h1:j8RI1yW0SkI+paT6uGwMlrMI/6zwYA6/CFil8rxOzGI= +google.golang.org/appengine v1.6.2/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190508193815-b515fa19cec8/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= +google.golang.org/genproto v0.0.0-20190620144150-6af8c5fc6601/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= +google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190905072037-92dd089d5514 h1:oFSK4421fpCKRrpzIpybyBVWyht05NegY9+L/3TLAZs= +google.golang.org/genproto v0.0.0-20190905072037-92dd089d5514/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.23.0 h1:AzbTB6ux+okLTzP8Ru1Xs41C303zdcfEht7MQnYJt5A= +google.golang.org/grpc v1.23.0/go.mod 
h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/jcmturner/aescts.v1 v1.0.1 h1:cVVZBK2b1zY26haWB4vbBiZrfFQnfbTVrE3xZq6hrEw= +gopkg.in/jcmturner/aescts.v1 v1.0.1/go.mod h1:nsR8qBOg+OucoIW+WMhB3GspUQXq9XorLnQb9XtvcOo= +gopkg.in/jcmturner/dnsutils.v1 v1.0.1 h1:cIuC1OLRGZrld+16ZJvvZxVJeKPsvd5eUIvxfoN5hSM= +gopkg.in/jcmturner/dnsutils.v1 v1.0.1/go.mod h1:m3v+5svpVOhtFAP/wSz+yzh4Mc0Fg7eRhxkJMWSIz9Q= +gopkg.in/jcmturner/goidentity.v3 v3.0.0 h1:1duIyWiTaYvVx3YX2CYtpJbUFd7/UuPYCfgXtQ3VTbI= +gopkg.in/jcmturner/goidentity.v3 v3.0.0/go.mod h1:oG2kH0IvSYNIu80dVAyu/yoefjq1mNfM5bm88whjWx4= +gopkg.in/jcmturner/gokrb5.v7 v7.2.3 h1:hHMV/yKPwMnJhPuPx7pH2Uw/3Qyf+thJYlisUc44010= +gopkg.in/jcmturner/gokrb5.v7 v7.2.3/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= +gopkg.in/jcmturner/gokrb5.v7 v7.3.0 h1:0709Jtq/6QXEuWRfAm260XqlpcwL1vxtO1tUE2qK8Z4= +gopkg.in/jcmturner/gokrb5.v7 v7.3.0/go.mod h1:l8VISx+WGYp+Fp7KRbsiUuXTTOnxIc3Tuvyavf11/WM= +gopkg.in/jcmturner/rpc.v1 v1.1.0 h1:QHIUxTX1ISuAv9dD2wJ9HWQVuWDX/Zc0PfeC2tjc4rU= +gopkg.in/jcmturner/rpc.v1 v1.1.0/go.mod h1:YIdkC4XfD6GXbzje11McwsDuOlZQSb9W4vfLvuNnlv8= +gopkg.in/karlseguin/expect.v1 v1.0.1 h1:9u0iUltnhFbJTHaSIH0EP+cuTU5rafIgmcsEsg2JQFw= +gopkg.in/karlseguin/expect.v1 v1.0.1/go.mod h1:uB7QIJBcclvYbwlUDkSCsGjAOMis3fP280LyhuDEf2I= +gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +pack.ag/amqp v0.11.2/go.mod h1:4/cbmt4EJXSKlG6LCfWHoqmN0uFdy5i/+YFz+fTfhV4= +rsc.io/binaryregexp v0.2.0/go.mod 
h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= +sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= +sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
diff --git a/k8s/README.md b/k8s/README.md
new file mode 100644
index 000000000..5ec3ab407
--- /dev/null
+++ b/k8s/README.md
@@ -0,0 +1,23 @@
+## SEAWEEDFS - helm chart (2.x)
+
+### info:
+* master/filer/volume are stateful sets with anti-affinity on the hostname,
+so your deployment will be spread out and highly available.
+* the chart uses memsql (a MySQL-compatible database) as the filer backend to enable HA (multiple filer instances),
+plus the backup/HA features memsql can provide.
+* the mysql user/password are created in a k8s secret (secret-seaweedfs-db.yaml) and injected into the filer
+via environment variables.
+* cert config exists and can be enabled, but has not been tested.
+
+### current instances config (AIO):
+1 instance of each type (master/filer/volume/s3)
+
+instances need node labels:
+* sw-volume: true (for volume instances; a specific tag)
+* sw-backend: true (for all the others, as they are less resource demanding)
+
+you can update the replica count for each node type in values.yaml (see the sketch below),
+but you will need to add more nodes with the corresponding label.
+
+most of the configuration is available through values.yaml
+
diff --git a/k8s/seaweedfs/.helmignore b/k8s/seaweedfs/.helmignore
new file mode 100644
index 000000000..50af03172
--- /dev/null
+++ b/k8s/seaweedfs/.helmignore
@@ -0,0 +1,22 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/k8s/seaweedfs/Chart.yaml b/k8s/seaweedfs/Chart.yaml
new file mode 100644
index 000000000..136d91e20
--- /dev/null
+++ b/k8s/seaweedfs/Chart.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+description: SeaweedFS
+name: seaweedfs
+version: 1.61
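The README's scaling note above is the operative bit of this chart: raising a replica count only helps if enough nodes carry the matching label. A minimal sketch of such an override, assuming the chart exposes a per-component `replicas` field in values.yaml (an assumption here; the values file itself is not part of this section of the diff):

```yaml
# Hypothetical values override: scale volume servers from the AIO default to 3.
# Field names are assumptions, not verified against the chart's values.yaml.
volume:
  replicas: 3
# Each extra replica still needs a schedulable node labeled sw-volume=true,
# e.g. (shell): kubectl label node <node-name> sw-volume=true
```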
diff --git a/k8s/seaweedfs/templates/_helpers.tpl b/k8s/seaweedfs/templates/_helpers.tpl
new file mode 100644
index 000000000..04a782f8b
--- /dev/null
+++ b/k8s/seaweedfs/templates/_helpers.tpl
@@ -0,0 +1,114 @@
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to
+this (by the DNS naming spec). If release name contains chart name it will
+be used as a full name.
+*/}}
+{{- define "seaweedfs.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "seaweedfs.chart" -}}
+{{- printf "%s-helm" .Chart.Name | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "seaweedfs.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Inject extra environment vars in the format key:value, if populated
+*/}}
+{{- define "seaweedfs.extraEnvironmentVars" -}}
+{{- if .extraEnvironmentVars -}}
+{{- range $key, $value := .extraEnvironmentVars }}
+- name: {{ $key }}
+  value: {{ $value | quote }}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/* Return the proper filer image */}}
+{{- define "filer.image" -}}
+{{- if .Values.filer.imageOverride -}}
+{{- $imageOverride := .Values.filer.imageOverride -}}
+{{- printf "%s" $imageOverride -}}
+{{- else -}}
+{{- $registryName := default .Values.image.registry .Values.global.localRegistry | toString -}}
+{{- $repositoryName := .Values.image.repository | toString -}}
+{{- $name := .Values.global.imageName | toString -}}
+{{- $tag := .Values.global.imageTag | toString -}}
+{{- printf "%s%s%s:%s" $registryName $repositoryName $name $tag -}}
+{{- end -}}
+{{- end -}}
+
+{{/* Return the proper postgresqlSchema image */}}
+{{- define "filer.dbSchema.image" -}}
+{{- if .Values.filer.dbSchema.imageOverride -}}
+{{- $imageOverride := .Values.filer.dbSchema.imageOverride -}}
+{{- printf "%s" $imageOverride -}}
+{{- else -}}
+{{- $registryName := default .Values.global.registry .Values.global.localRegistry | toString -}}
+{{- $repositoryName := .Values.global.repository | toString -}}
+{{- $name := .Values.filer.dbSchema.imageName | toString -}}
+{{- $tag := .Values.filer.dbSchema.imageTag | toString -}}
+{{- printf "%s%s%s:%s" $registryName $repositoryName $name $tag -}}
+{{- end -}}
+{{- end -}}
+
+{{/* Return the proper master image */}}
+{{- define "master.image" -}}
+{{- if .Values.master.imageOverride -}}
+{{- $imageOverride := .Values.master.imageOverride -}}
+{{- printf "%s" $imageOverride -}}
+{{- else -}}
+{{- $registryName := default .Values.image.registry .Values.global.localRegistry | toString -}}
+{{- $repositoryName := .Values.image.repository | toString -}}
+{{- $name := .Values.global.imageName | toString -}}
+{{- $tag := .Values.global.imageTag | toString -}}
+{{- printf "%s%s%s:%s" $registryName $repositoryName $name $tag -}}
+{{- end -}}
+{{- end -}}
+
+{{/* Return the proper s3 image */}}
+{{- define "s3.image" -}}
+{{- if .Values.s3.imageOverride -}}
+{{- $imageOverride := .Values.s3.imageOverride -}}
+{{- printf "%s" $imageOverride -}}
+{{- else -}}
+{{- $registryName := default .Values.image.registry .Values.global.localRegistry | toString -}}
+{{- $repositoryName := .Values.image.repository | toString -}}
+{{- $name := .Values.global.imageName | toString -}}
+{{- $tag := .Values.global.imageTag | toString -}}
+{{- printf "%s%s%s:%s" $registryName $repositoryName $name $tag -}}
+{{- end -}}
+{{- end -}}
+
+{{/* Return the proper volume image */}}
+{{- define "volume.image" -}}
+{{- if .Values.volume.imageOverride -}}
+{{- $imageOverride := .Values.volume.imageOverride -}}
+{{- printf "%s" $imageOverride -}}
+{{- else -}}
+{{- $registryName := default .Values.image.registry .Values.global.localRegistry | toString -}}
+{{- $repositoryName := .Values.image.repository | toString -}}
+{{- $name := .Values.global.imageName | toString -}}
+{{- $tag := .Values.global.imageTag | toString -}}
+{{- printf "%s%s%s:%s" $registryName $repositoryName $name $tag -}}
+{{- end -}}
+{{- end -}}
\ No newline at end of file
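All five image helpers above build the image reference by bare concatenation, `printf "%s%s%s:%s" $registryName $repositoryName $name $tag`, so any non-empty registry or repository value must carry its own trailing slash. A minimal sketch of the values they read, where the concrete values are illustrative assumptions rather than chart defaults:

```yaml
# values.yaml fragment read by the *.image helpers (example values assumed):
image:
  registry: ""                 # concatenated verbatim; end with "/" if set
  repository: ""               # likewise, e.g. "myteam/"
global:
  localRegistry: ""            # when set, takes precedence over image.registry
  imageName: chrislusf/seaweedfs
  imageTag: "1.61"
# master.image, filer.image, s3.image and volume.image then render as
#   <registry><repository><imageName>:<imageTag>  ->  chrislusf/seaweedfs:1.61
```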
\ No newline at end of file diff --git a/k8s/seaweedfs/templates/ca-cert.yaml b/k8s/seaweedfs/templates/ca-cert.yaml new file mode 100644 index 000000000..056f01502 --- /dev/null +++ b/k8s/seaweedfs/templates/ca-cert.yaml @@ -0,0 +1,14 @@ +{{- if .Values.global.enableSecurity }} +apiVersion: certmanager.k8s.io/v1alpha1 +kind: Certificate +metadata: + name: {{ template "seaweedfs.name" . }}-ca-cert + namespace: {{ .Release.Namespace }} +spec: + secretName: {{ template "seaweedfs.name" . }}-ca-cert + commonName: "{{ template "seaweedfs.name" . }}-root-ca" + isCA: true + issuerRef: + name: {{ template "seaweedfs.name" . }}-clusterissuer + kind: ClusterIssuer +{{- end }} diff --git a/k8s/seaweedfs/templates/cert-clusterissuer.yaml b/k8s/seaweedfs/templates/cert-clusterissuer.yaml new file mode 100644 index 000000000..d0bd42593 --- /dev/null +++ b/k8s/seaweedfs/templates/cert-clusterissuer.yaml @@ -0,0 +1,8 @@ +{{- if .Values.global.enableSecurity }} +apiVersion: certmanager.k8s.io/v1alpha1 +kind: ClusterIssuer +metadata: + name: {{ template "seaweedfs.name" . }}-clusterissuer +spec: + selfSigned: {} +{{- end }} diff --git a/k8s/seaweedfs/templates/client-cert.yaml b/k8s/seaweedfs/templates/client-cert.yaml new file mode 100644 index 000000000..4d27b5659 --- /dev/null +++ b/k8s/seaweedfs/templates/client-cert.yaml @@ -0,0 +1,33 @@ +{{- if .Values.global.enableSecurity }} +apiVersion: certmanager.k8s.io/v1alpha1 +kind: Certificate +metadata: + name: {{ template "seaweedfs.name" . }}-client-cert + namespace: {{ .Release.Namespace }} +spec: + secretName: {{ template "seaweedfs.name" . }}-client-cert + issuerRef: + name: {{ template "seaweedfs.name" . }}-clusterissuer + kind: ClusterIssuer + commonName: {{ .Values.certificates.commonName }} + organization: + - "SeaweedFS CA" + dnsNames: + - '*.{{ .Release.Namespace }}' + - '*.{{ .Release.Namespace }}.svc' + - '*.{{ .Release.Namespace }}.svc.cluster.local' + - '*.{{ template "seaweedfs.name" . }}-master' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}.svc' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}.svc.cluster.local' +{{- if .Values.certificates.ipAddresses }} + ipAddresses: + {{- range .Values.certificates.ipAddresses }} + - {{ . }} + {{- end }} +{{- end }} + keyAlgorithm: {{ .Values.certificates.keyAlgorithm }} + keySize: {{ .Values.certificates.keySize }} + duration: {{ .Values.certificates.duration }} + renewBefore: {{ .Values.certificates.renewBefore }} +{{- end }} diff --git a/k8s/seaweedfs/templates/filer-cert.yaml b/k8s/seaweedfs/templates/filer-cert.yaml new file mode 100644 index 000000000..855183c54 --- /dev/null +++ b/k8s/seaweedfs/templates/filer-cert.yaml @@ -0,0 +1,33 @@ +{{- if .Values.global.enableSecurity }} +apiVersion: certmanager.k8s.io/v1alpha1 +kind: Certificate +metadata: + name: {{ template "seaweedfs.name" . }}-filer-cert + namespace: {{ .Release.Namespace }} +spec: + secretName: {{ template "seaweedfs.name" . }}-filer-cert + issuerRef: + name: {{ template "seaweedfs.name" . }}-clusterissuer + kind: ClusterIssuer + commonName: {{ .Values.certificates.commonName }} + organization: + - "SeaweedFS CA" + dnsNames: + - '*.{{ .Release.Namespace }}' + - '*.{{ .Release.Namespace }}.svc' + - '*.{{ .Release.Namespace }}.svc.cluster.local' + - '*.{{ template "seaweedfs.name" . }}-master' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}' + - '*.{{ template "seaweedfs.name" . 
}}-master.{{ .Release.Namespace }}.svc' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}.svc.cluster.local' +{{- if .Values.certificates.ipAddresses }} + ipAddresses: + {{- range .Values.certificates.ipAddresses }} + - {{ . }} + {{- end }} +{{- end }} + keyAlgorithm: {{ .Values.certificates.keyAlgorithm }} + keySize: {{ .Values.certificates.keySize }} + duration: {{ .Values.certificates.duration }} + renewBefore: {{ .Values.certificates.renewBefore }} +{{- end }} diff --git a/k8s/seaweedfs/templates/filer-service.yaml b/k8s/seaweedfs/templates/filer-service.yaml new file mode 100644 index 000000000..493859e36 --- /dev/null +++ b/k8s/seaweedfs/templates/filer-service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "seaweedfs.name" . }}-filer + namespace: {{ .Release.Namespace }} + labels: + app: {{ template "seaweedfs.name" . }} + component: filer +spec: + clusterIP: None + ports: + - name: "swfs-filer" + port: {{ .Values.filer.port }} + targetPort: {{ .Values.filer.port }} + protocol: TCP + - name: "swfs-filer-grpc" + port: {{ .Values.filer.grpcPort }} + targetPort: {{ .Values.filer.grpcPort }} + protocol: TCP + selector: + app: {{ template "seaweedfs.name" . }} + component: filer
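Since the filer service above is headless (`clusterIP: None`), DNS returns the individual pod IPs, and each pod also gets a stable `<pod>.<service>` record. A quick in-cluster sanity check (release name `seaweedfs` and namespace `default` are assumptions):

```sh
# Expect one A record per running filer pod.
kubectl run dns-probe --rm -it --restart=Never --image=busybox:1.31 -- \
  nslookup seaweedfs-filer.default.svc.cluster.local
```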
\ No newline at end of file diff --git a/k8s/seaweedfs/templates/filer-statefulset.yaml b/k8s/seaweedfs/templates/filer-statefulset.yaml new file mode 100644 index 000000000..43da74c43 --- /dev/null +++ b/k8s/seaweedfs/templates/filer-statefulset.yaml @@ -0,0 +1,207 @@ +{{- if .Values.filer.enabled }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ template "seaweedfs.name" . }}-filer + namespace: {{ .Release.Namespace }} + labels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "seaweedfs.name" . }}-filer + podManagementPolicy: Parallel + replicas: {{ .Values.filer.replicas }} + {{- if (gt (int .Values.filer.updatePartition) 0) }} + updateStrategy: + type: RollingUpdate + rollingUpdate: + partition: {{ .Values.filer.updatePartition }} + {{- end }} + selector: + matchLabels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + release: {{ .Release.Name }} + component: filer + template: + metadata: + labels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + release: {{ .Release.Name }} + component: filer + spec: + restartPolicy: {{ default .Values.global.restartPolicy .Values.filer.restartPolicy }} + {{- if .Values.filer.affinity }} + affinity: + {{ tpl .Values.filer.affinity . | nindent 8 | trim }} + {{- end }} + {{- if .Values.filer.tolerations }} + tolerations: + {{ tpl .Values.filer.tolerations . | nindent 8 | trim }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecrets }} + {{- end }} + serviceAccountName: seaweefds-rw-sa #hack for delete pod master after migration + terminationGracePeriodSeconds: 60 + {{- if .Values.filer.priorityClassName }} + priorityClassName: {{ .Values.filer.priorityClassName | quote }} + {{- end }} + enableServiceLinks: false + containers: + - name: seaweedfs + image: {{ template "filer.image" . }} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.imagePullPolicy }} + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: WEED_MYSQL_USERNAME + valueFrom: + secretKeyRef: + name: secret-seaweedfs-db + key: user + - name: WEED_MYSQL_PASSWORD + valueFrom: + secretKeyRef: + name: secret-seaweedfs-db + key: password + - name: SEAWEEDFS_FULLNAME + value: "{{ template "seaweedfs.name" . 
}}" + {{- if .Values.filer.extraEnvironmentVars }} + {{- range $key, $value := .Values.filer.extraEnvironmentVars }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- end }} + command: + - "/bin/sh" + - "-ec" + - | + exec /usr/bin/weed -logdir=/logs \ + {{- if .Values.filer.loggingOverrideLevel }} + -v={{ .Values.filer.loggingOverrideLevel }} \ + {{- else }} + -v={{ .Values.global.loggingLevel }} \ + {{- end }} + filer \ + -port={{ .Values.filer.port }} \ + {{- if .Values.filer.disableHttp }} + -disableHttp \ + {{- end }} + {{- if .Values.filer.disableDirListing }} + -disableDirListing \ + {{- end }} + -dirListLimit={{ .Values.filer.dirListLimit }} \ + -ip=${POD_IP} \ + -master={{ range $index := until (.Values.master.replicas | int) }}${SEAWEEDFS_FULLNAME}-master-{{ $index }}.${SEAWEEDFS_FULLNAME}-master:{{ $.Values.master.port }}{{ if lt $index (sub ($.Values.master.replicas | int) 1) }},{{ end }}{{ end }} + {{- if or (.Values.global.enableSecurity) (.Values.filer.extraVolumeMounts) }} + volumeMounts: + - name: seaweedfs-filer-log-volume + mountPath: "/logs/" + {{- if .Values.global.enableSecurity }} + - name: security-config + readOnly: true + mountPath: /etc/seaweedfs/security.toml + subPath: security.toml + - name: ca-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/ca/ + - name: master-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/master/ + - name: volume-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/volume/ + - name: filer-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/filer/ + - name: client-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/client/ + {{- end }} + {{ tpl .Values.filer.extraVolumeMounts . | nindent 12 | trim }} + {{- end }} + ports: + - containerPort: {{ .Values.filer.port }} + name: swfs-filer + - containerPort: {{ .Values.filer.grpcPort }} + #name: swfs-filer-grpc + readinessProbe: + httpGet: + path: / + port: {{ .Values.filer.port }} + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 15 + successThreshold: 1 + failureThreshold: 100 + livenessProbe: + httpGet: + path: / + port: {{ .Values.filer.port }} + scheme: HTTP + initialDelaySeconds: 20 + periodSeconds: 30 + successThreshold: 1 + failureThreshold: 5 + {{- if .Values.filer.resources }} + resources: + {{ tpl .Values.filer.resources . | nindent 12 | trim }} + {{- end }} + volumes: + - name: seaweedfs-filer-log-volume + hostPath: + path: /storage/logs/seaweedfs/filer + type: DirectoryOrCreate + {{- if .Values.global.enableSecurity }} + - name: security-config + configMap: + name: {{ template "seaweedfs.name" . }}-security-config + - name: ca-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-ca-cert + - name: master-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-master-cert + - name: volume-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-volume-cert + - name: filer-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-filer-cert + - name: client-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-client-cert + {{- end }} + {{ tpl .Values.filer.extraVolumes . | indent 8 | trim }} + {{- if .Values.filer.nodeSelector }} + nodeSelector: + {{ tpl .Values.filer.nodeSelector . 
| indent 8 | trim }} + {{- end }} +{{/* volumeClaimTemplates:*/}} +{{/* - metadata:*/}} +{{/* name: data-{{ .Release.Namespace }}*/}} +{{/* spec:*/}} +{{/* accessModes:*/}} +{{/* - ReadWriteOnce*/}} +{{/* resources:*/}} +{{/* requests:*/}} +{{/* storage: {{ .Values.filer.storage }}*/}} +{{/* {{- if .Values.filer.storageClass }}*/}} +{{/* storageClassName: {{ .Values.filer.storageClass }}*/}} +{{/* {{- end }}*/}} +{{- end }} diff --git a/k8s/seaweedfs/templates/ingress.yaml b/k8s/seaweedfs/templates/ingress.yaml new file mode 100644 index 000000000..dcd52c138 --- /dev/null +++ b/k8s/seaweedfs/templates/ingress.yaml @@ -0,0 +1,59 @@ +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: ingress-{{ template "seaweedfs.name" . }}-filer + annotations: + kubernetes.io/ingress.class: "nginx" + nginx.ingress.kubernetes.io/auth-type: "basic" + nginx.ingress.kubernetes.io/auth-secret: "default/ingress-basic-auth-secret" + nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required - SW-Filer' + nginx.ingress.kubernetes.io/service-upstream: "true" + nginx.ingress.kubernetes.io/rewrite-target: /$1 + nginx.ingress.kubernetes.io/use-regex: "true" + nginx.ingress.kubernetes.io/enable-rewrite-log: "true" + nginx.ingress.kubernetes.io/ssl-redirect: "false" + nginx.ingress.kubernetes.io/force-ssl-redirect: "false" + nginx.ingress.kubernetes.io/configuration-snippet: | + sub_filter '<head>' '<head> <base href="/sw-filer/">'; #add base url + sub_filter '="/' '="./'; #make absolute paths to relative + sub_filter '=/' '=./'; + sub_filter '/seaweedfsstatic' './seaweedfsstatic'; + sub_filter_once off; +spec: + rules: + - http: + paths: + - path: /sw-filer/?(.*) + backend: + serviceName: {{ template "seaweedfs.name" . }}-filer + servicePort: {{ .Values.filer.port }} +--- +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: ingress-{{ template "seaweedfs.name" . }}-master + annotations: + kubernetes.io/ingress.class: "nginx" + nginx.ingress.kubernetes.io/auth-type: "basic" + nginx.ingress.kubernetes.io/auth-secret: "default/ingress-basic-auth-secret" + nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required - SW-Master' + nginx.ingress.kubernetes.io/service-upstream: "true" + nginx.ingress.kubernetes.io/rewrite-target: /$1 + nginx.ingress.kubernetes.io/use-regex: "true" + nginx.ingress.kubernetes.io/enable-rewrite-log: "true" + nginx.ingress.kubernetes.io/ssl-redirect: "false" + nginx.ingress.kubernetes.io/force-ssl-redirect: "false" + nginx.ingress.kubernetes.io/configuration-snippet: | + sub_filter '<head>' '<head> <base href="/sw-master/">'; #add base url + sub_filter '="/' '="./'; #make absolute paths to relative + sub_filter '=/' '=./'; + sub_filter '/seaweedfsstatic' './seaweedfsstatic'; + sub_filter_once off; +spec: + rules: + - http: + paths: + - path: /sw-master/?(.*) + backend: + serviceName: {{ template "seaweedfs.name" . }}-master + servicePort: {{ .Values.master.port }} diff --git a/k8s/seaweedfs/templates/master-cert.yaml b/k8s/seaweedfs/templates/master-cert.yaml new file mode 100644 index 000000000..a8b0fc1d1 --- /dev/null +++ b/k8s/seaweedfs/templates/master-cert.yaml @@ -0,0 +1,33 @@ +{{- if .Values.global.enableSecurity }} +apiVersion: certmanager.k8s.io/v1alpha1 +kind: Certificate +metadata: + name: {{ template "seaweedfs.name" . }}-master-cert + namespace: {{ .Release.Namespace }} +spec: + secretName: {{ template "seaweedfs.name" . }}-master-cert + issuerRef: + name: {{ template "seaweedfs.name" . 
}}-clusterissuer + kind: ClusterIssuer + commonName: {{ .Values.certificates.commonName }} + organization: + - "SeaweedFS CA" + dnsNames: + - '*.{{ .Release.Namespace }}' + - '*.{{ .Release.Namespace }}.svc' + - '*.{{ .Release.Namespace }}.svc.cluster.local' + - '*.{{ template "seaweedfs.name" . }}-master' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}.svc' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}.svc.cluster.local' +{{- if .Values.certificates.ipAddresses }} + ipAddresses: + {{- range .Values.certificates.ipAddresses }} + - {{ . }} + {{- end }} +{{- end }} + keyAlgorithm: {{ .Values.certificates.keyAlgorithm }} + keySize: {{ .Values.certificates.keySize }} + duration: {{ .Values.certificates.duration }} + renewBefore: {{ .Values.certificates.renewBefore }} +{{- end }} diff --git a/k8s/seaweedfs/templates/master-service.yaml b/k8s/seaweedfs/templates/master-service.yaml new file mode 100644 index 000000000..f7603bd91 --- /dev/null +++ b/k8s/seaweedfs/templates/master-service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "seaweedfs.name" . }}-master + namespace: {{ .Release.Namespace }} + labels: + app: {{ template "seaweedfs.name" . }} + component: master + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + clusterIP: None + ports: + - name: "swfs-master" + port: {{ .Values.master.port }} + targetPort: {{ .Values.master.port }} + protocol: TCP + - name: "swfs-master-grpc" + port: {{ .Values.master.grpcPort }} + targetPort: {{ .Values.master.grpcPort }} + protocol: TCP + selector: + app: {{ template "seaweedfs.name" . }} + component: master
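The master StatefulSet below builds its `-peers` flag from this headless service's per-pod DNS records. As an illustrative rendering only (release named `seaweedfs`, `master.replicas: 3`, port 9333), the `range $index := until ...` loop would produce a startup command roughly like:

```sh
# Sketch of the command rendered inside seaweedfs-master-0; not literal chart output.
weed master -port=9333 -mdir=/data \
  -ip=seaweedfs-master-0.seaweedfs-master \
  -peers=seaweedfs-master-0.seaweedfs-master:9333,seaweedfs-master-1.seaweedfs-master:9333,seaweedfs-master-2.seaweedfs-master:9333
```

Note that the loop emits every replica, including the pod's own address, which the master tolerates.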
\ No newline at end of file diff --git a/k8s/seaweedfs/templates/master-statefulset.yaml b/k8s/seaweedfs/templates/master-statefulset.yaml new file mode 100644 index 000000000..87050534f --- /dev/null +++ b/k8s/seaweedfs/templates/master-statefulset.yaml @@ -0,0 +1,199 @@ +{{- if .Values.master.enabled }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ template "seaweedfs.name" . }}-master + namespace: {{ .Release.Namespace }} + labels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "seaweedfs.name" . }}-master + podManagementPolicy: Parallel + replicas: {{ .Values.master.replicas }} + {{- if (gt (int .Values.master.updatePartition) 0) }} + updateStrategy: + type: RollingUpdate + rollingUpdate: + partition: {{ .Values.master.updatePartition }} + {{- end }} + selector: + matchLabels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + release: {{ .Release.Name }} + component: master + template: + metadata: + labels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + release: {{ .Release.Name }} + component: master + spec: + restartPolicy: {{ default .Values.global.restartPolicy .Values.master.restartPolicy }} + {{- if .Values.master.affinity }} + affinity: + {{ tpl .Values.master.affinity . | nindent 8 | trim }} + {{- end }} + {{- if .Values.master.tolerations }} + tolerations: + {{ tpl .Values.master.tolerations . | nindent 8 | trim }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecrets }} + {{- end }} + terminationGracePeriodSeconds: 60 + {{- if .Values.master.priorityClassName }} + priorityClassName: {{ .Values.master.priorityClassName | quote }} + {{- end }} + enableServiceLinks: false + containers: + - name: seaweedfs + image: {{ template "master.image" . }} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.imagePullPolicy }} + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: SEAWEEDFS_FULLNAME + value: "{{ template "seaweedfs.name" . 
}}" + command: + - "/bin/sh" + - "-ec" + - | + exec /usr/bin/weed -logdir=/logs \ + {{- if .Values.master.loggingOverrideLevel }} + -v={{ .Values.master.loggingOverrideLevel }} \ + {{- else }} + -v={{ .Values.global.loggingLevel }} \ + {{- end }} + master \ + -port={{ .Values.master.port }} \ + -mdir=/data \ + -ip.bind={{ .Values.master.ipBind }} \ + {{- if .Values.master.volumePreallocate }} + -volumePreallocate \ + {{- end }} + {{- if .Values.global.monitoring.enabled }} + -metrics.address="{{ .Values.global.monitoring.gatewayHost }}:{{ .Values.global.monitoring.gatewayPort }}" \ + {{- end }} + -volumeSizeLimitMB={{ .Values.master.volumeSizeLimitMB }} \ + {{- if .Values.master.disableHttp }} + -disableHttp \ + {{- end }} + -ip=${POD_NAME}.${SEAWEEDFS_FULLNAME}-master \ + -peers={{ range $index := until (.Values.master.replicas | int) }}${SEAWEEDFS_FULLNAME}-master-{{ $index }}.${SEAWEEDFS_FULLNAME}-master:{{ $.Values.master.port }}{{ if lt $index (sub ($.Values.master.replicas | int) 1) }},{{ end }}{{ end }} + volumeMounts: + - name : data-{{ .Release.Namespace }} + mountPath: /data + - name: seaweedfs-master-log-volume + mountPath: "/logs/" + {{- if .Values.global.enableSecurity }} + - name: security-config + readOnly: true + mountPath: /etc/seaweedfs/security.toml + subPath: security.toml + - name: ca-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/ca/ + - name: master-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/master/ + - name: volume-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/volume/ + - name: filer-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/filer/ + - name: client-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/client/ + {{- end }} + {{ tpl .Values.master.extraVolumeMounts . | nindent 12 | trim }} + ports: + - containerPort: {{ .Values.master.port }} + name: swfs-master + - containerPort: {{ .Values.master.grpcPort }} + #name: swfs-master-grpc + readinessProbe: + httpGet: + path: /cluster/status + port: {{ .Values.master.port }} + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 15 + successThreshold: 2 + failureThreshold: 100 + livenessProbe: + httpGet: + path: /cluster/status + port: {{ .Values.master.port }} + scheme: HTTP + initialDelaySeconds: 20 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 6 + {{- if .Values.master.resources }} + resources: + {{ tpl .Values.master.resources . | nindent 12 | trim }} + {{- end }} + volumes: + - name: seaweedfs-master-log-volume + hostPath: + path: /storage/logs/seaweedfs/master + type: DirectoryOrCreate + - name: data-{{ .Release.Namespace }} + hostPath: + path: /ssd/seaweed-master/ + type: DirectoryOrCreate + {{- if .Values.global.enableSecurity }} + - name: security-config + configMap: + name: {{ template "seaweedfs.name" . }}-security-config + - name: ca-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-ca-cert + - name: master-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-master-cert + - name: volume-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-volume-cert + - name: filer-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-filer-cert + - name: client-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-client-cert + {{- end }} + {{ tpl .Values.master.extraVolumes . | indent 8 | trim }} + {{- if .Values.master.nodeSelector }} + nodeSelector: + {{ tpl .Values.master.nodeSelector . 
| indent 8 | trim }}
+      {{- end }}
+{{/*  volumeClaimTemplates:*/}}
+{{/*    - metadata:*/}}
+{{/*        name: data-{{ .Release.Namespace }}*/}}
+{{/*      spec:*/}}
+{{/*        accessModes:*/}}
+{{/*          - ReadWriteOnce*/}}
+{{/*        resources:*/}}
+{{/*          requests:*/}}
+{{/*            storage: {{ .Values.master.storage }}*/}}
+{{/*        {{- if .Values.master.storageClass }}*/}}
+{{/*        storageClassName: {{ .Values.master.storageClass }}*/}}
+{{/*        {{- end }}*/}}
+{{- end }}
diff --git a/k8s/seaweedfs/templates/s3-deployment.yaml b/k8s/seaweedfs/templates/s3-deployment.yaml
new file mode 100644
index 000000000..1bb3283f1
--- /dev/null
+++ b/k8s/seaweedfs/templates/s3-deployment.yaml
@@ -0,0 +1,157 @@
+{{- if .Values.s3.enabled }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ template "seaweedfs.name" . }}-s3
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app: {{ template "seaweedfs.name" . }}
+    chart: {{ template "seaweedfs.chart" . }}
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+spec:
+  replicas: {{ .Values.s3.replicas }}
+  selector:
+    matchLabels:
+      app: {{ template "seaweedfs.name" . }}
+      chart: {{ template "seaweedfs.chart" . }}
+      release: {{ .Release.Name }}
+      component: s3
+  template:
+    metadata:
+      labels:
+        app: {{ template "seaweedfs.name" . }}
+        chart: {{ template "seaweedfs.chart" . }}
+        release: {{ .Release.Name }}
+        component: s3
+    spec:
+      restartPolicy: {{ default .Values.global.restartPolicy .Values.s3.restartPolicy }}
+      {{- if .Values.s3.tolerations }}
+      tolerations:
+        {{ tpl .Values.s3.tolerations . | nindent 8 | trim }}
+      {{- end }}
+      {{- if .Values.global.imagePullSecrets }}
+      imagePullSecrets:
+        - name: {{ .Values.global.imagePullSecrets }}
+      {{- end }}
+      terminationGracePeriodSeconds: 10
+      {{- if .Values.s3.priorityClassName }}
+      priorityClassName: {{ .Values.s3.priorityClassName | quote }}
+      {{- end }}
+      enableServiceLinks: false
+      containers:
+        - name: seaweedfs
+          image: {{ template "s3.image" . }}
+          imagePullPolicy: {{ default "IfNotPresent" .Values.global.imagePullPolicy }}
+          env:
+            - name: POD_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.podIP
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: SEAWEEDFS_FULLNAME
+              value: "{{ template "seaweedfs.name" . }}"
+          command:
+            - "/bin/sh"
+            - "-ec"
+            - |
+              exec /usr/bin/weed \
+              {{- if .Values.s3.loggingOverrideLevel }}
+              -v={{ .Values.s3.loggingOverrideLevel }} \
+              {{- else }}
+              -v={{ .Values.global.loggingLevel }} \
+              {{- end }}
+              s3 \
+              -port={{ .Values.s3.port }} \
+              {{- if .Values.global.enableSecurity }}
+              -cert.file=/usr/local/share/ca-certificates/client/tls.crt \
+              -key.file=/usr/local/share/ca-certificates/client/tls.key \
+              {{- end }}
+              {{- if .Values.s3.domainName }}
+              -domainName={{ .Values.s3.domainName }} \
+              {{- end }}
+              -filer={{ template "seaweedfs.name" .
}}-filer:{{ .Values.filer.port }} + {{- if or (.Values.global.enableSecurity) (.Values.s3.extraVolumeMounts) }} + volumeMounts: + {{- if .Values.global.enableSecurity }} + - name: security-config + readOnly: true + mountPath: /etc/seaweedfs/security.toml + subPath: security.toml + - name: ca-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/ca/ + - name: master-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/master/ + - name: volume-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/volume/ + - name: filer-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/filer/ + - name: client-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/client/ + {{- end }} + {{ tpl .Values.s3.extraVolumeMounts . | nindent 12 | trim }} + {{- end }} + ports: + - containerPort: {{ .Values.s3.port }} + name: swfs-s3 + readinessProbe: + httpGet: + path: / + port: {{ .Values.s3.port }} + scheme: HTTP + initialDelaySeconds: 15 + periodSeconds: 15 + successThreshold: 1 + failureThreshold: 100 + livenessProbe: + httpGet: + path: / + port: {{ .Values.s3.port }} + scheme: HTTP + initialDelaySeconds: 20 + periodSeconds: 60 + successThreshold: 1 + failureThreshold: 20 + {{- if .Values.s3.resources }} + resources: + {{ tpl .Values.s3.resources . | nindent 12 | trim }} + {{- end }} + volumes: + {{- if .Values.global.enableSecurity }} + - name: security-config + configMap: + name: {{ template "seaweedfs.name" . }}-security-config + - name: ca-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-ca-cert + - name: master-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-master-cert + - name: volume-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-volume-cert + - name: filer-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-filer-cert + - name: client-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-client-cert + {{- end }} + {{ tpl .Values.s3.extraVolumes . | indent 8 | trim }} + {{- if .Values.s3.nodeSelector }} + nodeSelector: + {{ tpl .Values.s3.nodeSelector . | indent 8 | trim }} + {{- end }} +{{- end }} diff --git a/k8s/seaweedfs/templates/s3-service.yaml b/k8s/seaweedfs/templates/s3-service.yaml new file mode 100644 index 000000000..b088e25fa --- /dev/null +++ b/k8s/seaweedfs/templates/s3-service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "seaweedfs.name" . }}-s3 + namespace: {{ .Release.Namespace }} + labels: + app: {{ template "seaweedfs.name" . }} + component: s3 +spec: + ports: + - name: "swfs-s3" + port: {{ .Values.s3.port }} + targetPort: {{ .Values.s3.port }} + protocol: TCP + selector: + app: {{ template "seaweedfs.name" . }} + component: s3
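With the gateway behind this service, any S3-compatible client can talk to it. A sketch using the AWS CLI over a local port-forward (the 8333 port and release name are assumptions; credentials only matter once auth is enabled):

```sh
# Forward the in-cluster S3 service to localhost, then list buckets through the gateway.
kubectl port-forward svc/seaweedfs-s3 8333:8333 &
aws --endpoint-url http://127.0.0.1:8333 s3 ls
```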
\ No newline at end of file diff --git a/k8s/seaweedfs/templates/seaweefs-grafana-dashboard.yaml b/k8s/seaweedfs/templates/seaweefs-grafana-dashboard.yaml new file mode 100644 index 000000000..c943ea50f --- /dev/null +++ b/k8s/seaweedfs/templates/seaweefs-grafana-dashboard.yaml @@ -0,0 +1,1352 @@ +{{- if .Values.global.monitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: seaweefsfs-grafana-dashboard + labels: + grafana_dashboard: "1" +data: + seaweedfs.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "Prometheus", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": 10423, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "30s", + "rows": [ + { + "collapse": false, + "height": 251, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.80, sum(rate(SeaweedFS_filer_request_seconds_bucket[1m])) by (le))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "average", + "refId": "A", + "step": 60 + }, + { + "expr": "histogram_quantile(0.80, sum(rate(SeaweedFS_filer_request_seconds_bucket[1m])) by (le, type))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{`{{type}}`}}", + "refId": "B", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer Request Duration 95th percentile", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 49, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(SeaweedFS_filer_request_seconds_bucket[1m])) by (le))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "average", + "refId": "A", + "step": 60 + }, + { + "expr": "histogram_quantile(0.95, 
sum(rate(SeaweedFS_filer_request_seconds_bucket[1m])) by (le, type))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{`{{type}}`}}", + "refId": "B", + "step": 60 + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer Request Duration 95th percentile", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(SeaweedFS_filer_request_seconds_bucket[1m])) by (le))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "average", + "refId": "A", + "step": 60 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(SeaweedFS_filer_request_seconds_bucket[1m])) by (le, type))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{`{{type}}`}}", + "refId": "B", + "step": 60 + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer Request Duration 99th percentile", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "lines": false + } + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(SeaweedFS_filer_request_total[1m]) * 5", + "format": "time_series", + "intervalFactor": 2, 
+ "legendFormat": "{{`{{type}}`}}", + "refId": "A", + "step": 30 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer QPS", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Filer", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 252, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 47, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(SeaweedFS_volumeServer_request_seconds_bucket[1m])) by (le, exported_instance))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{`{{exported_instance}}`}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(SeaweedFS_volumeServer_request_seconds_bucket[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "average", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Volume Server Request Duration 99th percentile", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 40, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "total", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(SeaweedFS_volumeServer_request_total[1m])) by (type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{type}}`}}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Volume Server QPS", + "tooltip": 
{ + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "id": 48, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(SeaweedFS_volumeServer_volumes) by (collection, type)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{`{{collection}}`}} {{`{{type}}`}}", + "refId": "A" + }, + { + "expr": "sum(SeaweedFS_volumeServer_max_volumes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Total", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Volume Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "id": 50, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(SeaweedFS_volumeServer_total_disk_size) by (collection, type)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{`{{collection}}`}} {{`{{type}}`}}", + "refId": "A" + }, + { + "expr": "sum(SeaweedFS_volumeServer_total_disk_size)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Total", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Used Disk Space by Collection and Type", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "id": 51, + "legend": { + 
"avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(SeaweedFS_volumeServer_total_disk_size) by (exported_instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{`{{exported_instance}}`}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Used Disk Space by Host", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Volume Server", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 251, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(SeaweedFS_filerStore_request_seconds_bucket[1m])) by (le, type))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{type}}`}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer Store Request Duration 99th percentile", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(rate(SeaweedFS_filerStore_request_total [1m])) by (type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{type}}`}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer Store QPS", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Filer Store", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 242, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 52, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_alloc_bytes{exported_job=\"filer\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "bytes allocated", + "refId": "B" + }, + { + "expr": "rate(go_memstats_alloc_bytes_total{exported_job=\"filer\"}[30s])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "alloc rate", + "refId": "A" + }, + { + "expr": "go_memstats_stack_inuse_bytes{exported_job=\"filer\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "stack inuse", + "refId": "C" + }, + { + "expr": "go_memstats_heap_inuse_bytes{exported_job=\"filer\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "heap inuse", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer Go Memory Stats", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 54, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"go_gc_duration_seconds{exported_job=\"filer\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{quantile}}`}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer Go GC duration quantiles", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 53, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_goroutines{exported_job=\"filer\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{`{{exported_instance}}`}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filer Go Routines", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Filer Instances", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "SeaweedFS", + "version": 3 + } +{{- end }}
\ No newline at end of file
diff --git a/k8s/seaweedfs/templates/secret-seaweedfs-db.yaml b/k8s/seaweedfs/templates/secret-seaweedfs-db.yaml
new file mode 100644
index 000000000..c6132c9ea
--- /dev/null
+++ b/k8s/seaweedfs/templates/secret-seaweedfs-db.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: Secret
+type: Opaque
+metadata:
+  name: secret-seaweedfs-db
+  namespace: {{ .Release.Namespace }}
+  annotations:
+    "helm.sh/resource-policy": keep
+    "helm.sh/hook": "pre-install"
+stringData:
+  user: "YourSWUser"
+  password: "HardCodedPassword"
+  # better to generate this randomly and create it in the DB
+  # password: {{ randAlphaNum 10 | sha256sum | b64enc | trunc 32 }}
diff --git a/k8s/seaweedfs/templates/security-configmap.yaml b/k8s/seaweedfs/templates/security-configmap.yaml
new file mode 100644
index 000000000..7d06614ec
--- /dev/null
+++ b/k8s/seaweedfs/templates/security-configmap.yaml
@@ -0,0 +1,52 @@
+{{- if .Values.global.enableSecurity }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ template "seaweedfs.name" . }}-security-config
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app: {{ template "seaweedfs.name" . }}
+    chart: {{ template "seaweedfs.chart" . }}
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+data:
+  security.toml: |-
+    # this file is read by master, volume server, and filer
+
+    # the jwt signing key is read by master and volume server
+    # a jwt expires in 10 seconds
+    [jwt.signing]
+    key = "{{ randAlphaNum 10 | b64enc }}"
+
+    # all grpc tls authentications are mutual
+    # the values for the following ca, cert, and key are paths to the PEM files.
+    [grpc]
+    ca = "/usr/local/share/ca-certificates/ca/tls.crt"
+
+    [grpc.volume]
+    cert = "/usr/local/share/ca-certificates/volume/tls.crt"
+    key = "/usr/local/share/ca-certificates/volume/tls.key"
+
+    [grpc.master]
+    cert = "/usr/local/share/ca-certificates/master/tls.crt"
+    key = "/usr/local/share/ca-certificates/master/tls.key"
+
+    [grpc.filer]
+    cert = "/usr/local/share/ca-certificates/filer/tls.crt"
+    key = "/usr/local/share/ca-certificates/filer/tls.key"
+
+    # use this for any place that needs a grpc client
+    # i.e., "weed backup|benchmark|filer.copy|filer.replicate|mount|s3|upload"
+    [grpc.client]
+    cert = "/usr/local/share/ca-certificates/client/tls.crt"
+    key = "/usr/local/share/ca-certificates/client/tls.key"
+
+    # volume server https options
+    # Note: work in progress!
+    # this does not work with other clients, e.g., "weed filer|mount" etc, yet.
+    [https.client]
+    enabled = false
+    [https.volume]
+    cert = ""
+    key = ""
+{{- end }}
diff --git a/k8s/seaweedfs/templates/service-account.yaml b/k8s/seaweedfs/templates/service-account.yaml
new file mode 100644
index 000000000..e82ef7d62
--- /dev/null
+++ b/k8s/seaweedfs/templates/service-account.yaml
@@ -0,0 +1,29 @@
+#hack for delete pod master after migration
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1beta1
+metadata:
+  name: seaweefds-rw-cr
+rules:
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: seaweefds-rw-sa
+  namespace: {{ .Release.Namespace }}
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1beta1
+metadata:
+  name: system:serviceaccount:seaweefds-rw-sa:default
+subjects:
+- kind: ServiceAccount
+  name: seaweefds-rw-sa
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: seaweefds-rw-cr
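Rather than relying on the hard-coded credentials above, one option (a sketch, not part of the chart) is to pre-create the secret with a generated password; the `helm.sh/resource-policy: keep` annotation then preserves it across releases. The chart's own secret template would have to be skipped or removed to avoid a conflict at install time:

```sh
# Pre-create the filer DB secret with a random password (namespace is an assumption).
kubectl create secret generic secret-seaweedfs-db \
  --namespace default \
  --from-literal=user=seaweedfs \
  --from-literal=password="$(openssl rand -base64 24)"
```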
\ No newline at end of file diff --git a/k8s/seaweedfs/templates/volume-cert.yaml b/k8s/seaweedfs/templates/volume-cert.yaml new file mode 100644 index 000000000..72c62a0f5 --- /dev/null +++ b/k8s/seaweedfs/templates/volume-cert.yaml @@ -0,0 +1,33 @@ +{{- if .Values.global.enableSecurity }} +apiVersion: certmanager.k8s.io/v1alpha1 +kind: Certificate +metadata: + name: {{ template "seaweedfs.name" . }}-volume-cert + namespace: {{ .Release.Namespace }} +spec: + secretName: {{ template "seaweedfs.name" . }}-volume-cert + issuerRef: + name: {{ template "seaweedfs.name" . }}-clusterissuer + kind: ClusterIssuer + commonName: {{ .Values.certificates.commonName }} + organization: + - "SeaweedFS CA" + dnsNames: + - '*.{{ .Release.Namespace }}' + - '*.{{ .Release.Namespace }}.svc' + - '*.{{ .Release.Namespace }}.svc.cluster.local' + - '*.{{ template "seaweedfs.name" . }}-master' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}.svc' + - '*.{{ template "seaweedfs.name" . }}-master.{{ .Release.Namespace }}.svc.cluster.local' +{{- if .Values.certificates.ipAddresses }} + ipAddresses: + {{- range .Values.certificates.ipAddresses }} + - {{ . }} + {{- end }} +{{- end }} + keyAlgorithm: {{ .Values.certificates.keyAlgorithm }} + keySize: {{ .Values.certificates.keySize }} + duration: {{ .Values.certificates.duration }} + renewBefore: {{ .Values.certificates.renewBefore }} +{{- end }} diff --git a/k8s/seaweedfs/templates/volume-service.yaml b/k8s/seaweedfs/templates/volume-service.yaml new file mode 100644 index 000000000..fc7716681 --- /dev/null +++ b/k8s/seaweedfs/templates/volume-service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "seaweedfs.name" . }}-volume + namespace: {{ .Release.Namespace }} + labels: + app: {{ template "seaweedfs.name" . }} + component: volume +spec: + clusterIP: None + ports: + - name: "swfs-volume" + port: {{ .Values.volume.port }} + targetPort: {{ .Values.volume.port }} + protocol: TCP + - name: "swfs-volume-18080" + port: {{ .Values.volume.grpcPort }} + targetPort: {{ .Values.volume.grpcPort }} + protocol: TCP + selector: + app: {{ template "seaweedfs.name" . }} + component: volume
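As the chart README notes, volume pods are meant for nodes labeled `sw-volume: true` and the lighter components for `sw-backend: true`. A minimal install sketch (node names and Helm 3 syntax are assumptions):

```sh
# Label nodes per the README's placement scheme, then install the chart from the repo checkout.
kubectl label node worker-1 sw-volume=true
kubectl label node worker-2 sw-backend=true
helm install seaweedfs ./k8s/seaweedfs
```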
\ No newline at end of file diff --git a/k8s/seaweedfs/templates/volume-statefulset.yaml b/k8s/seaweedfs/templates/volume-statefulset.yaml new file mode 100644 index 000000000..9c6ddcd9f --- /dev/null +++ b/k8s/seaweedfs/templates/volume-statefulset.yaml @@ -0,0 +1,187 @@ +{{- if .Values.volume.enabled }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ template "seaweedfs.name" . }}-volume + namespace: {{ .Release.Namespace }} + labels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} +spec: + serviceName: {{ template "seaweedfs.name" . }}-volume + replicas: {{ .Values.volume.replicas }} + selector: + matchLabels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + release: {{ .Release.Name }} + component: volume + template: + metadata: + labels: + app: {{ template "seaweedfs.name" . }} + chart: {{ template "seaweedfs.chart" . }} + release: {{ .Release.Name }} + component: volume + spec: + {{- if .Values.volume.affinity }} + affinity: + {{ tpl .Values.volume.affinity . | nindent 8 | trim }} + {{- end }} + restartPolicy: {{ default .Values.global.restartPolicy .Values.volume.restartPolicy }} + {{- if .Values.volume.tolerations }} + tolerations: + {{ tpl .Values.volume.tolerations . | nindent 8 | trim }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + - name: {{ .Values.global.imagePullSecrets }} + {{- end }} + terminationGracePeriodSeconds: 10 + {{- if .Values.volume.priorityClassName }} + priorityClassName: {{ .Values.volume.priorityClassName | quote }} + {{- end }} + enableServiceLinks: false + containers: + - name: seaweedfs + image: {{ template "volume.image" . }} + imagePullPolicy: {{ default "IfNotPresent" .Values.global.imagePullPolicy }} + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: SEAWEEDFS_FULLNAME + value: "{{ template "seaweedfs.name" . 
}}" + command: + - "/bin/sh" + - "-ec" + - | + exec /usr/bin/weed -logdir=/logs \ + {{- if .Values.volume.loggingOverrideLevel }} + -v={{ .Values.volume.loggingOverrideLevel }} \ + {{- else }} + -v={{ .Values.global.loggingLevel }} \ + {{- end }} + volume \ + -port={{ .Values.volume.port }} \ + -dir={{ .Values.volume.dir }} \ + -max={{ .Values.volume.maxVolumes }} \ + {{- if .Values.volume.rack }} + -rack={{ .Values.volume.rack }} \ + {{- end }} + {{- if .Values.volume.dataCenter }} + -dataCenter={{ .Values.volume.dataCenter }} \ + {{- end }} + -ip.bind={{ .Values.volume.ipBind }} \ + -read.redirect={{ .Values.volume.readRedirect }} \ + {{- if .Values.volume.whiteList }} + -whiteList={{ .Values.volume.whiteList }} \ + {{- end }} + {{- if .Values.volume.imagesFixOrientation }} + -images.fix.orientation \ + {{- end }} + -ip=${POD_NAME}.${SEAWEEDFS_FULLNAME}-volume \ + -compactionMBps={{ .Values.volume.compactionMBps }} \ + -mserver={{ range $index := until (.Values.master.replicas | int) }}${SEAWEEDFS_FULLNAME}-master-{{ $index }}.${SEAWEEDFS_FULLNAME}-master:{{ $.Values.master.port }}{{ if lt $index (sub ($.Values.master.replicas | int) 1) }},{{ end }}{{ end }} + volumeMounts: + - name: seaweedfs-volume-storage + mountPath: "/data/" + - name: seaweedfs-volume-log-volume + mountPath: "/logs/" + {{- if .Values.global.enableSecurity }} + - name: security-config + readOnly: true + mountPath: /etc/seaweedfs/security.toml + subPath: security.toml + - name: ca-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/ca/ + - name: master-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/master/ + - name: volume-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/volume/ + - name: filer-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/filer/ + - name: client-cert + readOnly: true + mountPath: /usr/local/share/ca-certificates/client/ + {{- end }} + {{ tpl .Values.volume.extraVolumeMounts . | nindent 12 | trim }} + ports: + - containerPort: {{ .Values.volume.port }} + name: swfs-vol + - containerPort: {{ .Values.volume.grpcPort }} + #name: swfs-vol-grpc + readinessProbe: + httpGet: + path: /status + port: {{ .Values.volume.port }} + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 15 + successThreshold: 1 + failureThreshold: 100 + livenessProbe: + httpGet: + path: /status + port: {{ .Values.volume.port }} + scheme: HTTP + initialDelaySeconds: 20 + periodSeconds: 30 + successThreshold: 1 + failureThreshold: 10 + {{- if .Values.volume.resources }} + resources: + {{ tpl .Values.volume.resources . | nindent 12 | trim }} + {{- end }} + volumes: + - name: seaweedfs-volume-log-volume + hostPath: + path: /storage/logs/seaweedfs/volume + type: DirectoryOrCreate + - name: seaweedfs-volume-storage + hostPath: + path: /storage/object_store/ + type: DirectoryOrCreate + {{- if .Values.global.enableSecurity }} + - name: security-config + configMap: + name: {{ template "seaweedfs.name" . }}-security-config + - name: ca-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-ca-cert + - name: master-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-master-cert + - name: volume-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-volume-cert + - name: filer-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-filer-cert + - name: client-cert + secret: + secretName: {{ template "seaweedfs.name" . }}-client-cert + {{- end }} + {{- if .Values.volume.extraVolumes }} + {{ tpl .Values.volume.extraVolumes . 
| indent 8 | trim }}
+      {{- end }}
+      {{- if .Values.volume.nodeSelector }}
+      nodeSelector:
+        {{ tpl .Values.volume.nodeSelector . | indent 8 | trim }}
+      {{- end }}
+{{- end }}
diff --git a/k8s/seaweedfs/values.yaml b/k8s/seaweedfs/values.yaml
new file mode 100644
index 000000000..d3e030a08
--- /dev/null
+++ b/k8s/seaweedfs/values.yaml
@@ -0,0 +1,308 @@
+# Available parameters and their default values for the SeaweedFS chart.
+
+global:
+  registry: ""
+  repository: ""
+  imageName: chrislusf/seaweedfs
+  imageTag: "1.61"
+  imagePullPolicy: IfNotPresent
+  imagePullSecrets: imagepullsecret
+  restartPolicy: Always
+  loggingLevel: 1
+  enableSecurity: false
+  monitoring:
+    enabled: false
+    gatewayHost: null
+    gatewayPort: null
+
+image:
+  registry: ""
+  repository: ""
+
+master:
+  enabled: true
+  repository: null
+  imageName: null
+  imageTag: null
+  imageOverride: null
+  restartPolicy: null
+  replicas: 1
+  port: 9333
+  grpcPort: 19333
+  ipBind: "0.0.0.0"
+  volumePreallocate: false
+  volumeSizeLimitMB: 30000
+  loggingOverrideLevel: null
+
+  # Disable http requests; only gRPC operations are allowed
+  disableHttp: false
+
+  extraVolumes: ""
+  extraVolumeMounts: ""
+
+  # storage and storageClass are the settings for configuring stateful
+  # storage for the master pods. storage should be set to the disk size of
+  # the attached volume. storageClass is the class of storage which defaults
+  # to null (the Kube cluster will pick the default).
+  storage: 25Gi
+  storageClass: null
+
+  # Resource requests, limits, etc. for the master cluster placement. This
+  # should map directly to the value of the resources field for a PodSpec,
+  # formatted as a multi-line string. By default no direct resource request
+  # is made.
+  resources: null
+
+  # updatePartition is used to control a careful rolling update of SeaweedFS
+  # masters.
+  updatePartition: 0
+
+  # Affinity Settings
+  # Commenting out the affinity variable, or setting it empty, allows
+  # deployment to single-node clusters such as Minikube
+  affinity: |
+    podAntiAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        - labelSelector:
+            matchLabels:
+              app: {{ template "seaweedfs.name" . }}
+              release: "{{ .Release.Name }}"
+              component: master
+          topologyKey: kubernetes.io/hostname
+
+  # Toleration Settings for master pods
+  # This should be a multi-line string matching the Toleration array
+  # in a PodSpec.
+  tolerations: ""
+
+  # nodeSelector labels for master pod assignment, formatted as a multi-line string.
+  # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
+  # Example:
+  # nodeSelector: |
+  #   beta.kubernetes.io/arch: amd64
+  nodeSelector: |
+    sw-backend: "true"
+
+  # used to assign priority to master pods
+  # ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
+  priorityClassName: ""
+
+
+volume:
+  enabled: true
+  repository: null
+  imageName: null
+  imageTag: null
+  imageOverride: null
+  restartPolicy: null
+  port: 8080
+  grpcPort: 18080
+  ipBind: "0.0.0.0"
+  replicas: 1
+  loggingOverrideLevel: null
+
+  # limit background compaction or copying speed in megabytes per second
+  compactionMBps: "40"
+
+  # Directories to store data files. dir[,dir]... (default "/tmp")
+  dir: "/data"
+
+  # Maximum number of volumes, count[,count]... (default "7")
+  maxVolumes: "10000"
+
+  # Volume server's rack name
+  rack: null
+
+  # Volume server's data center name
+  dataCenter: null
+
+  # Redirect moved or non-local volumes. (default true)
+  readRedirect: true
+
+  # Comma-separated IP addresses that have write permission. No limit if empty.
+  whiteList: null
+
+  # Adjust jpg orientation when uploading.
+  imagesFixOrientation: false
+
+  extraVolumes: ""
+  extraVolumeMounts: ""
+
+  # Affinity Settings
+  # Commenting out the affinity variable, or setting it empty, allows
+  # deployment to single-node clusters such as Minikube
+  affinity: |
+    podAntiAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        - labelSelector:
+            matchLabels:
+              app: {{ template "seaweedfs.name" . }}
+              release: "{{ .Release.Name }}"
+              component: volume
+          topologyKey: kubernetes.io/hostname
+
+  # Resource requests, limits, etc. for the server cluster placement. This
+  # should map directly to the value of the resources field for a PodSpec,
+  # formatted as a multi-line string. By default no direct resource request
+  # is made.
+  resources: null
+
+  # Toleration Settings for server pods
+  # This should be a multi-line string matching the Toleration array
+  # in a PodSpec.
+  tolerations: ""
+
+  # nodeSelector labels for server pod assignment, formatted as a multi-line string.
+  # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
+  # Example:
+  # nodeSelector: |
+  #   beta.kubernetes.io/arch: amd64
+  nodeSelector: |
+    sw-volume: "true"
+
+  # used to assign priority to server pods
+  # ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
+  priorityClassName: ""
+
+
+filer:
+  enabled: true
+  repository: null
+  imageName: null
+  imageTag: null
+  imageOverride: null
+  restartPolicy: null
+  replicas: 1
+  port: 8888
+  grpcPort: 18888
+  loggingOverrideLevel: null
+
+  # Limit sub-directory listing size (default 100000)
+  dirListLimit: 100000
+
+  # Turn off directory listing
+  disableDirListing: false
+
+  # Disable http requests; only gRPC operations are allowed
+  disableHttp: false
+
+  # storage and storageClass are the settings for configuring stateful
+  # storage for the filer pods. storage should be set to the disk size of
+  # the attached volume. storageClass is the class of storage which defaults
+  # to null (the Kube cluster will pick the default).
+  storage: 25Gi
+  storageClass: null
+
+  extraVolumes: ""
+  extraVolumeMounts: ""
+
+  # Affinity Settings
+  # Commenting out the affinity variable, or setting it empty, allows
+  # deployment to single-node clusters such as Minikube
+  affinity: |
+    podAntiAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        - labelSelector:
+            matchLabels:
+              app: {{ template "seaweedfs.name" . }}
+              release: "{{ .Release.Name }}"
+              component: filer
+          topologyKey: kubernetes.io/hostname
+
+  # updatePartition is used to control a careful rolling update of SeaweedFS
+  # filers.
+  updatePartition: 0
+
+  # Resource requests, limits, etc. for the server cluster placement. This
+  # should map directly to the value of the resources field for a PodSpec,
+  # formatted as a multi-line string. By default no direct resource request
+  # is made.
+  resources: null
+
+  # Toleration Settings for server pods
+  # This should be a multi-line string matching the Toleration array
+  # in a PodSpec.
+  tolerations: ""
+
+  # nodeSelector labels for server pod assignment, formatted as a multi-line string.
+  # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
+  # Example:
+  # nodeSelector: |
+  #   beta.kubernetes.io/arch: amd64
+  nodeSelector: |
+    sw-backend: "true"
+
+  # used to assign priority to server pods
+  # ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
+  priorityClassName: ""
+
+  dbSchema:
+    imageName: db-schema
+    imageTag: "development"
+    imageOverride: ""
+
+  # extraEnvironmentVars is a list of extra environment variables to set in the stateful set.
+  extraEnvironmentVars:
+    WEED_MYSQL_ENABLED: "true"
+    WEED_MYSQL_HOSTNAME: "mysql-db-host"
+    WEED_MYSQL_PORT: "3306"
+    WEED_MYSQL_DATABASE: "sw_database"
+    WEED_MYSQL_CONNECTION_MAX_IDLE: "10"
+    WEED_MYSQL_CONNECTION_MAX_OPEN: "150"
+    # enable usage of MemSQL as the filer backend
+    WEED_MYSQL_INTERPOLATEPARAMS: "true"
+    WEED_LEVELDB2_ENABLED: "false"
+    # with http DELETE, by default the filer checks whether a folder is empty.
+    # recursive_delete will delete all sub-folders and files, similar to "rm -Rf"
+    WEED_FILER_OPTIONS_RECURSIVE_DELETE: "false"
+    # each directory under this folder automatically becomes a separate bucket
+    WEED_FILER_BUCKETS_FOLDER: "/buckets"
+    # directories under this folder store message queue data
+    WEED_FILER_QUEUES_FOLDER: "/queues"
+
+s3:
+  enabled: true
+  repository: null
+  imageName: null
+  imageTag: null
+  restartPolicy: null
+  replicas: 1
+  port: 8333
+  loggingOverrideLevel: null
+
+  # Suffix of the host name, {bucket}.{domainName}
+  domainName: ""
+
+  extraVolumes: ""
+  extraVolumeMounts: ""
+
+  # Resource requests, limits, etc. for the server cluster placement. This
+  # should map directly to the value of the resources field for a PodSpec,
+  # formatted as a multi-line string. By default no direct resource request
+  # is made.
+  resources: null
+
+  # Toleration Settings for server pods
+  # This should be a multi-line string matching the Toleration array
+  # in a PodSpec.
+  tolerations: ""
+
+  # nodeSelector labels for server pod assignment, formatted as a multi-line string.
+  # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector
+  # Example:
+  # nodeSelector: |
+  #   beta.kubernetes.io/arch: amd64
+  nodeSelector: |
+    sw-backend: "true"
+
+  # used to assign priority to server pods
+  # ref: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/
+  priorityClassName: ""
+
+certificates:
+  commonName: "SeaweedFS CA"
+  ipAddresses: []
+  keyAlgorithm: rsa
+  keySize: 2048
+  duration: 2160h # 90d
+  renewBefore: 360h # 15d
diff --git a/note/memory_usage.txt b/note/memory_usage.txt
deleted file mode 100644
index 0beb38558..000000000
--- a/note/memory_usage.txt
+++ /dev/null
@@ -1,278 +0,0 @@
-64 32G volumes consume 10G of memory
-Each volume has a 25M index, so each costs 160MB of memory
-
-
-
-This happened when I used lots of threads (almost 120) continuously reading files from SeaweedFS.
-I'm not so familiar with Linux, so I can't tell you exactly what happened.
-Below is what I know; if you need more info, contact me.
-
-My SeaweedFS version is about 0.12
-
-1.
top - -top - 12:07:37 up 1 day, 3:17, 2 users, load average: 0.00, 0.00, 0.00 -Tasks: 152 total, 1 running, 151 sleeping, 0 stopped, 0 zombie -Cpu(s): 0.0%us, 0.0%sy, 0.0%ni, 99.8%id, 0.1%wa, 0.0%hi, 0.0%si, 0.0%st -Mem: 16269880k total, 16192364k used, 77516k free, 58172k buffers -Swap: 2064376k total, 12324k used, 2052052k free, 2827520k cached - - PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND - 1499 root 20 0 11.6g 10g 1424 S 0.0 65.6 7:32.53 weedvolume - 1498 root 20 0 3204m 2.1g 1428 S 0.0 13.5 4:36.59 weedvolume - 1737 root 20 0 98868 4932 2920 S 0.0 0.0 0:00.56 sshd - 1497 root 20 0 151m 4404 1152 S 0.0 0.0 1:21.40 weedmaster - 1335 root 20 0 97816 3044 2896 S 0.0 0.0 0:00.76 sshd - -After system became steady , weedvolume used 65.6% memory . - -2. free -m - - total used free shared buffers cached -Mem: 15888 15809 79 0 56 2758 --/+ buffers/cache: 12994 2894 -Swap: 2015 12 2003 - -3. startup cmd - -screen -d -m /opt/weed/weedmaster -mdir /data/weeddata/ > /data/logs/weed/master.log & -screen -d -m /opt/weed/weedvolume -volumes=0-64 -dir /data/weeddata/ -pulseSeconds=20 -publicUrl="x.y.z:9334" -port 9334 > /data/logs/weed/s01.log & -screen -d -m /opt/weed/weedvolume -volumes=65-107 -dir /data/weeddata/ -pulseSeconds=20 -publicUrl="x.y.z:9335" -port 9335 > /data/logs/weed/s02.log & - -4. du -sh . - -32G 0.dat -26M 0.idx -8.2G 100.dat -6.8M 100.idx -8.2G 101.dat -6.9M 101.idx -8.2G 102.dat -6.8M 102.idx -8.2G 103.dat -6.8M 103.idx -8.2G 104.dat -6.8M 104.idx -8.2G 105.dat -6.9M 105.idx -8.2G 106.dat -6.9M 106.idx -8.2G 107.dat -6.9M 107.idx -32G 10.dat -25M 10.idx -32G 11.dat -25M 11.idx -32G 12.dat -25M 12.idx -32G 13.dat -25M 13.idx -32G 14.dat -25M 14.idx -32G 15.dat -25M 15.idx -32G 16.dat -25M 16.idx -32G 17.dat -25M 17.idx -32G 18.dat -25M 18.idx -32G 19.dat -25M 19.idx -32G 1.dat -26M 1.idx -32G 20.dat -25M 20.idx -32G 21.dat -25M 21.idx -32G 22.dat -25M 22.idx -32G 23.dat -25M 23.idx -32G 24.dat -25M 24.idx -32G 25.dat -25M 25.idx -32G 26.dat -25M 26.idx -32G 27.dat -25M 27.idx -32G 28.dat -25M 28.idx -32G 29.dat -25M 29.idx -32G 2.dat -26M 2.idx -32G 30.dat -25M 30.idx -32G 31.dat -25M 31.idx -32G 32.dat -25M 32.idx -32G 33.dat -25M 33.idx -32G 34.dat -25M 34.idx -32G 35.dat -25M 35.idx -32G 36.dat -25M 36.idx -32G 37.dat -25M 37.idx -32G 38.dat -25M 38.idx -32G 39.dat -25M 39.idx -32G 3.dat -26M 3.idx -32G 40.dat -25M 40.idx -32G 41.dat -25M 41.idx -32G 42.dat -25M 42.idx -32G 43.dat -25M 43.idx -32G 44.dat -25M 44.idx -32G 45.dat -25M 45.idx -32G 46.dat -25M 46.idx -32G 47.dat -25M 47.idx -32G 48.dat -25M 48.idx -32G 49.dat -25M 49.idx -32G 4.dat -26M 4.idx -32G 50.dat -25M 50.idx -32G 51.dat -25M 51.idx -32G 52.dat -25M 52.idx -32G 53.dat -25M 53.idx -32G 54.dat -25M 54.idx -32G 55.dat -25M 55.idx -32G 56.dat -25M 56.idx -32G 57.dat -25M 57.idx -32G 58.dat -25M 58.idx -32G 59.dat -25M 59.idx -32G 5.dat -26M 5.idx -32G 60.dat -25M 60.idx -32G 61.dat -25M 61.idx -32G 62.dat -25M 62.idx -32G 63.dat -25M 63.idx -32G 64.dat -25M 64.idx -8.2G 65.dat -6.9M 65.idx -8.2G 66.dat -6.9M 66.idx -8.2G 67.dat -6.9M 67.idx -8.2G 68.dat -6.8M 68.idx -8.2G 69.dat -6.9M 69.idx -32G 6.dat -25M 6.idx -8.2G 70.dat -6.8M 70.idx -8.2G 71.dat -6.9M 71.idx -8.2G 72.dat -6.9M 72.idx -8.2G 73.dat -6.9M 73.idx -8.2G 74.dat -6.9M 74.idx -8.2G 75.dat -6.9M 75.idx -8.1G 76.dat -6.8M 76.idx -8.2G 77.dat -6.8M 77.idx -8.2G 78.dat -6.8M 78.idx -8.1G 79.dat -6.8M 79.idx -32G 7.dat -25M 7.idx -8.2G 80.dat -6.8M 80.idx -8.2G 81.dat -6.9M 81.idx -8.2G 82.dat -6.9M 82.idx -8.2G 83.dat -6.9M 83.idx -8.2G 
84.dat -6.9M 84.idx -8.2G 85.dat -6.8M 85.idx -8.2G 86.dat -6.9M 86.idx -8.2G 87.dat -6.9M 87.idx -8.2G 88.dat -6.9M 88.idx -8.2G 89.dat -6.8M 89.idx -32G 8.dat -25M 8.idx -8.2G 90.dat -6.9M 90.idx -8.1G 91.dat -6.8M 91.idx -8.1G 92.dat -6.8M 92.idx -8.1G 93.dat -6.8M 93.idx -8.2G 94.dat -6.9M 94.idx -8.2G 95.dat -6.9M 95.idx -8.2G 96.dat -6.9M 96.idx -8.2G 97.dat -6.9M 97.idx -8.2G 98.dat -6.9M 98.idx -8.2G 99.dat -6.9M 99.idx -32G 9.dat -25M 9.idx -4.0K directory.seq - -You can see the volume 1-64 is now full. - -5. more log - -see logs.zip - -In messages you can see these lines: (Line 51095) - -Sep 26 06:14:31 wedb-01 kernel: auditd: page allocation failure. order:0, mode:0x20 -Sep 26 06:14:31 wedb-01 kernel: Pid: 1009, comm: auditd Not tainted 2.6.32-220.el6.x86_64 #1 -Sep 26 06:14:31 wedb-01 kernel: Call Trace: - -After those lines , the system deny any new network connect request - -6. /dir/status - -{"Machines":[{"Server":{"Url":"127.0.0.1:9335","PublicUrl":"x.y.z:9335"},"Volumes":[{"Id":106,"Size":8728909632},{"Id":66,"Size":8729852744},{"Id":90,"Size":8747834896},{"Id":103,"Size":8718106024},{"Id":87,"Size":8732133512},{"Id":96,"Size":8737251904},{"Id":80,"Size":8704130712},{"Id":77,"Size":8717989496},{"Id":70,"Size":8731474744},{"Id":94,"Size":8758656144},{"Id":107,"Size":8729599232},{"Id":67,"Size":8736848088},{"Id":91,"Size":8665847760},{"Id":100,"Size":8703272552},{"Id":84,"Size":8745121528},{"Id":97,"Size":8713031744},{"Id":81,"Size":8726088872},{"Id":74,"Size":8738588152},{"Id":71,"Size":8729349920},{"Id":95,"Size":8741526896},{"Id":104,"Size":8699374736},{"Id":88,"Size":8740362880},{"Id":101,"Size":8711832992},{"Id":85,"Size":8723479552},{"Id":78,"Size":8700345400},{"Id":75,"Size":8727796912},{"Id":68,"Size":8698607440},{"Id":92,"Size":8682683056},{"Id":105,"Size":8741226152},{"Id":65,"Size":8725365752},{"Id":89,"Size":8703062600},{"Id":98,"Size":8742331560},{"Id":82,"Size":8762554952},{"Id":79,"Size":8696300376},{"Id":72,"Size":8708217304},{"Id":69,"Size":8740268144},{"Id":93,"Size":8685060320},{"Id":102,"Size":8708695352},{"Id":86,"Size":8783247776},{"Id":99,"Size":8753463608},{"Id":83,"Size":8725963952},{"Id":76,"Size":8694693536},{"Id":73,"Size":8733560832}]},{"Server":{"Url":"127.0.0.1:9334","PublicUrl":"x.y.z:9334"},"Volumes":[{"Id":34,"Size":33415706800},{"Id":58,"Size":33569224784},{"Id":18,"Size":33474649968},{"Id":55,"Size":33542422680},{"Id":15,"Size":33517247576},{"Id":48,"Size":33574860328},{"Id":8,"Size":33511257144},{"Id":45,"Size":33463948408},{"Id":5,"Size":34317702920},{"Id":29,"Size":33465695776},{"Id":38,"Size":33553119624},{"Id":62,"Size":33448316736},{"Id":22,"Size":33566586296},{"Id":35,"Size":33493733728},{"Id":59,"Size":33498554904},{"Id":19,"Size":33493313784},{"Id":52,"Size":33552978448},{"Id":12,"Size":33505183752},{"Id":49,"Size":33603029896},{"Id":9,"Size":33515778064},{"Id":42,"Size":33500402248},{"Id":2,"Size":34223232992},{"Id":26,"Size":33526519600},{"Id":39,"Size":33580414336},{"Id":63,"Size":33476332456},{"Id":23,"Size":33543872592},{"Id":32,"Size":33515290168},{"Id":56,"Size":33499171184},{"Id":16,"Size":33556591168},{"Id":64,"Size":33495148616},{"Id":53,"Size":33467738560},{"Id":13,"Size":33596873960},{"Id":46,"Size":33508120448},{"Id":6,"Size":33417470256},{"Id":30,"Size":33532933992},{"Id":43,"Size":33591802008},{"Id":3,"Size":34270682080},{"Id":27,"Size":33525736944},{"Id":36,"Size":33443597824},{"Id":60,"Size":33427931336},{"Id":20,"Size":33499083096},{"Id":33,"Size":33531396280},{"Id":57,"Size":33578015104},{"Id":17,"Size":335105254
80},{"Id":50,"Size":33503123704},{"Id":10,"Size":33502391608},{"Id":47,"Size":33521868568},{"Id":7,"Size":33497101664},{"Id":31,"Size":33426905232},{"Id":40,"Size":33472978696},{"Id":0,"Size":34337344304},{"Id":24,"Size":33550157192},{"Id":37,"Size":33477162720},{"Id":61,"Size":33537175080},{"Id":21,"Size":33517192456},{"Id":54,"Size":33480720288},{"Id":14,"Size":33513192896},{"Id":51,"Size":33531336080},{"Id":11,"Size":33562385088},{"Id":44,"Size":33554479104},{"Id":4,"Size":34333127520},{"Id":28,"Size":33510503000},{"Id":41,"Size":33574922928},{"Id":1,"Size":34307181368},{"Id":25,"Size":33542834568}]}],"Writers":[106,66,90,103,87,96,80,77,70,94,107,67,91,100,84,97,81,74,71,95,104,88,101,85,78,75,68,92,105,65,89,98,82,79,72,69,93,102,86,99,83,76,73,34,58,18,55,15,48,8,45,5,29,38,62,22,35,59,19,52,12,49,9,42,2,26,39,63,23,32,56,16,64,53,13,46,6,30,43,3,27,36,60,20,33,57,17,50,10,47,7,31,40,0,24,37,61,21,54,14,51,11,44,4,28,41,1,25],"FileIdSequence":110250000} diff --git a/note/replication.txt b/note/replication.txt deleted file mode 100644 index a151e80c3..000000000 --- a/note/replication.txt +++ /dev/null @@ -1,109 +0,0 @@ -1. each file can choose the replication factor -2. replication granularity is in volume level -3. if not enough spaces, we can automatically decrease some volume's the replication factor, especially for cold data -4. plan to support migrating data to cheaper storage -5. plan to manual volume placement, access-based volume placement, auction based volume placement - -When a new volume server is started, it reports - 1. how many volumes it can hold - 2. current list of existing volumes and each volume's replication type -Each volume server remembers: - 1. current volume ids - 2. replica locations are read from the master - -The master assign volume ids based on - 1. replication factor - data center, rack - 2. concurrent write support -On master, stores the replication configuration -{ - replication:{ - {type:"00", min_volume_count:3, weight:10}, - {type:"01", min_volume_count:2, weight:20}, - {type:"10", min_volume_count:2, weight:20}, - {type:"11", min_volume_count:3, weight:30}, - {type:"20", min_volume_count:2, weight:20} - }, - port:9333, -} -Or manually via command line - 1. add volume with specified replication factor - 2. add volume with specified volume id - - -If duplicated volume ids are reported from different volume servers, -the master determines the replication factor of the volume, -if less than the replication factor, the volume is in readonly mode -if more than the replication factor, the volume will purge the smallest/oldest volume -if equal, the volume will function as usual - - -Use cases: - on volume server - 1. weed volume -mserver="xx.xx.xx.xx:9333" -publicUrl="good.com:8080" -dir="/tmp" -volumes=50 - on weed master - 1. weed master -port=9333 - generate a default json configuration file if doesn't exist - -Bootstrap - 1. at the very beginning, the system has no volumes at all. -When data node starts: - 1. each data node send to master its existing volumes and max volume blocks - 2. master remembers the topology/data_center/rack/data_node/volumes - for each replication level, stores - volume id ~ data node - writable volume ids -If any "assign" request comes in - 1. find a writable volume with the right replicationLevel - 2. if not found, grow the volumes with the right replication level - 3. return a writable volume to the user - - - for data node: - 0. detect existing volumes DONE - 1. 
onStartUp, and periodically, send existing volumes and maxVolumeCount store.Join(), DONE - 2. accept command to grow a volume( id + replication level) DONE - /admin/assign_volume?volume=some_id&replicationType=01 - 3. accept setting volumeLocationList DONE - /admin/set_volume_locations_list?volumeLocationsList=[{Vid:xxx,Locations:[loc1,loc2,loc3]}] - 4. for each write, pass the write to the next location, (Step 2) - POST method should accept an index, like ttl, get decremented every hop - for master: - 1. accept data node's report of existing volumes and maxVolumeCount ALREADY EXISTS /dir/join - 2. periodically refresh for active data nodes, and adjust writable volumes - 3. send command to grow a volume(id + replication level) DONE - 5. accept lookup for volume locations ALREADY EXISTS /dir/lookup - 6. read topology/datacenter/rack layout - -An algorithm to allocate volumes evenly, but may be inefficient if free volumes are plenty: -input: replication=xyz -algorithm: -ret_dcs = [] -foreach dc that has y+z+1 volumes{ - ret_racks = [] - foreach rack with z+1 volumes{ - ret = select z+1 servers with 1 volume - if ret.size()==z+1 { - ret_racks.append(ret) - } - } - randomly pick one rack from ret_racks - ret += select y racks with 1 volume each - if ret.size()==y+z+1{ - ret_dcs.append(ret) - } -} -randomly pick one dc from ret_dcs -ret += select x data centers with 1 volume each - -A simple replica placement algorithm, but may fail when free volume slots are not plenty: -ret := []volumes -dc = randomly pick 1 data center with y+z+1 volumes - rack = randomly pick 1 rack with z+1 volumes - ret = ret.append(randomly pick z+1 volumes) - ret = ret.append(randomly pick y racks with 1 volume) -ret = ret.append(randomly pick x data centers with 1 volume) - - -TODO: - 1. replicate content to the other server if the replication type needs replicas diff --git a/note/security.txt b/note/security.txt deleted file mode 100644 index 59ac52bd2..000000000 --- a/note/security.txt +++ /dev/null @@ -1,51 +0,0 @@ -Design for SeaweedFS security - -Design Objectives - Security can mean many different things. The original vision is that: if you have one machine lying around - somewhere with some disk space, it should be able to join your file system to contribute some disk space and - network bandwidth, securely! - - To achieve this purpose, the security should be able to: - 1. Secure the inter-server communication. Only real cluster servers can join and communicate. - 2. allow clients to securely write to volume servers - -Non Objective - Multi-tenant support. Avoid filers or clients cross-updating files. - User specific access control. - -Design Architect - master, and volume servers all talk securely via 2-way SSL for admin operations. - upon joining, master gives its secret key to volume servers. - filer or clients talk to master to get secret key, and use the key to generate JWT to write on volume server. - A side benefit: - a time limited read feature? - 4. volume server needs to expose https ports - -HTTP Connections - clear http - filer~>master, need to get a JWT from master - filer~>volume - 2-way https - master~ssl~>volume - volume~ssl~>master - -file uploading: - when volume server starts, it asks master for the secret key to decode JWT - when filer/clients wants to upload, master generate a JWT - filer~>volume(public port) - master~>volume(public port) - -Currently, volume server has 2 ip addresses: ip and publicUrl. - The ip is for admin purpose, and master talk to volume server this way. 
- The publicUrl is for clients to access the server, via http GET/POST/DELETE etc. - The write operations are secured by JWT. - clients talk to master also via https? possible. Decide on this later. - -Dev plan: - 1. volume server separate admin from public GET/POST/DELETE handlers - The step 1 may be good enough for most use cases. - - If 2-way ssl are still needed - 2. volume server add ssl support - 3. https connections to operate on volume servers - diff --git a/note/weedfs.txt b/note/weedfs.txt deleted file mode 100644 index 329bb5690..000000000 --- a/note/weedfs.txt +++ /dev/null @@ -1,46 +0,0 @@ -How to submit a content -1. Find physical volumes -1.c Create a hash value -1.d find a write logic volume id, and return [logic volume id, {physical volume ids}] -2. submit to physical volumes -2.c - generate the cookie - generate a unique id as key - choose the right altKey - send bytes to physical volumes -2.s each - save bytes - store map[key uint64, altKey uint32]<offset, size> - for updated entry, set old entry's offset to zero -3.c - wait for all physical volumes to finish - store the /<logic volume id>/<key>_<cookie>_<altKey>.<ext> - -How to retrieve a content -1.c - send logic volume id -1.d - find least busy volume's id -2.c - send URI /<physical volume id>/<key>_<cookie>_<altKey>.<ext> - - -How to submit a content -1. send bytes to SeaweedFS, got <volume id, key uint64, cookie code> - store <key uint64, volume id uint32, cookie code uint32, ext>, and other information - -To read a content -2. use logic volume id to lookup a <machine id> - render url as /<machine id>/<volume id>/<key>/<cookie>.ext - -The directory server -0.init - load and collect <logic volume id, machine ids> mapping -1.on submit content - find a free logic volume id, start sending content to 3 machines - if all of them finishes, return <logic volume id, key, cookie code> -2.on read content - based on logic volume id, pick a machine with less load, - return <machine id> - - diff --git a/other/java/client/pom.xml b/other/java/client/pom.xml new file mode 100644 index 000000000..0c585a941 --- /dev/null +++ b/other/java/client/pom.xml @@ -0,0 +1,167 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>com.github.chrislusf</groupId> + <artifactId>seaweedfs-client</artifactId> + <version>1.2.4</version> + + <parent> + <groupId>org.sonatype.oss</groupId> + <artifactId>oss-parent</artifactId> + <version>9</version> + </parent> + + <properties> + <protobuf.version>3.9.1</protobuf.version> + <!-- follow https://github.com/grpc/grpc-java --> + <grpc.version>1.23.0</grpc.version> + <guava.version>28.0-jre</guava.version> + </properties> + + <dependencies> + <dependency> + <groupId>com.moandjiezana.toml</groupId> + <artifactId>toml4j</artifactId> + <version>0.7.2</version> + </dependency> + <!-- https://mvnrepository.com/artifact/com.google.protobuf/protobuf-java --> + <dependency> + <groupId>com.google.protobuf</groupId> + <artifactId>protobuf-java</artifactId> + <version>${protobuf.version}</version> + </dependency> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>${guava.version}</version> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-netty-shaded</artifactId> + <version>${grpc.version}</version> + 
</dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-protobuf</artifactId> + <version>${grpc.version}</version> + </dependency> + <dependency> + <groupId>io.grpc</groupId> + <artifactId>grpc-stub</artifactId> + <version>${grpc.version}</version> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + <version>1.7.25</version> + </dependency> + <dependency> + <groupId>org.apache.httpcomponents</groupId> + <artifactId>httpmime</artifactId> + <version>4.5.6</version> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.12</version> + <scope>test</scope> + </dependency> + </dependencies> + + <distributionManagement> + <snapshotRepository> + <id>ossrh</id> + <url>https://oss.sonatype.org/content/repositories/snapshots</url> + </snapshotRepository> + </distributionManagement> + <build> + <extensions> + <extension> + <groupId>kr.motd.maven</groupId> + <artifactId>os-maven-plugin</artifactId> + <version>1.6.2</version> + </extension> + </extensions> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>7</source> + <target>7</target> + </configuration> + </plugin> + <plugin> + <groupId>org.xolstice.maven.plugins</groupId> + <artifactId>protobuf-maven-plugin</artifactId> + <version>0.6.1</version> + <configuration> + <protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact> + <pluginId>grpc-java</pluginId> + <pluginArtifact>io.grpc:protoc-gen-grpc-java:${grpc.version}:exe:${os.detected.classifier}</pluginArtifact> + </configuration> + <executions> + <execution> + <goals> + <goal>compile</goal> + <goal>compile-custom</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-gpg-plugin</artifactId> + <version>1.5</version> + <executions> + <execution> + <id>sign-artifacts</id> + <phase>verify</phase> + <goals> + <goal>sign</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.sonatype.plugins</groupId> + <artifactId>nexus-staging-maven-plugin</artifactId> + <version>1.6.7</version> + <extensions>true</extensions> + <configuration> + <serverId>ossrh</serverId> + <nexusUrl>https://oss.sonatype.org/</nexusUrl> + <autoReleaseAfterClose>true</autoReleaseAfterClose> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + <version>2.2.1</version> + <executions> + <execution> + <id>attach-sources</id> + <goals> + <goal>jar-no-fork</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <version>2.9.1</version> + <executions> + <execution> + <id>attach-javadocs</id> + <goals> + <goal>jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + +</project> diff --git a/other/java/client/src/main/java/seaweedfs/client/ByteBufferOutputStream.java b/other/java/client/src/main/java/seaweedfs/client/ByteBufferOutputStream.java new file mode 100644 index 000000000..efcc265de --- /dev/null +++ b/other/java/client/src/main/java/seaweedfs/client/ByteBufferOutputStream.java @@ -0,0 +1,21 @@ +package seaweedfs.client; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +public class 
ByteBufferOutputStream extends OutputStream { + private final ByteBuffer buf; + + public ByteBufferOutputStream(ByteBuffer buf) { + this.buf = buf; + } + + public void write(int b) throws IOException { + this.buf.put((byte)b); + } + + public void write(byte[] b, int off, int len) throws IOException { + this.buf.put(b, off, len); + } +} diff --git a/other/java/client/src/main/java/seaweedfs/client/FilerClient.java b/other/java/client/src/main/java/seaweedfs/client/FilerClient.java new file mode 100644 index 000000000..84aa26ad9 --- /dev/null +++ b/other/java/client/src/main/java/seaweedfs/client/FilerClient.java @@ -0,0 +1,286 @@ +package seaweedfs.client; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +public class FilerClient { + + private static final Logger LOG = LoggerFactory.getLogger(FilerClient.class); + + private FilerGrpcClient filerGrpcClient; + + public FilerClient(String host, int grpcPort) { + filerGrpcClient = new FilerGrpcClient(host, grpcPort); + } + + public FilerClient(FilerGrpcClient filerGrpcClient) { + this.filerGrpcClient = filerGrpcClient; + } + + public boolean mkdirs(String path, int mode) { + String currentUser = System.getProperty("user.name"); + return mkdirs(path, mode, 0, 0, currentUser, new String[]{}); + } + + public boolean mkdirs(String path, int mode, String userName, String[] groupNames) { + return mkdirs(path, mode, 0, 0, userName, groupNames); + } + + public boolean mkdirs(String path, int mode, int uid, int gid, String userName, String[] groupNames) { + + if ("/".equals(path)) { + return true; + } + Path pathObject = Paths.get(path); + String parent = pathObject.getParent().toString(); + String name = pathObject.getFileName().toString(); + + mkdirs(parent, mode, uid, gid, userName, groupNames); + + FilerProto.Entry existingEntry = lookupEntry(parent, name); + + if (existingEntry != null) { + return true; + } + + return createEntry( + parent, + newDirectoryEntry(name, mode, uid, gid, userName, groupNames).build() + ); + + } + + public boolean mv(String oldPath, String newPath) { + + Path oldPathObject = Paths.get(oldPath); + String oldParent = oldPathObject.getParent().toString(); + String oldName = oldPathObject.getFileName().toString(); + + Path newPathObject = Paths.get(newPath); + String newParent = newPathObject.getParent().toString(); + String newName = newPathObject.getFileName().toString(); + + return atomicRenameEntry(oldParent, oldName, newParent, newName); + + } + + public boolean rm(String path, boolean isRecursive, boolean ignoreRecusiveError) { + + Path pathObject = Paths.get(path); + String parent = pathObject.getParent().toString(); + String name = pathObject.getFileName().toString(); + + return deleteEntry( + parent, + name, + true, + isRecursive, + ignoreRecusiveError); + } + + public boolean touch(String path, int mode) { + String currentUser = System.getProperty("user.name"); + return touch(path, mode, 0, 0, currentUser, new String[]{}); + } + + public boolean touch(String path, int mode, int uid, int gid, String userName, String[] groupNames) { + + Path pathObject = Paths.get(path); + String parent = pathObject.getParent().toString(); + String name = pathObject.getFileName().toString(); + + FilerProto.Entry entry = lookupEntry(parent, name); + if (entry == null) { + return createEntry( + parent, + newFileEntry(name, mode, uid, gid, userName, 
groupNames).build() + ); + } + long now = System.currentTimeMillis() / 1000L; + FilerProto.FuseAttributes.Builder attr = entry.getAttributes().toBuilder() + .setMtime(now) + .setUid(uid) + .setGid(gid) + .setUserName(userName) + .clearGroupName() + .addAllGroupName(Arrays.asList(groupNames)); + return updateEntry(parent, entry.toBuilder().setAttributes(attr).build()); + } + + public FilerProto.Entry.Builder newDirectoryEntry(String name, int mode, + int uid, int gid, String userName, String[] groupNames) { + + long now = System.currentTimeMillis() / 1000L; + + return FilerProto.Entry.newBuilder() + .setName(name) + .setIsDirectory(true) + .setAttributes(FilerProto.FuseAttributes.newBuilder() + .setMtime(now) + .setCrtime(now) + .setUid(uid) + .setGid(gid) + .setFileMode(mode | 1 << 31) + .setUserName(userName) + .clearGroupName() + .addAllGroupName(Arrays.asList(groupNames))); + } + + public FilerProto.Entry.Builder newFileEntry(String name, int mode, + int uid, int gid, String userName, String[] groupNames) { + + long now = System.currentTimeMillis() / 1000L; + + return FilerProto.Entry.newBuilder() + .setName(name) + .setIsDirectory(false) + .setAttributes(FilerProto.FuseAttributes.newBuilder() + .setMtime(now) + .setCrtime(now) + .setUid(uid) + .setGid(gid) + .setFileMode(mode) + .setUserName(userName) + .clearGroupName() + .addAllGroupName(Arrays.asList(groupNames))); + } + + public List<FilerProto.Entry> listEntries(String path) { + List<FilerProto.Entry> results = new ArrayList<FilerProto.Entry>(); + String lastFileName = ""; + for (int limit = Integer.MAX_VALUE; limit > 0; ) { + List<FilerProto.Entry> t = listEntries(path, "", lastFileName, 1024); + if (t == null) { + break; + } + int nSize = t.size(); + if (nSize > 0) { + limit -= nSize; + lastFileName = t.get(nSize - 1).getName(); + } + results.addAll(t); + if (t.size() < 1024) { + break; + } + } + return results; + } + + public List<FilerProto.Entry> listEntries(String path, String entryPrefix, String lastEntryName, int limit) { + Iterator<FilerProto.ListEntriesResponse> iter = filerGrpcClient.getBlockingStub().listEntries(FilerProto.ListEntriesRequest.newBuilder() + .setDirectory(path) + .setPrefix(entryPrefix) + .setStartFromFileName(lastEntryName) + .setLimit(limit) + .build()); + List<FilerProto.Entry> entries = new ArrayList<>(); + while (iter.hasNext()){ + FilerProto.ListEntriesResponse resp = iter.next(); + entries.add(fixEntryAfterReading(resp.getEntry())); + } + return entries; + } + + public FilerProto.Entry lookupEntry(String directory, String entryName) { + try { + FilerProto.Entry entry = filerGrpcClient.getBlockingStub().lookupDirectoryEntry( + FilerProto.LookupDirectoryEntryRequest.newBuilder() + .setDirectory(directory) + .setName(entryName) + .build()).getEntry(); + return fixEntryAfterReading(entry); + } catch (Exception e) { + if (e.getMessage().indexOf("filer: no entry is found in filer store")>0){ + return null; + } + LOG.warn("lookupEntry {}/{}: {}", directory, entryName, e); + return null; + } + } + + + public boolean createEntry(String parent, FilerProto.Entry entry) { + try { + filerGrpcClient.getBlockingStub().createEntry(FilerProto.CreateEntryRequest.newBuilder() + .setDirectory(parent) + .setEntry(entry) + .build()); + } catch (Exception e) { + LOG.warn("createEntry {}/{}: {}", parent, entry.getName(), e); + return false; + } + return true; + } + + public boolean updateEntry(String parent, FilerProto.Entry entry) { + try { + 
filerGrpcClient.getBlockingStub().updateEntry(FilerProto.UpdateEntryRequest.newBuilder()
+                    .setDirectory(parent)
+                    .setEntry(entry)
+                    .build());
+        } catch (Exception e) {
+            LOG.warn("updateEntry {}/{}: {}", parent, entry.getName(), e);
+            return false;
+        }
+        return true;
+    }
+
+    public boolean deleteEntry(String parent, String entryName, boolean isDeleteFileChunk, boolean isRecursive, boolean ignoreRecusiveError) {
+        try {
+            filerGrpcClient.getBlockingStub().deleteEntry(FilerProto.DeleteEntryRequest.newBuilder()
+                    .setDirectory(parent)
+                    .setName(entryName)
+                    .setIsDeleteData(isDeleteFileChunk)
+                    .setIsRecursive(isRecursive)
+                    .setIgnoreRecursiveError(ignoreRecusiveError)
+                    .build());
+        } catch (Exception e) {
+            LOG.warn("deleteEntry {}/{}: {}", parent, entryName, e);
+            return false;
+        }
+        return true;
+    }
+
+    public boolean atomicRenameEntry(String oldParent, String oldName, String newParent, String newName) {
+        try {
+            filerGrpcClient.getBlockingStub().atomicRenameEntry(FilerProto.AtomicRenameEntryRequest.newBuilder()
+                    .setOldDirectory(oldParent)
+                    .setOldName(oldName)
+                    .setNewDirectory(newParent)
+                    .setNewName(newName)
+                    .build());
+        } catch (Exception e) {
+            LOG.warn("atomicRenameEntry {}/{} => {}/{}: {}", oldParent, oldName, newParent, newName, e);
+            return false;
+        }
+        return true;
+    }
+
+    // the filer may return chunks that carry only the structured FileId;
+    // rebuild the string file id here so the rest of the client can rely on chunk.getFileId()
+    private FilerProto.Entry fixEntryAfterReading(FilerProto.Entry entry) {
+        if (entry.getChunksList().size() <= 0) {
+            return entry;
+        }
+        String fileId = entry.getChunks(0).getFileId();
+        if (fileId != null && fileId.length() != 0) {
+            return entry;
+        }
+        FilerProto.Entry.Builder entryBuilder = entry.toBuilder();
+        entryBuilder.clearChunks();
+        for (FilerProto.FileChunk chunk : entry.getChunksList()) {
+            FilerProto.FileChunk.Builder chunkBuilder = chunk.toBuilder();
+            FilerProto.FileId fid = chunk.getFid();
+            fileId = String.format("%d,%d%x", fid.getVolumeId(), fid.getFileKey(), fid.getCookie());
+            chunkBuilder.setFileId(fileId);
+            entryBuilder.addChunks(chunkBuilder);
+        }
+        return entryBuilder.build();
+    }
+
+}
diff --git a/other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java b/other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java
new file mode 100644
index 000000000..3626c76de
--- /dev/null
+++ b/other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java
@@ -0,0 +1,70 @@
+package seaweedfs.client;
+
+import io.grpc.ManagedChannel;
+import io.grpc.ManagedChannelBuilder;
+import io.grpc.netty.shaded.io.grpc.netty.NegotiationType;
+import io.grpc.netty.shaded.io.grpc.netty.NettyChannelBuilder;
+import io.grpc.netty.shaded.io.netty.handler.ssl.SslContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.net.ssl.SSLException;
+import java.util.concurrent.TimeUnit;
+
+public class FilerGrpcClient {
+
+    private static final Logger logger = LoggerFactory.getLogger(FilerGrpcClient.class);
+
+    private final ManagedChannel channel;
+    private final SeaweedFilerGrpc.SeaweedFilerBlockingStub blockingStub;
+    private final SeaweedFilerGrpc.SeaweedFilerStub asyncStub;
+    private final SeaweedFilerGrpc.SeaweedFilerFutureStub futureStub;
+
+    static SslContext sslContext;
+
+    static {
+        try {
+            sslContext = FilerSslContext.loadSslContext();
+        } catch (SSLException e) {
+            logger.warn("failed to load ssl context", e);
+        }
+    }
+
+    public FilerGrpcClient(String host, int grpcPort) {
+        this(host, grpcPort, sslContext);
+    }
+
+    public FilerGrpcClient(String host, int grpcPort, SslContext sslContext) {
+
+        this(sslContext == null ?
+ ManagedChannelBuilder.forAddress(host, grpcPort).usePlaintext() : + NettyChannelBuilder.forAddress(host, grpcPort) + .negotiationType(NegotiationType.TLS) + .sslContext(sslContext)); + + } + + public FilerGrpcClient(ManagedChannelBuilder<?> channelBuilder) { + channel = channelBuilder.build(); + blockingStub = SeaweedFilerGrpc.newBlockingStub(channel); + asyncStub = SeaweedFilerGrpc.newStub(channel); + futureStub = SeaweedFilerGrpc.newFutureStub(channel); + } + + public void shutdown() throws InterruptedException { + channel.shutdown().awaitTermination(5, TimeUnit.SECONDS); + } + + public SeaweedFilerGrpc.SeaweedFilerBlockingStub getBlockingStub() { + return blockingStub; + } + + public SeaweedFilerGrpc.SeaweedFilerStub getAsyncStub() { + return asyncStub; + } + + public SeaweedFilerGrpc.SeaweedFilerFutureStub getFutureStub() { + return futureStub; + } + +} diff --git a/other/java/client/src/main/java/seaweedfs/client/FilerSslContext.java b/other/java/client/src/main/java/seaweedfs/client/FilerSslContext.java new file mode 100644 index 000000000..5a88c1da3 --- /dev/null +++ b/other/java/client/src/main/java/seaweedfs/client/FilerSslContext.java @@ -0,0 +1,64 @@ +package seaweedfs.client; + +import com.google.common.base.Strings; +import com.moandjiezana.toml.Toml; +import io.grpc.netty.shaded.io.grpc.netty.GrpcSslContexts; +import io.grpc.netty.shaded.io.netty.handler.ssl.SslContext; +import io.grpc.netty.shaded.io.netty.handler.ssl.SslContextBuilder; +import io.grpc.netty.shaded.io.netty.handler.ssl.util.InsecureTrustManagerFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.SSLException; +import java.io.File; + +public class FilerSslContext { + + private static final Logger logger = LoggerFactory.getLogger(FilerSslContext.class); + + public static SslContext loadSslContext() throws SSLException { + String securityFileName = "security.toml"; + String home = System.getProperty("user.home"); + File f1 = new File("./"+securityFileName); + File f2 = new File(home + "/.seaweedfs/"+securityFileName); + File f3 = new File(home + "/etc/seaweedfs/"+securityFileName); + + File securityFile = f1.exists()? f1 : f2.exists() ? f2 : f3.exists()? 
f3 : null;
+
+        if (securityFile==null){
+            return null;
+        }
+
+        Toml toml = new Toml().read(securityFile);
+        logger.debug("reading ssl setup from {}", securityFile);
+
+        String trustCertCollectionFilePath = toml.getString("grpc.ca");
+        logger.debug("loading ca from {}", trustCertCollectionFilePath);
+        String clientCertChainFilePath = toml.getString("grpc.client.cert");
+        logger.debug("loading client cert from {}", clientCertChainFilePath);
+        String clientPrivateKeyFilePath = toml.getString("grpc.client.key");
+        logger.debug("loading client key from {}", clientPrivateKeyFilePath);
+
+        if (Strings.isNullOrEmpty(clientCertChainFilePath) && Strings.isNullOrEmpty(clientPrivateKeyFilePath)){
+            return null;
+        }
+
+        // possibly fix the format https://netty.io/wiki/sslcontextbuilder-and-private-key.html
+
+        return buildSslContext(trustCertCollectionFilePath, clientCertChainFilePath, clientPrivateKeyFilePath);
+    }
+
+
+    private static SslContext buildSslContext(String trustCertCollectionFilePath,
+                                              String clientCertChainFilePath,
+                                              String clientPrivateKeyFilePath) throws SSLException {
+        SslContextBuilder builder = GrpcSslContexts.forClient();
+        if (trustCertCollectionFilePath != null) {
+            builder.trustManager(new File(trustCertCollectionFilePath));
+        }
+        if (clientCertChainFilePath != null && clientPrivateKeyFilePath != null) {
+            builder.keyManager(new File(clientCertChainFilePath), new File(clientPrivateKeyFilePath));
+        }
+        // note: this call overrides the CA trustManager configured above and
+        // disables server certificate verification on the client side
+        return builder.trustManager(InsecureTrustManagerFactory.INSTANCE).build();
+    }
+}
diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java
new file mode 100644
index 000000000..b08c14467
--- /dev/null
+++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java
@@ -0,0 +1,259 @@
+package seaweedfs.client;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpHeaders;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.DefaultHttpClient;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.*;
+
+public class SeaweedRead {
+
+    // private static final Logger LOG = LoggerFactory.getLogger(SeaweedRead.class);
+
+    // returns bytesRead
+    public static long read(FilerGrpcClient filerGrpcClient, List<VisibleInterval> visibleIntervals,
+                            final long position, final byte[] buffer, final int bufferOffset,
+                            final int bufferLength) throws IOException {
+
+        List<ChunkView> chunkViews = viewFromVisibles(visibleIntervals, position, bufferLength);
+
+        FilerProto.LookupVolumeRequest.Builder lookupRequest = FilerProto.LookupVolumeRequest.newBuilder();
+        for (ChunkView chunkView : chunkViews) {
+            String vid = parseVolumeId(chunkView.fileId);
+            lookupRequest.addVolumeIds(vid);
+        }
+
+        FilerProto.LookupVolumeResponse lookupResponse = filerGrpcClient
+                .getBlockingStub().lookupVolume(lookupRequest.build());
+
+        Map<String, FilerProto.Locations> vid2Locations = lookupResponse.getLocationsMapMap();
+
+        //TODO parallel this
+        long readCount = 0;
+        int startOffset = bufferOffset;
+        for (ChunkView chunkView : chunkViews) {
+            FilerProto.Locations locations = vid2Locations.get(parseVolumeId(chunkView.fileId));
+            if (locations.getLocationsCount() == 0) {
+                // no location is known for this chunk's volume, so nothing can be read
+ return 0; + } + + int len = readChunkView(position, buffer, startOffset, chunkView, locations); + + readCount += len; + startOffset += len; + + } + + return readCount; + } + + private static int readChunkView(long position, byte[] buffer, int startOffset, ChunkView chunkView, FilerProto.Locations locations) throws IOException { + HttpClient client = new DefaultHttpClient(); + HttpGet request = new HttpGet( + String.format("http://%s/%s", locations.getLocations(0).getUrl(), chunkView.fileId)); + + if (!chunkView.isFullChunk) { + request.setHeader(HttpHeaders.ACCEPT_ENCODING, ""); + request.setHeader(HttpHeaders.RANGE, + String.format("bytes=%d-%d", chunkView.offset, chunkView.offset + chunkView.size - 1)); + } + + try { + HttpResponse response = client.execute(request); + HttpEntity entity = response.getEntity(); + + int len = (int) (chunkView.logicOffset - position + chunkView.size); + OutputStream outputStream = new ByteBufferOutputStream(ByteBuffer.wrap(buffer, startOffset, len)); + entity.writeTo(outputStream); + // LOG.debug("* read chunkView:{} startOffset:{} length:{}", chunkView, startOffset, len); + + return len; + + } finally { + if (client instanceof Closeable) { + Closeable t = (Closeable) client; + t.close(); + } + } + } + + protected static List<ChunkView> viewFromVisibles(List<VisibleInterval> visibleIntervals, long offset, long size) { + List<ChunkView> views = new ArrayList<>(); + + long stop = offset + size; + for (VisibleInterval chunk : visibleIntervals) { + if (chunk.start <= offset && offset < chunk.stop && offset < stop) { + boolean isFullChunk = chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop; + views.add(new ChunkView( + chunk.fileId, + offset - chunk.start, + Math.min(chunk.stop, stop) - offset, + offset, + isFullChunk + )); + offset = Math.min(chunk.stop, stop); + } + } + return views; + } + + public static List<VisibleInterval> nonOverlappingVisibleIntervals(List<FilerProto.FileChunk> chunkList) { + FilerProto.FileChunk[] chunks = chunkList.toArray(new FilerProto.FileChunk[0]); + Arrays.sort(chunks, new Comparator<FilerProto.FileChunk>() { + @Override + public int compare(FilerProto.FileChunk a, FilerProto.FileChunk b) { + return (int) (a.getMtime() - b.getMtime()); + } + }); + + List<VisibleInterval> visibles = new ArrayList<>(); + for (FilerProto.FileChunk chunk : chunks) { + List<VisibleInterval> newVisibles = new ArrayList<>(); + visibles = mergeIntoVisibles(visibles, newVisibles, chunk); + } + + return visibles; + } + + private static List<VisibleInterval> mergeIntoVisibles(List<VisibleInterval> visibles, + List<VisibleInterval> newVisibles, + FilerProto.FileChunk chunk) { + VisibleInterval newV = new VisibleInterval( + chunk.getOffset(), + chunk.getOffset() + chunk.getSize(), + chunk.getFileId(), + chunk.getMtime(), + true + ); + + // easy cases to speed up + if (visibles.size() == 0) { + visibles.add(newV); + return visibles; + } + if (visibles.get(visibles.size() - 1).stop <= chunk.getOffset()) { + visibles.add(newV); + return visibles; + } + + for (VisibleInterval v : visibles) { + if (v.start < chunk.getOffset() && chunk.getOffset() < v.stop) { + newVisibles.add(new VisibleInterval( + v.start, + chunk.getOffset(), + v.fileId, + v.modifiedTime, + false + )); + } + long chunkStop = chunk.getOffset() + chunk.getSize(); + if (v.start < chunkStop && chunkStop < v.stop) { + newVisibles.add(new VisibleInterval( + chunkStop, + v.stop, + v.fileId, + v.modifiedTime, + false + )); + } + if (chunkStop <= v.start || v.stop <= chunk.getOffset()) { 
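+                // the new chunk does not overlap v at all, so v is carried over unchanged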
+ newVisibles.add(v); + } + } + newVisibles.add(newV); + + // keep everything sorted + for (int i = newVisibles.size() - 1; i >= 0; i--) { + if (i > 0 && newV.start < newVisibles.get(i - 1).start) { + newVisibles.set(i, newVisibles.get(i - 1)); + } else { + newVisibles.set(i, newV); + break; + } + } + + return newVisibles; + } + + public static String parseVolumeId(String fileId) { + int commaIndex = fileId.lastIndexOf(','); + if (commaIndex > 0) { + return fileId.substring(0, commaIndex); + } + return fileId; + } + + public static long totalSize(List<FilerProto.FileChunk> chunksList) { + long size = 0; + for (FilerProto.FileChunk chunk : chunksList) { + long t = chunk.getOffset() + chunk.getSize(); + if (size < t) { + size = t; + } + } + return size; + } + + public static class VisibleInterval { + public final long start; + public final long stop; + public final long modifiedTime; + public final String fileId; + public final boolean isFullChunk; + + public VisibleInterval(long start, long stop, String fileId, long modifiedTime, boolean isFullChunk) { + this.start = start; + this.stop = stop; + this.modifiedTime = modifiedTime; + this.fileId = fileId; + this.isFullChunk = isFullChunk; + } + + @Override + public String toString() { + return "VisibleInterval{" + + "start=" + start + + ", stop=" + stop + + ", modifiedTime=" + modifiedTime + + ", fileId='" + fileId + '\'' + + ", isFullChunk=" + isFullChunk + + '}'; + } + } + + public static class ChunkView { + public final String fileId; + public final long offset; + public final long size; + public final long logicOffset; + public final boolean isFullChunk; + + public ChunkView(String fileId, long offset, long size, long logicOffset, boolean isFullChunk) { + this.fileId = fileId; + this.offset = offset; + this.size = size; + this.logicOffset = logicOffset; + this.isFullChunk = isFullChunk; + } + + @Override + public String toString() { + return "ChunkView{" + + "fileId='" + fileId + '\'' + + ", offset=" + offset + + ", size=" + size + + ", logicOffset=" + logicOffset + + ", isFullChunk=" + isFullChunk + + '}'; + } + } + +} diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java new file mode 100644 index 000000000..0663e8d98 --- /dev/null +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java @@ -0,0 +1,95 @@ +package seaweedfs.client; + +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.mime.HttpMultipartMode; +import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.impl.client.DefaultHttpClient; + +import java.io.ByteArrayInputStream; +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; + +public class SeaweedWrite { + + public static void writeData(FilerProto.Entry.Builder entry, + final String replication, + final FilerGrpcClient filerGrpcClient, + final long offset, + final byte[] bytes, + final long bytesOffset, final long bytesLength) throws IOException { + FilerProto.AssignVolumeResponse response = filerGrpcClient.getBlockingStub().assignVolume( + FilerProto.AssignVolumeRequest.newBuilder() + .setCollection("") + .setReplication(replication) + .setDataCenter("") + .setReplication("") + .setTtlSec(0) + .build()); + String fileId = response.getFileId(); + String url = response.getUrl(); + String auth = response.getAuth(); + String targetUrl = 
String.format("http://%s/%s", url, fileId); + + String etag = multipartUpload(targetUrl, auth, bytes, bytesOffset, bytesLength); + + entry.addChunks(FilerProto.FileChunk.newBuilder() + .setFileId(fileId) + .setOffset(offset) + .setSize(bytesLength) + .setMtime(System.currentTimeMillis() / 10000L) + .setETag(etag) + ); + + } + + public static void writeMeta(final FilerGrpcClient filerGrpcClient, + final String parentDirectory, final FilerProto.Entry.Builder entry) { + filerGrpcClient.getBlockingStub().createEntry( + FilerProto.CreateEntryRequest.newBuilder() + .setDirectory(parentDirectory) + .setEntry(entry) + .build() + ); + } + + private static String multipartUpload(String targetUrl, + String auth, + final byte[] bytes, + final long bytesOffset, final long bytesLength) throws IOException { + + HttpClient client = new DefaultHttpClient(); + + InputStream inputStream = new ByteArrayInputStream(bytes, (int) bytesOffset, (int) bytesLength); + + HttpPost post = new HttpPost(targetUrl); + if (auth != null && auth.length() != 0) { + post.addHeader("Authorization", "BEARER " + auth); + } + + post.setEntity(MultipartEntityBuilder.create() + .setMode(HttpMultipartMode.BROWSER_COMPATIBLE) + .addBinaryBody("upload", inputStream) + .build()); + + try { + HttpResponse response = client.execute(post); + + String etag = response.getLastHeader("ETag").getValue(); + + if (etag != null && etag.startsWith("\"") && etag.endsWith("\"")) { + etag = etag.substring(1, etag.length() - 1); + } + + return etag; + } finally { + if (client instanceof Closeable) { + Closeable t = (Closeable) client; + t.close(); + } + } + + } +} diff --git a/other/java/client/src/main/proto/filer.proto b/other/java/client/src/main/proto/filer.proto new file mode 100644 index 000000000..8df46e917 --- /dev/null +++ b/other/java/client/src/main/proto/filer.proto @@ -0,0 +1,235 @@ +syntax = "proto3"; + +package filer_pb; + +option java_package = "seaweedfs.client"; +option java_outer_classname = "FilerProto"; + +////////////////////////////////////////////////// + +service SeaweedFiler { + + rpc LookupDirectoryEntry (LookupDirectoryEntryRequest) returns (LookupDirectoryEntryResponse) { + } + + rpc ListEntries (ListEntriesRequest) returns (stream ListEntriesResponse) { + } + + rpc CreateEntry (CreateEntryRequest) returns (CreateEntryResponse) { + } + + rpc UpdateEntry (UpdateEntryRequest) returns (UpdateEntryResponse) { + } + + rpc DeleteEntry (DeleteEntryRequest) returns (DeleteEntryResponse) { + } + + rpc StreamDeleteEntries (stream DeleteEntryRequest) returns (stream DeleteEntryResponse) { + } + + rpc AtomicRenameEntry (AtomicRenameEntryRequest) returns (AtomicRenameEntryResponse) { + } + + rpc AssignVolume (AssignVolumeRequest) returns (AssignVolumeResponse) { + } + + rpc LookupVolume (LookupVolumeRequest) returns (LookupVolumeResponse) { + } + + rpc DeleteCollection (DeleteCollectionRequest) returns (DeleteCollectionResponse) { + } + + rpc Statistics (StatisticsRequest) returns (StatisticsResponse) { + } + + rpc GetFilerConfiguration (GetFilerConfigurationRequest) returns (GetFilerConfigurationResponse) { + } + +} + +////////////////////////////////////////////////// + +message LookupDirectoryEntryRequest { + string directory = 1; + string name = 2; +} + +message LookupDirectoryEntryResponse { + Entry entry = 1; +} + +message ListEntriesRequest { + string directory = 1; + string prefix = 2; + string startFromFileName = 3; + bool inclusiveStartFrom = 4; + uint32 limit = 5; +} + +message ListEntriesResponse { + Entry entry = 1; +} + 
+message Entry { + string name = 1; + bool is_directory = 2; + repeated FileChunk chunks = 3; + FuseAttributes attributes = 4; + map<string, bytes> extended = 5; +} + +message FullEntry { + string dir = 1; + Entry entry = 2; +} + +message EventNotification { + Entry old_entry = 1; + Entry new_entry = 2; + bool delete_chunks = 3; + string new_parent_path = 4; +} + +message FileChunk { + string file_id = 1; // to be deprecated + int64 offset = 2; + uint64 size = 3; + int64 mtime = 4; + string e_tag = 5; + string source_file_id = 6; // to be deprecated + FileId fid = 7; + FileId source_fid = 8; + bytes cipher_key = 9; + bool is_gzipped = 10; +} + +message FileId { + uint32 volume_id = 1; + uint64 file_key = 2; + fixed32 cookie = 3; +} + +message FuseAttributes { + uint64 file_size = 1; + int64 mtime = 2; // unix time in seconds + uint32 file_mode = 3; + uint32 uid = 4; + uint32 gid = 5; + int64 crtime = 6; // unix time in seconds + string mime = 7; + string replication = 8; + string collection = 9; + int32 ttl_sec = 10; + string user_name = 11; // for hdfs + repeated string group_name = 12; // for hdfs + string symlink_target = 13; +} + +message CreateEntryRequest { + string directory = 1; + Entry entry = 2; + bool o_excl = 3; +} + +message CreateEntryResponse { + string error = 1; +} + +message UpdateEntryRequest { + string directory = 1; + Entry entry = 2; +} +message UpdateEntryResponse { +} + +message DeleteEntryRequest { + string directory = 1; + string name = 2; + // bool is_directory = 3; + bool is_delete_data = 4; + bool is_recursive = 5; + bool ignore_recursive_error = 6; +} + +message DeleteEntryResponse { + string error = 1; +} + +message AtomicRenameEntryRequest { + string old_directory = 1; + string old_name = 2; + string new_directory = 3; + string new_name = 4; +} + +message AtomicRenameEntryResponse { +} + +message AssignVolumeRequest { + int32 count = 1; + string collection = 2; + string replication = 3; + int32 ttl_sec = 4; + string data_center = 5; + string parent_path = 6; +} + +message AssignVolumeResponse { + string file_id = 1; + string url = 2; + string public_url = 3; + int32 count = 4; + string auth = 5; + string collection = 6; + string replication = 7; + string error = 8; +} + +message LookupVolumeRequest { + repeated string volume_ids = 1; +} + +message Locations { + repeated Location locations = 1; +} + +message Location { + string url = 1; + string public_url = 2; +} +message LookupVolumeResponse { + map<string, Locations> locations_map = 1; +} + +message DeleteCollectionRequest { + string collection = 1; +} + +message DeleteCollectionResponse { +} + +message StatisticsRequest { + string replication = 1; + string collection = 2; + string ttl = 3; +} +message StatisticsResponse { + string replication = 1; + string collection = 2; + string ttl = 3; + uint64 total_size = 4; + uint64 used_size = 5; + uint64 file_count = 6; +} + +message GetFilerConfigurationRequest { +} +message GetFilerConfigurationResponse { + repeated string masters = 1; + string replication = 2; + string collection = 3; + uint32 max_mb = 4; + string dir_buckets = 5; + string dir_queues = 6; + bool cipher = 7; +} diff --git a/other/java/client/src/test/java/seaweedfs/client/SeaweedFilerTest.java b/other/java/client/src/test/java/seaweedfs/client/SeaweedFilerTest.java new file mode 100644 index 000000000..eaf17e5c6 --- /dev/null +++ b/other/java/client/src/test/java/seaweedfs/client/SeaweedFilerTest.java @@ -0,0 +1,23 @@ +package seaweedfs.client; + +import java.util.List; + +public class 
SeaweedFilerTest { + public static void main(String[] args){ + + FilerClient filerClient = new FilerClient("localhost", 18888); + + List<FilerProto.Entry> entries = filerClient.listEntries("/"); + + for (FilerProto.Entry entry : entries) { + System.out.println(entry.toString()); + } + + filerClient.mkdirs("/new_folder", 0755); + filerClient.touch("/new_folder/new_empty_file", 0755); + filerClient.touch("/new_folder/new_empty_file2", 0755); + filerClient.rm("/new_folder/new_empty_file", false, true); + filerClient.rm("/new_folder", true, true); + + } +} diff --git a/other/java/client/src/test/java/seaweedfs/client/SeaweedReadTest.java b/other/java/client/src/test/java/seaweedfs/client/SeaweedReadTest.java new file mode 100644 index 000000000..ccfcdb117 --- /dev/null +++ b/other/java/client/src/test/java/seaweedfs/client/SeaweedReadTest.java @@ -0,0 +1,63 @@ +package seaweedfs.client; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +public class SeaweedReadTest { + + @Test + public void testNonOverlappingVisibleIntervals() { + List<FilerProto.FileChunk> chunks = new ArrayList<>(); + chunks.add(FilerProto.FileChunk.newBuilder() + .setFileId("aaa") + .setOffset(0) + .setSize(100) + .setMtime(1000) + .build()); + chunks.add(FilerProto.FileChunk.newBuilder() + .setFileId("bbb") + .setOffset(100) + .setSize(133) + .setMtime(2000) + .build()); + + List<SeaweedRead.VisibleInterval> visibleIntervals = SeaweedRead.nonOverlappingVisibleIntervals(chunks); + for (SeaweedRead.VisibleInterval visibleInterval : visibleIntervals) { + System.out.println("visible:" + visibleInterval); + } + + Assert.assertEquals(visibleIntervals.size(), 2); + + SeaweedRead.VisibleInterval visibleInterval = visibleIntervals.get(0); + Assert.assertEquals(visibleInterval.start, 0); + Assert.assertEquals(visibleInterval.stop, 100); + Assert.assertEquals(visibleInterval.modifiedTime, 1000); + Assert.assertEquals(visibleInterval.fileId, "aaa"); + + visibleInterval = visibleIntervals.get(1); + Assert.assertEquals(visibleInterval.start, 100); + Assert.assertEquals(visibleInterval.stop, 233); + Assert.assertEquals(visibleInterval.modifiedTime, 2000); + Assert.assertEquals(visibleInterval.fileId, "bbb"); + + List<SeaweedRead.ChunkView> chunkViews = SeaweedRead.viewFromVisibles(visibleIntervals, 0, 233); + + SeaweedRead.ChunkView chunkView = chunkViews.get(0); + Assert.assertEquals(chunkView.offset, 0); + Assert.assertEquals(chunkView.size, 100); + Assert.assertEquals(chunkView.logicOffset, 0); + Assert.assertEquals(chunkView.fileId, "aaa"); + + chunkView = chunkViews.get(1); + Assert.assertEquals(chunkView.offset, 0); + Assert.assertEquals(chunkView.size, 133); + Assert.assertEquals(chunkView.logicOffset, 100); + Assert.assertEquals(chunkView.fileId, "bbb"); + + + } + +} diff --git a/other/java/hdfs2/dependency-reduced-pom.xml b/other/java/hdfs2/dependency-reduced-pom.xml new file mode 100644 index 000000000..d818bc878 --- /dev/null +++ b/other/java/hdfs2/dependency-reduced-pom.xml @@ -0,0 +1,133 @@ +<?xml version="1.0" encoding="UTF-8"?>
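+<!-- dependency-reduced POM generated by the maven-shade-plugin from hdfs2/pom.xml -->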
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <parent>
+ <artifactId>oss-parent</artifactId>
+ <groupId>org.sonatype.oss</groupId>
+ <version>9</version>
+ <relativePath>../pom.xml/pom.xml</relativePath>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.github.chrislusf</groupId>
+ <artifactId>seaweedfs-hadoop2-client</artifactId>
+ <version>${seaweedfs.client.version}</version>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>7</source>
+ <target>7</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.2.1</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ <exclude>org/slf4j/**</exclude>
+ <exclude>META-INF/maven/org.slf4j/**</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <transformers>
+              <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ </transformers>
+ <relocations>
+ <relocation>
+ <pattern>com.google</pattern>
+ <shadedPattern>shaded.com.google</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>io.grpc.internal</pattern>
+ <shadedPattern>shaded.io.grpc.internal</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons</pattern>
+ <shadedPattern>shaded.org.apache.commons</shadedPattern>
+ <excludes>
+ <exclude>org.apache.hadoop</exclude>
+ <exclude>org.apache.log4j</exclude>
+ </excludes>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.http</pattern>
+ <shadedPattern>shaded.org.apache.http</shadedPattern>
+ </relocation>
+ </relocations>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-gpg-plugin</artifactId>
+ <version>1.5</version>
+ <executions>
+ <execution>
+ <id>sign-artifacts</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>sign</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.sonatype.plugins</groupId>
+ <artifactId>nexus-staging-maven-plugin</artifactId>
+ <version>1.6.7</version>
+ <extensions>true</extensions>
+ <configuration>
+ <serverId>ossrh</serverId>
+ <nexusUrl>https://oss.sonatype.org/</nexusUrl>
+ <autoReleaseAfterClose>true</autoReleaseAfterClose>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-source-plugin</artifactId>
+ <version>2.2.1</version>
+ <executions>
+ <execution>
+ <id>attach-sources</id>
+ <goals>
+ <goal>jar-no-fork</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>2.9.1</version>
+ <executions>
+ <execution>
+ <id>attach-javadocs</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ <distributionManagement>
+ <snapshotRepository>
+ <id>ossrh</id>
+ <url>https://oss.sonatype.org/content/repositories/snapshots</url>
+ </snapshotRepository>
+ </distributionManagement>
+ <properties>
+ <seaweedfs.client.version>1.2.4</seaweedfs.client.version>
+ <hadoop.version>2.9.2</hadoop.version>
+ </properties>
+</project>
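
For orientation before the hand-written hdfs2/pom.xml below: the shaded seaweedfs-hadoop2-client jar is meant to be dropped onto a Hadoop 2.x classpath and driven through the ordinary FileSystem API. The following is a minimal, hypothetical sketch of that wiring; the fs.seaweed.filer.host/fs.seaweed.filer.port keys, the default port 8888, and the seaweedfs:// scheme come from SeaweedFileSystem later in this patch, while the fs.seaweedfs.impl key simply follows Hadoop's standard fs.<scheme>.impl convention and localhost is a placeholder.

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class SeaweedFsQuickStart {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // map the seaweedfs:// scheme to the client shipped in this patch
            conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem");
            // filer address (placeholder host); 8888 is FS_SEAWEED_DEFAULT_PORT
            conf.set("fs.seaweed.filer.host", "localhost");
            conf.setInt("fs.seaweed.filer.port", 8888);

            FileSystem fs = FileSystem.get(URI.create("seaweedfs://localhost:8888/"), conf);
            try (FSDataOutputStream out = fs.create(new Path("/hello.txt"))) {
                out.writeBytes("hello from the hadoop2 client\n");
            }
            System.out.println(fs.getFileStatus(new Path("/hello.txt")).getLen());
            fs.close();
        }
    }

Note that SeaweedFileSystemStore derives the filer's gRPC port as port + 10000 (18888 for the default 8888), so both ports have to be reachable from the Hadoop nodes.
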
diff --git a/other/java/hdfs2/pom.xml b/other/java/hdfs2/pom.xml new file mode 100644 index 000000000..b8c8cb891 --- /dev/null +++ b/other/java/hdfs2/pom.xml @@ -0,0 +1,163 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <properties> + <seaweedfs.client.version>1.2.4</seaweedfs.client.version> + <hadoop.version>2.9.2</hadoop.version> + </properties> + + <groupId>com.github.chrislusf</groupId> + <artifactId>seaweedfs-hadoop2-client</artifactId> + <version>${seaweedfs.client.version}</version> + + <parent> + <groupId>org.sonatype.oss</groupId> + <artifactId>oss-parent</artifactId> + <version>9</version> + </parent> + + <distributionManagement> + <snapshotRepository> + <id>ossrh</id> + <url>https://oss.sonatype.org/content/repositories/snapshots</url> + </snapshotRepository> + </distributionManagement> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>7</source> + <target>7</target> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>3.2.1</version> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + <exclude>org/slf4j/**</exclude> + <exclude>META-INF/maven/org.slf4j/**</exclude> + </excludes> + </filter> + </filters> + <transformers> + <transformer + implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> + </transformers> + <relocations> + <relocation> + <pattern>com.google</pattern> + <shadedPattern>shaded.com.google</shadedPattern> + </relocation> + <relocation> + <pattern>io.grpc.internal</pattern> + <shadedPattern>shaded.io.grpc.internal</shadedPattern> + </relocation> + <relocation> + <pattern>org.apache.commons</pattern> + <shadedPattern>shaded.org.apache.commons</shadedPattern> + <excludes> + <exclude>org.apache.hadoop</exclude> + <exclude>org.apache.log4j</exclude> + </excludes> + </relocation> + <relocation> + <pattern>org.apache.http</pattern> + <shadedPattern>shaded.org.apache.http</shadedPattern> + </relocation> + </relocations> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-gpg-plugin</artifactId> + <version>1.5</version> + <executions> + <execution> + <id>sign-artifacts</id> + <phase>verify</phase> + <goals> + <goal>sign</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.sonatype.plugins</groupId> + <artifactId>nexus-staging-maven-plugin</artifactId> + <version>1.6.7</version> + <extensions>true</extensions> + <configuration> + <serverId>ossrh</serverId> + <nexusUrl>https://oss.sonatype.org/</nexusUrl> + <autoReleaseAfterClose>true</autoReleaseAfterClose> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + <version>2.2.1</version> + <executions> + <execution> + <id>attach-sources</id> + <goals> + <goal>jar-no-fork</goal> + </goals> + </execution> + </executions> + 
</plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <version>2.9.1</version> + <executions> + <execution> + <id>attach-javadocs</id> + <goals> + <goal>jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>com.github.chrislusf</groupId> + <artifactId>seaweedfs-client</artifactId> + <version>${seaweedfs.client.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + </dependency> + </dependencies> + +</project> diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBuffer.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBuffer.java new file mode 100644 index 000000000..926d0b83b --- /dev/null +++ b/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBuffer.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package seaweed.hdfs; + +import java.util.concurrent.CountDownLatch; + +class ReadBuffer { + + private SeaweedInputStream stream; + private long offset; // offset within the file for the buffer + private int length; // actual length, set after the buffer is filles + private int requestedLength; // requested length of the read + private byte[] buffer; // the buffer itself + private int bufferindex = -1; // index in the buffers array in Buffer manager + private ReadBufferStatus status; // status of the buffer + private CountDownLatch latch = null; // signaled when the buffer is done reading, so any client + // waiting on this buffer gets unblocked + + // fields to help with eviction logic + private long timeStamp = 0; // tick at which buffer became available to read + private boolean isFirstByteConsumed = false; + private boolean isLastByteConsumed = false; + private boolean isAnyByteConsumed = false; + + public SeaweedInputStream getStream() { + return stream; + } + + public void setStream(SeaweedInputStream stream) { + this.stream = stream; + } + + public long getOffset() { + return offset; + } + + public void setOffset(long offset) { + this.offset = offset; + } + + public int getLength() { + return length; + } + + public void setLength(int length) { + this.length = length; + } + + public int getRequestedLength() { + return requestedLength; + } + + public void setRequestedLength(int requestedLength) { + this.requestedLength = requestedLength; + } + + public byte[] getBuffer() { + return buffer; + } + + public void setBuffer(byte[] buffer) { + this.buffer = buffer; + } + + public int getBufferindex() { + return bufferindex; + } + + public void setBufferindex(int bufferindex) { + this.bufferindex = bufferindex; + } + + public ReadBufferStatus getStatus() { + return status; + } + + public void setStatus(ReadBufferStatus status) { + this.status = status; + } + + public CountDownLatch getLatch() { + return latch; + } + + public void setLatch(CountDownLatch latch) { + this.latch = latch; + } + + public long getTimeStamp() { + return timeStamp; + } + + public void setTimeStamp(long timeStamp) { + this.timeStamp = timeStamp; + } + + public boolean isFirstByteConsumed() { + return isFirstByteConsumed; + } + + public void setFirstByteConsumed(boolean isFirstByteConsumed) { + this.isFirstByteConsumed = isFirstByteConsumed; + } + + public boolean isLastByteConsumed() { + return isLastByteConsumed; + } + + public void setLastByteConsumed(boolean isLastByteConsumed) { + this.isLastByteConsumed = isLastByteConsumed; + } + + public boolean isAnyByteConsumed() { + return isAnyByteConsumed; + } + + public void setAnyByteConsumed(boolean isAnyByteConsumed) { + this.isAnyByteConsumed = isAnyByteConsumed; + } + +} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferManager.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferManager.java new file mode 100644 index 000000000..5b1e21529 --- /dev/null +++ b/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferManager.java @@ -0,0 +1,394 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package seaweed.hdfs; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.Queue; +import java.util.Stack; +import java.util.concurrent.CountDownLatch; + +/** + * The Read Buffer Manager for Rest AbfsClient. + */ +final class ReadBufferManager { + private static final Logger LOGGER = LoggerFactory.getLogger(ReadBufferManager.class); + + private static final int NUM_BUFFERS = 16; + private static final int BLOCK_SIZE = 4 * 1024 * 1024; + private static final int NUM_THREADS = 8; + private static final int THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold + + private Thread[] threads = new Thread[NUM_THREADS]; + private byte[][] buffers; // array of byte[] buffers, to hold the data that is read + private Stack<Integer> freeList = new Stack<>(); // indices in buffers[] array that are available + + private Queue<ReadBuffer> readAheadQueue = new LinkedList<>(); // queue of requests that are not picked up by any worker thread yet + private LinkedList<ReadBuffer> inProgressList = new LinkedList<>(); // requests being processed by worker threads + private LinkedList<ReadBuffer> completedReadList = new LinkedList<>(); // buffers available for reading + private static final ReadBufferManager BUFFER_MANAGER; // singleton, initialized in static initialization block + + static { + BUFFER_MANAGER = new ReadBufferManager(); + BUFFER_MANAGER.init(); + } + + static ReadBufferManager getBufferManager() { + return BUFFER_MANAGER; + } + + private void init() { + buffers = new byte[NUM_BUFFERS][]; + for (int i = 0; i < NUM_BUFFERS; i++) { + buffers[i] = new byte[BLOCK_SIZE]; // same buffers are reused. The byte array never goes back to GC + freeList.add(i); + } + for (int i = 0; i < NUM_THREADS; i++) { + Thread t = new Thread(new ReadBufferWorker(i)); + t.setDaemon(true); + threads[i] = t; + t.setName("SeaweedFS-prefetch-" + i); + t.start(); + } + ReadBufferWorker.UNLEASH_WORKERS.countDown(); + } + + // hide instance constructor + private ReadBufferManager() { + } + + + /* + * + * SeaweedInputStream-facing methods + * + */ + + + /** + * {@link SeaweedInputStream} calls this method to queue read-aheads. 
+     *
+     * @param stream          The {@link SeaweedInputStream} for which to do the read-ahead
+     * @param requestedOffset The offset in the file which should be read
+     * @param requestedLength The length to read
+     */
+    void queueReadAhead(final SeaweedInputStream stream, final long requestedOffset, final int requestedLength) {
+        if (LOGGER.isTraceEnabled()) {
+            LOGGER.trace("Start Queueing readAhead for {} offset {} length {}",
+                stream.getPath(), requestedOffset, requestedLength);
+        }
+        ReadBuffer buffer;
+        synchronized (this) {
+            if (isAlreadyQueued(stream, requestedOffset)) {
+                return; // already queued, do not queue again
+            }
+            if (freeList.isEmpty() && !tryEvict()) {
+                return; // no buffers available, cannot queue anything
+            }
+
+            buffer = new ReadBuffer();
+            buffer.setStream(stream);
+            buffer.setOffset(requestedOffset);
+            buffer.setLength(0);
+            buffer.setRequestedLength(requestedLength);
+            buffer.setStatus(ReadBufferStatus.NOT_AVAILABLE);
+            buffer.setLatch(new CountDownLatch(1));
+
+            Integer bufferIndex = freeList.pop(); // will return a value, since we have checked size > 0 already
+
+            buffer.setBuffer(buffers[bufferIndex]);
+            buffer.setBufferindex(bufferIndex);
+            readAheadQueue.add(buffer);
+            notifyAll();
+        }
+        if (LOGGER.isTraceEnabled()) {
+            LOGGER.trace("Done q-ing readAhead for file {} offset {} buffer idx {}",
+                stream.getPath(), requestedOffset, buffer.getBufferindex());
+        }
+    }
+
+
+    /**
+     * {@link SeaweedInputStream} calls this method to read any bytes already available in a buffer (thereby saving a
+     * remote read). This returns the bytes if the data already exists in buffer. If there is a buffer that is reading
+     * the requested offset, then this method blocks until that read completes. If the data is queued in a read-ahead
+     * but not picked up by a worker thread yet, then it cancels that read-ahead and reports cache miss. This is because
+     * depending on worker thread availability, the read-ahead may take a while - the calling thread can do its own
+     * read to get the data faster (compared to the read waiting in queue for an indeterminate amount of time).
+     *
+     * @param stream   the file to read bytes for
+     * @param position the offset in the file to do a read for
+     * @param length   the length to read
+     * @param buffer   the buffer to read data into. Note that the buffer will be written into from offset 0.
+     * @return the number of bytes read
+     */
+    int getBlock(final SeaweedInputStream stream, final long position, final int length, final byte[] buffer) {
+        // not synchronized, so have to be careful with locking
+        if (LOGGER.isTraceEnabled()) {
+            LOGGER.trace("getBlock for file {} position {} thread {}",
+                stream.getPath(), position, Thread.currentThread().getName());
+        }
+
+        waitForProcess(stream, position);
+
+        int bytesRead = 0;
+        synchronized (this) {
+            bytesRead = getBlockFromCompletedQueue(stream, position, length, buffer);
+        }
+        if (bytesRead > 0) {
+            if (LOGGER.isTraceEnabled()) {
+                LOGGER.trace("Done read from Cache for {} position {} length {}",
+                    stream.getPath(), position, bytesRead);
+            }
+            return bytesRead;
+        }
+
+        // otherwise, just say we got nothing - calling thread can do its own read
+        return 0;
+    }
+
+    /*
+     *
+     * Internal methods
+     *
+     */
+
+    private void waitForProcess(final SeaweedInputStream stream, final long position) {
+        ReadBuffer readBuf;
+        synchronized (this) {
+            clearFromReadAheadQueue(stream, position);
+            readBuf = getFromList(inProgressList, stream, position);
+        }
+        if (readBuf != null) { // if in in-progress queue, then block for it
+            try {
+                if (LOGGER.isTraceEnabled()) {
+                    LOGGER.trace("got a relevant read buffer for file {} offset {} buffer idx {}",
+                        stream.getPath(), readBuf.getOffset(), readBuf.getBufferindex());
+                }
+                readBuf.getLatch().await(); // blocking wait on the caller stream's thread
+                // Note on correctness: readBuf gets out of inProgressList only in 1 place: after worker thread
+                // is done processing it (in doneReading). There, the latch is set after removing the buffer from
+                // inProgressList. So this latch is safe to be outside the synchronized block.
+                // Putting it in synchronized would result in a deadlock, since this thread would be holding the lock
+                // while waiting, so no one will be able to change any state. If this becomes more complex in the future,
+                // then the latch can be removed and replaced with wait/notify whenever inProgressList is touched.
+            } catch (InterruptedException ex) {
+                Thread.currentThread().interrupt();
+            }
+            if (LOGGER.isTraceEnabled()) {
+                LOGGER.trace("latch done for file {} buffer idx {} length {}",
+                    stream.getPath(), readBuf.getBufferindex(), readBuf.getLength());
+            }
+        }
+    }
+
+    /**
+     * If any buffer in the completedReadList can be reclaimed then reclaim it and return the buffer to free list.
+     * The objective is to find just one buffer - there is no advantage to evicting more than one.
+     *
+     * @return whether the eviction succeeded - i.e., were we able to free up one buffer
+     */
+    private synchronized boolean tryEvict() {
+        ReadBuffer nodeToEvict = null;
+        if (completedReadList.size() <= 0) {
+            return false; // there are no evict-able buffers
+        }
+
+        // first, try buffers where all bytes have been consumed (approximated as first and last bytes consumed)
+        for (ReadBuffer buf : completedReadList) {
+            if (buf.isFirstByteConsumed() && buf.isLastByteConsumed()) {
+                nodeToEvict = buf;
+                break;
+            }
+        }
+        if (nodeToEvict != null) {
+            return evict(nodeToEvict);
+        }
+
+        // next, try buffers where any bytes have been consumed (may be a bad idea? 
have to experiment and see) + for (ReadBuffer buf : completedReadList) { + if (buf.isAnyByteConsumed()) { + nodeToEvict = buf; + break; + } + } + + if (nodeToEvict != null) { + return evict(nodeToEvict); + } + + // next, try any old nodes that have not been consumed + long earliestBirthday = Long.MAX_VALUE; + for (ReadBuffer buf : completedReadList) { + if (buf.getTimeStamp() < earliestBirthday) { + nodeToEvict = buf; + earliestBirthday = buf.getTimeStamp(); + } + } + if ((currentTimeMillis() - earliestBirthday > THRESHOLD_AGE_MILLISECONDS) && (nodeToEvict != null)) { + return evict(nodeToEvict); + } + + // nothing can be evicted + return false; + } + + private boolean evict(final ReadBuffer buf) { + freeList.push(buf.getBufferindex()); + completedReadList.remove(buf); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("Evicting buffer idx {}; was used for file {} offset {} length {}", + buf.getBufferindex(), buf.getStream().getPath(), buf.getOffset(), buf.getLength()); + } + return true; + } + + private boolean isAlreadyQueued(final SeaweedInputStream stream, final long requestedOffset) { + // returns true if any part of the buffer is already queued + return (isInList(readAheadQueue, stream, requestedOffset) + || isInList(inProgressList, stream, requestedOffset) + || isInList(completedReadList, stream, requestedOffset)); + } + + private boolean isInList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) { + return (getFromList(list, stream, requestedOffset) != null); + } + + private ReadBuffer getFromList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) { + for (ReadBuffer buffer : list) { + if (buffer.getStream() == stream) { + if (buffer.getStatus() == ReadBufferStatus.AVAILABLE + && requestedOffset >= buffer.getOffset() + && requestedOffset < buffer.getOffset() + buffer.getLength()) { + return buffer; + } else if (requestedOffset >= buffer.getOffset() + && requestedOffset < buffer.getOffset() + buffer.getRequestedLength()) { + return buffer; + } + } + } + return null; + } + + private void clearFromReadAheadQueue(final SeaweedInputStream stream, final long requestedOffset) { + ReadBuffer buffer = getFromList(readAheadQueue, stream, requestedOffset); + if (buffer != null) { + readAheadQueue.remove(buffer); + notifyAll(); // lock is held in calling method + freeList.push(buffer.getBufferindex()); + } + } + + private int getBlockFromCompletedQueue(final SeaweedInputStream stream, final long position, final int length, + final byte[] buffer) { + ReadBuffer buf = getFromList(completedReadList, stream, position); + if (buf == null || position >= buf.getOffset() + buf.getLength()) { + return 0; + } + int cursor = (int) (position - buf.getOffset()); + int availableLengthInBuffer = buf.getLength() - cursor; + int lengthToCopy = Math.min(length, availableLengthInBuffer); + System.arraycopy(buf.getBuffer(), cursor, buffer, 0, lengthToCopy); + if (cursor == 0) { + buf.setFirstByteConsumed(true); + } + if (cursor + lengthToCopy == buf.getLength()) { + buf.setLastByteConsumed(true); + } + buf.setAnyByteConsumed(true); + return lengthToCopy; + } + + /* + * + * ReadBufferWorker-thread-facing methods + * + */ + + /** + * ReadBufferWorker thread calls this to get the next buffer that it should work on. 
+ * + * @return {@link ReadBuffer} + * @throws InterruptedException if thread is interrupted + */ + ReadBuffer getNextBlockToRead() throws InterruptedException { + ReadBuffer buffer = null; + synchronized (this) { + //buffer = readAheadQueue.take(); // blocking method + while (readAheadQueue.size() == 0) { + wait(); + } + buffer = readAheadQueue.remove(); + notifyAll(); + if (buffer == null) { + return null; // should never happen + } + buffer.setStatus(ReadBufferStatus.READING_IN_PROGRESS); + inProgressList.add(buffer); + } + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("ReadBufferWorker picked file {} for offset {}", + buffer.getStream().getPath(), buffer.getOffset()); + } + return buffer; + } + + /** + * ReadBufferWorker thread calls this method to post completion. + * + * @param buffer the buffer whose read was completed + * @param result the {@link ReadBufferStatus} after the read operation in the worker thread + * @param bytesActuallyRead the number of bytes that the worker thread was actually able to read + */ + void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final int bytesActuallyRead) { + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("ReadBufferWorker completed file {} for offset {} bytes {}", + buffer.getStream().getPath(), buffer.getOffset(), bytesActuallyRead); + } + synchronized (this) { + inProgressList.remove(buffer); + if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) { + buffer.setStatus(ReadBufferStatus.AVAILABLE); + buffer.setTimeStamp(currentTimeMillis()); + buffer.setLength(bytesActuallyRead); + completedReadList.add(buffer); + } else { + freeList.push(buffer.getBufferindex()); + // buffer should go out of scope after the end of the calling method in ReadBufferWorker, and eligible for GC + } + } + //outside the synchronized, since anyone receiving a wake-up from the latch must see safe-published results + buffer.getLatch().countDown(); // wake up waiting threads (if any) + } + + /** + * Similar to System.currentTimeMillis, except implemented with System.nanoTime(). + * System.currentTimeMillis can go backwards when system clock is changed (e.g., with NTP time synchronization), + * making it unsuitable for measuring time intervals. nanotime is strictly monotonically increasing per CPU core. + * Note: it is not monotonic across Sockets, and even within a CPU, its only the + * more recent parts which share a clock across all cores. + * + * @return current time in milliseconds + */ + private long currentTimeMillis() { + return System.nanoTime() / 1000 / 1000; + } +} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferStatus.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferStatus.java new file mode 100644 index 000000000..d63674977 --- /dev/null +++ b/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferStatus.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package seaweed.hdfs;
+
+/**
+ * The status of a {@link ReadBuffer} in the read-ahead pipeline.
+ */
+public enum ReadBufferStatus {
+    NOT_AVAILABLE,       // buffers sitting in readaheadqueue have this status
+    READING_IN_PROGRESS, // reading is in progress on this buffer. Buffer should be in inProgressList
+    AVAILABLE,           // data is available in buffer. It should be in completedList
+    READ_FAILED          // read completed, but failed.
+}
diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferWorker.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferWorker.java
new file mode 100644
index 000000000..6ffbc4644
--- /dev/null
+++ b/other/java/hdfs2/src/main/java/seaweed/hdfs/ReadBufferWorker.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package seaweed.hdfs;
+
+import java.util.concurrent.CountDownLatch;
+
+class ReadBufferWorker implements Runnable {
+
+    protected static final CountDownLatch UNLEASH_WORKERS = new CountDownLatch(1);
+    private int id;
+
+    ReadBufferWorker(final int id) {
+        this.id = id;
+    }
+
+    /**
+     * return the ID of ReadBufferWorker.
+     */
+    public int getId() {
+        return this.id;
+    }
+
+    /**
+     * Waits until a buffer becomes available in ReadAheadQueue.
+     * Once a buffer becomes available, reads the file specified in it and then posts results back to buffer manager.
+     * Rinse and repeat. Forever.
+     */
+    public void run() {
+        try {
+            UNLEASH_WORKERS.await();
+        } catch (InterruptedException ex) {
+            Thread.currentThread().interrupt();
+        }
+        ReadBufferManager bufferManager = ReadBufferManager.getBufferManager();
+        ReadBuffer buffer;
+        while (true) {
+            try {
+                buffer = bufferManager.getNextBlockToRead(); // blocks, until a buffer is available for this thread
+            } catch (InterruptedException ex) {
+                Thread.currentThread().interrupt();
+                return;
+            }
+            if (buffer != null) {
+                try {
+                    // do the actual read, from the file.
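+                    // (a positioned read against the underlying SeaweedInputStream)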
+ int bytesRead = buffer.getStream().readRemote(buffer.getOffset(), buffer.getBuffer(), 0, buffer.getRequestedLength()); + bufferManager.doneReading(buffer, ReadBufferStatus.AVAILABLE, bytesRead); // post result back to ReadBufferManager + } catch (Exception ex) { + bufferManager.doneReading(buffer, ReadBufferStatus.READ_FAILED, 0); + } + } + } + } +} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java new file mode 100644 index 000000000..d471d8440 --- /dev/null +++ b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java @@ -0,0 +1,620 @@ +package seaweed.hdfs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Progressable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; + +public class SeaweedFileSystem extends org.apache.hadoop.fs.FileSystem { + + public static final int FS_SEAWEED_DEFAULT_PORT = 8888; + public static final String FS_SEAWEED_FILER_HOST = "fs.seaweed.filer.host"; + public static final String FS_SEAWEED_FILER_PORT = "fs.seaweed.filer.port"; + + private static final Logger LOG = LoggerFactory.getLogger(SeaweedFileSystem.class); + private static int BUFFER_SIZE = 16 * 1024 * 1024; + + private URI uri; + private Path workingDirectory = new Path("/"); + private SeaweedFileSystemStore seaweedFileSystemStore; + + public URI getUri() { + return uri; + } + + public String getScheme() { + return "seaweedfs"; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { // get + super.initialize(uri, conf); + + // get host information from uri (overrides info in conf) + String host = uri.getHost(); + host = (host == null) ? conf.get(FS_SEAWEED_FILER_HOST, "localhost") : host; + if (host == null) { + throw new IOException("Invalid host specified"); + } + conf.set(FS_SEAWEED_FILER_HOST, host); + + // get port information from uri, (overrides info in conf) + int port = uri.getPort(); + port = (port == -1) ? 
FS_SEAWEED_DEFAULT_PORT : port; + conf.setInt(FS_SEAWEED_FILER_PORT, port); + + conf.setInt(IO_FILE_BUFFER_SIZE_KEY, BUFFER_SIZE); + + setConf(conf); + this.uri = uri; + + seaweedFileSystemStore = new SeaweedFileSystemStore(host, port); + + } + + @Override + public FSDataInputStream open(Path path, int bufferSize) throws IOException { + + LOG.debug("open path: {} bufferSize:{}", path, bufferSize); + + path = qualify(path); + + try { + InputStream inputStream = seaweedFileSystemStore.openFileForRead(path, statistics, bufferSize); + return new FSDataInputStream(inputStream); + } catch (Exception ex) { + LOG.warn("open path: {} bufferSize:{}", path, bufferSize, ex); + return null; + } + } + + @Override + public FSDataOutputStream create(Path path, FsPermission permission, final boolean overwrite, final int bufferSize, + final short replication, final long blockSize, final Progressable progress) throws IOException { + + LOG.debug("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize); + + path = qualify(path); + + try { + String replicaPlacement = String.format("%03d", replication - 1); + OutputStream outputStream = seaweedFileSystemStore.createFile(path, overwrite, permission, bufferSize, replicaPlacement); + return new FSDataOutputStream(outputStream, statistics); + } catch (Exception ex) { + LOG.warn("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize, ex); + return null; + } + } + + /** + * {@inheritDoc} + * @throws FileNotFoundException if the parent directory is not present -or + * is not a directory. + */ + @Override + public FSDataOutputStream createNonRecursive(Path path, + FsPermission permission, + EnumSet<CreateFlag> flags, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException { + Path parent = path.getParent(); + if (parent != null) { + // expect this to raise an exception if there is no parent + if (!getFileStatus(parent).isDirectory()) { + throw new FileAlreadyExistsException("Not a directory: " + parent); + } + } + return create(path, permission, + flags.contains(CreateFlag.OVERWRITE), bufferSize, + replication, blockSize, progress); + } + + @Override + public FSDataOutputStream append(Path path, int bufferSize, Progressable progressable) throws IOException { + + LOG.debug("append path: {} bufferSize:{}", path, bufferSize); + + path = qualify(path); + try { + OutputStream outputStream = seaweedFileSystemStore.createFile(path, false, null, bufferSize, ""); + return new FSDataOutputStream(outputStream, statistics); + } catch (Exception ex) { + LOG.warn("append path: {} bufferSize:{}", path, bufferSize, ex); + return null; + } + } + + @Override + public boolean rename(Path src, Path dst) { + + LOG.debug("rename path: {} => {}", src, dst); + + if (src.isRoot()) { + return false; + } + + if (src.equals(dst)) { + return true; + } + FileStatus dstFileStatus = getFileStatus(dst); + + String sourceFileName = src.getName(); + Path adjustedDst = dst; + + if (dstFileStatus != null) { + if (!dstFileStatus.isDirectory()) { + return false; + } + adjustedDst = new Path(dst, sourceFileName); + } + + Path qualifiedSrcPath = qualify(src); + Path qualifiedDstPath = qualify(adjustedDst); + + seaweedFileSystemStore.rename(qualifiedSrcPath, qualifiedDstPath); + return true; + } + + @Override + public boolean delete(Path path, boolean recursive) { + + LOG.debug("delete path: {} recursive:{}", path, recursive); + + path = qualify(path); + + FileStatus fileStatus = getFileStatus(path); + + if (fileStatus == null) { 
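+            // the path does not exist, so treat the delete as already complete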
+ return true; + } + + return seaweedFileSystemStore.deleteEntries(path, fileStatus.isDirectory(), recursive); + + } + + @Override + public FileStatus[] listStatus(Path path) throws IOException { + + LOG.debug("listStatus path: {}", path); + + path = qualify(path); + + return seaweedFileSystemStore.listEntries(path); + } + + @Override + public Path getWorkingDirectory() { + return workingDirectory; + } + + @Override + public void setWorkingDirectory(Path path) { + if (path.isAbsolute()) { + workingDirectory = path; + } else { + workingDirectory = new Path(workingDirectory, path); + } + } + + @Override + public boolean mkdirs(Path path, FsPermission fsPermission) throws IOException { + + LOG.debug("mkdirs path: {}", path); + + path = qualify(path); + + FileStatus fileStatus = getFileStatus(path); + + if (fileStatus == null) { + + UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); + return seaweedFileSystemStore.createDirectory(path, currentUser, + fsPermission == null ? FsPermission.getDirDefault() : fsPermission, + FsPermission.getUMask(getConf())); + + } + + if (fileStatus.isDirectory()) { + return true; + } else { + throw new FileAlreadyExistsException("Path is a file: " + path); + } + } + + @Override + public FileStatus getFileStatus(Path path) { + + LOG.debug("getFileStatus path: {}", path); + + path = qualify(path); + + return seaweedFileSystemStore.getFileStatus(path); + } + + /** + * Set owner of a path (i.e. a file or a directory). + * The parameters owner and group cannot both be null. + * + * @param path The path + * @param owner If it is null, the original username remains unchanged. + * @param group If it is null, the original groupname remains unchanged. + */ + @Override + public void setOwner(Path path, final String owner, final String group) + throws IOException { + LOG.debug("setOwner path: {}", path); + path = qualify(path); + + seaweedFileSystemStore.setOwner(path, owner, group); + } + + + /** + * Set permission of a path. + * + * @param path The path + * @param permission Access permission + */ + @Override + public void setPermission(Path path, final FsPermission permission) throws IOException { + LOG.debug("setPermission path: {}", path); + + if (permission == null) { + throw new IllegalArgumentException("The permission can't be null"); + } + + path = qualify(path); + + seaweedFileSystemStore.setPermission(path, permission); + } + + Path qualify(Path path) { + return path.makeQualified(uri, workingDirectory); + } + + /** + * Concat existing files together. + * + * @param trg the path to the target destination. + * @param psrcs the paths to the sources to use for the concatenation. + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default). + */ + @Override + public void concat(final Path trg, final Path[] psrcs) throws IOException { + throw new UnsupportedOperationException("Not implemented by the " + + getClass().getSimpleName() + " FileSystem implementation"); + } + + /** + * Truncate the file in the indicated path to the indicated size. 
+ * <ul> + * <li>Fails if path is a directory.</li> + * <li>Fails if path does not exist.</li> + * <li>Fails if path is not closed.</li> + * <li>Fails if new size is greater than current size.</li> + * </ul> + * + * @param f The path to the file to be truncated + * @param newLength The size the file is to be truncated to + * @return <code>true</code> if the file has been truncated to the desired + * <code>newLength</code> and is immediately available to be reused for + * write operations such as <code>append</code>, or + * <code>false</code> if a background process of adjusting the length of + * the last block has been started, and clients should wait for it to + * complete before proceeding with further file updates. + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default). + */ + @Override + public boolean truncate(Path f, long newLength) throws IOException { + throw new UnsupportedOperationException("Not implemented by the " + + getClass().getSimpleName() + " FileSystem implementation"); + } + + @Override + public void createSymlink(final Path target, final Path link, + final boolean createParent) throws AccessControlException, + FileAlreadyExistsException, FileNotFoundException, + ParentNotDirectoryException, UnsupportedFileSystemException, + IOException { + // Supporting filesystems should override this method + throw new UnsupportedOperationException( + "Filesystem does not support symlinks!"); + } + + public boolean supportsSymlinks() { + return false; + } + + /** + * Create a snapshot. + * + * @param path The directory where snapshots will be taken. + * @param snapshotName The name of the snapshot + * @return the snapshot path. + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + */ + @Override + public Path createSnapshot(Path path, String snapshotName) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support createSnapshot"); + } + + /** + * Rename a snapshot. + * + * @param path The directory path where the snapshot was taken + * @param snapshotOldName Old name of the snapshot + * @param snapshotNewName New name of the snapshot + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void renameSnapshot(Path path, String snapshotOldName, + String snapshotNewName) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support renameSnapshot"); + } + + /** + * Delete a snapshot of a directory. + * + * @param path The directory that the to-be-deleted snapshot belongs to + * @param snapshotName The name of the snapshot + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void deleteSnapshot(Path path, String snapshotName) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support deleteSnapshot"); + } + + /** + * Modifies ACL entries of files and directories. This method can add new ACL + * entries or modify the permissions on existing ACL entries. All existing + * ACL entries that are not specified in this call are retained without + * changes. (Modifications are merged into the current ACL.) 
+ * + * @param path Path to modify + * @param aclSpec List<AclEntry> describing modifications + * @throws IOException if an ACL could not be modified + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void modifyAclEntries(Path path, List<AclEntry> aclSpec) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support modifyAclEntries"); + } + + /** + * Removes ACL entries from files and directories. Other ACL entries are + * retained. + * + * @param path Path to modify + * @param aclSpec List describing entries to remove + * @throws IOException if an ACL could not be modified + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void removeAclEntries(Path path, List<AclEntry> aclSpec) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support removeAclEntries"); + } + + /** + * Removes all default ACL entries from files and directories. + * + * @param path Path to modify + * @throws IOException if an ACL could not be modified + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void removeDefaultAcl(Path path) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support removeDefaultAcl"); + } + + /** + * Removes all but the base ACL entries of files and directories. The entries + * for user, group, and others are retained for compatibility with permission + * bits. + * + * @param path Path to modify + * @throws IOException if an ACL could not be removed + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void removeAcl(Path path) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support removeAcl"); + } + + /** + * Fully replaces ACL of files and directories, discarding all existing + * entries. + * + * @param path Path to modify + * @param aclSpec List describing modifications, which must include entries + * for user, group, and others for compatibility with permission bits. + * @throws IOException if an ACL could not be modified + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void setAcl(Path path, List<AclEntry> aclSpec) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support setAcl"); + } + + /** + * Gets the ACL of a file or directory. + * + * @param path Path to get + * @return AclStatus describing the ACL of the file or directory + * @throws IOException if an ACL could not be read + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public AclStatus getAclStatus(Path path) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support getAclStatus"); + } + + /** + * Set an xattr of a file or directory. + * The name must be prefixed with the namespace followed by ".". For example, + * "user.attr". + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to modify + * @param name xattr name. + * @param value xattr value. 
+ * @param flag xattr set flag + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void setXAttr(Path path, String name, byte[] value, + EnumSet<XAttrSetFlag> flag) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support setXAttr"); + } + + /** + * Get an xattr name and value for a file or directory. + * The name must be prefixed with the namespace followed by ".". For example, + * "user.attr". + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to get extended attribute + * @param name xattr name. + * @return byte[] xattr value. + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public byte[] getXAttr(Path path, String name) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support getXAttr"); + } + + /** + * Get all of the xattr name/value pairs for a file or directory. + * Only those xattrs which the logged-in user has permissions to view + * are returned. + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to get extended attributes + * @return Map describing the XAttrs of the file or directory + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public Map<String, byte[]> getXAttrs(Path path) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support getXAttrs"); + } + + /** + * Get all of the xattrs name/value pairs for a file or directory. + * Only those xattrs which the logged-in user has permissions to view + * are returned. + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to get extended attributes + * @param names XAttr names. + * @return Map describing the XAttrs of the file or directory + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public Map<String, byte[]> getXAttrs(Path path, List<String> names) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support getXAttrs"); + } + + /** + * Get all of the xattr names for a file or directory. + * Only those xattr names which the logged-in user has permissions to view + * are returned. + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to get extended attributes + * @return List{@literal <String>} of the XAttr names of the file or directory + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public List<String> listXAttrs(Path path) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support listXAttrs"); + } + + /** + * Remove an xattr of a file or directory. + * The name must be prefixed with the namespace followed by ".". For example, + * "user.attr". + * <p> + * Refer to the HDFS extended attributes user documentation for details. 
+ * + * @param path Path to remove extended attribute + * @param name xattr name + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void removeXAttr(Path path, String name) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support removeXAttr"); + } + +} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java new file mode 100644 index 000000000..774c090e8 --- /dev/null +++ b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java @@ -0,0 +1,270 @@ +package seaweed.hdfs; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import seaweedfs.client.FilerClient; +import seaweedfs.client.FilerGrpcClient; +import seaweedfs.client.FilerProto; +import seaweedfs.client.SeaweedRead; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class SeaweedFileSystemStore { + + private static final Logger LOG = LoggerFactory.getLogger(SeaweedFileSystemStore.class); + + private FilerGrpcClient filerGrpcClient; + private FilerClient filerClient; + + public SeaweedFileSystemStore(String host, int port) { + int grpcPort = 10000 + port; + filerGrpcClient = new FilerGrpcClient(host, grpcPort); + filerClient = new FilerClient(filerGrpcClient); + } + + public static String getParentDirectory(Path path) { + return path.isRoot() ? 
"/" : path.getParent().toUri().getPath(); + } + + static int permissionToMode(FsPermission permission, boolean isDirectory) { + int p = permission.toShort(); + if (isDirectory) { + p = p | 1 << 31; + } + return p; + } + + public boolean createDirectory(final Path path, UserGroupInformation currentUser, + final FsPermission permission, final FsPermission umask) { + + LOG.debug("createDirectory path: {} permission: {} umask: {}", + path, + permission, + umask); + + return filerClient.mkdirs( + path.toUri().getPath(), + permissionToMode(permission, true), + currentUser.getUserName(), + currentUser.getGroupNames() + ); + } + + public FileStatus[] listEntries(final Path path) { + LOG.debug("listEntries path: {}", path); + + List<FileStatus> fileStatuses = new ArrayList<FileStatus>(); + + List<FilerProto.Entry> entries = filerClient.listEntries(path.toUri().getPath()); + + for (FilerProto.Entry entry : entries) { + + FileStatus fileStatus = doGetFileStatus(new Path(path, entry.getName()), entry); + + fileStatuses.add(fileStatus); + } + return fileStatuses.toArray(new FileStatus[0]); + } + + public FileStatus getFileStatus(final Path path) { + + FilerProto.Entry entry = lookupEntry(path); + if (entry == null) { + return null; + } + LOG.debug("doGetFileStatus path:{} entry:{}", path, entry); + + FileStatus fileStatus = doGetFileStatus(path, entry); + return fileStatus; + } + + public boolean deleteEntries(final Path path, boolean isDirectory, boolean recursive) { + LOG.debug("deleteEntries path: {} isDirectory {} recursive: {}", + path, + String.valueOf(isDirectory), + String.valueOf(recursive)); + + if (path.isRoot()) { + return true; + } + + if (recursive && isDirectory) { + List<FilerProto.Entry> entries = filerClient.listEntries(path.toUri().getPath()); + for (FilerProto.Entry entry : entries) { + deleteEntries(new Path(path, entry.getName()), entry.getIsDirectory(), true); + } + } + + return filerClient.deleteEntry(getParentDirectory(path), path.getName(), true, recursive, true); + } + + private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) { + FilerProto.FuseAttributes attributes = entry.getAttributes(); + long length = SeaweedRead.totalSize(entry.getChunksList()); + boolean isDir = entry.getIsDirectory(); + int block_replication = 1; + int blocksize = 512; + long modification_time = attributes.getMtime() * 1000; // milliseconds + long access_time = 0; + FsPermission permission = FsPermission.createImmutable((short) attributes.getFileMode()); + String owner = attributes.getUserName(); + String group = attributes.getGroupNameCount() > 0 ? 
attributes.getGroupName(0) : "";
+        return new FileStatus(length, isDir, block_replication, blocksize,
+                modification_time, access_time, permission, owner, group, null, path);
+    }
+
+    private FilerProto.Entry lookupEntry(Path path) {
+
+        return filerClient.lookupEntry(getParentDirectory(path), path.getName());
+
+    }
+
+    public void rename(Path source, Path destination) {
+
+        LOG.debug("rename source: {} destination:{}", source, destination);
+
+        if (source.isRoot()) {
+            return;
+        }
+        LOG.info("rename source: {} destination:{}", source, destination);
+        FilerProto.Entry entry = lookupEntry(source);
+        if (entry == null) {
+            LOG.warn("rename non-existing source: {}", source);
+            return;
+        }
+        filerClient.mv(source.toUri().getPath(), destination.toUri().getPath());
+    }
+
+    public OutputStream createFile(final Path path,
+                                   final boolean overwrite,
+                                   FsPermission permission,
+                                   int bufferSize,
+                                   String replication) throws IOException {
+
+        permission = permission == null ? FsPermission.getFileDefault() : permission;
+
+        LOG.debug("createFile path: {} overwrite: {} permission: {}",
+                path,
+                overwrite,
+                permission.toString());
+
+        UserGroupInformation userGroupInformation = UserGroupInformation.getCurrentUser();
+        long now = System.currentTimeMillis() / 1000L;
+
+        FilerProto.Entry.Builder entry = null;
+        long writePosition = 0;
+        if (!overwrite) {
+            FilerProto.Entry existingEntry = lookupEntry(path);
+            LOG.debug("createFile merged entry path:{} existingEntry:{}", path, existingEntry);
+            if (existingEntry != null) {
+                entry = FilerProto.Entry.newBuilder();
+                entry.mergeFrom(existingEntry);
+                entry.getAttributesBuilder().setMtime(now);
+                LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry);
+                writePosition = SeaweedRead.totalSize(existingEntry.getChunksList());
+                replication = existingEntry.getAttributes().getReplication();
+            }
+        }
+        if (entry == null) {
+            entry = FilerProto.Entry.newBuilder()
+                    .setName(path.getName())
+                    .setIsDirectory(false)
+                    .setAttributes(FilerProto.FuseAttributes.newBuilder()
+                            .setFileMode(permissionToMode(permission, false))
+                            .setReplication(replication)
+                            .setCrtime(now)
+                            .setMtime(now)
+                            .setUserName(userGroupInformation.getUserName())
+                            .clearGroupName()
+                            .addAllGroupName(Arrays.asList(userGroupInformation.getGroupNames()))
+                    );
+        }
+
+        return new SeaweedOutputStream(filerGrpcClient, path, entry, writePosition, bufferSize, replication);
+
+    }
+
+    public InputStream openFileForRead(final Path path, FileSystem.Statistics statistics,
+                                       int bufferSize) throws IOException {
+
+        LOG.debug("openFileForRead path:{} bufferSize:{}", path, bufferSize);
+
+        int readAheadQueueDepth = 2;
+        FilerProto.Entry entry = lookupEntry(path);
+
+        if (entry == null) {
+            throw new FileNotFoundException("read non-existent file " + path);
+        }
+
+        return new SeaweedInputStream(filerGrpcClient,
+                statistics,
+                path.toUri().getPath(),
+                entry,
+                bufferSize,
+                readAheadQueueDepth);
+    }
+
+    public void setOwner(Path path, String owner, String group) {
+
+        LOG.debug("setOwner path:{} owner:{} group:{}", path, owner, group);
+
+        FilerProto.Entry entry = lookupEntry(path);
+        if (entry == null) {
+            LOG.debug("setOwner path:{} entry:{}", path, entry);
+            return;
+        }
+
+        FilerProto.Entry.Builder entryBuilder = entry.toBuilder();
+        FilerProto.FuseAttributes.Builder attributesBuilder = entry.getAttributes().toBuilder();
+
+        if (owner != null) {
+            attributesBuilder.setUserName(owner);
+        }
+        if (group != null) {
+            attributesBuilder.clearGroupName();
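+            // the entry keeps a repeated group name list: clear the old groups before adding the new one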
+            attributesBuilder.addGroupName(group);
+        }
+
+        entryBuilder.setAttributes(attributesBuilder);
+
+        LOG.debug("setOwner path:{} entry:{}", path, entryBuilder);
+
+        filerClient.updateEntry(getParentDirectory(path), entryBuilder.build());
+
+    }
+
+    public void setPermission(Path path, FsPermission permission) {
+
+        LOG.debug("setPermission path:{} permission:{}", path, permission);
+
+        FilerProto.Entry entry = lookupEntry(path);
+        if (entry == null) {
+            LOG.debug("setPermission path:{} entry:{}", path, entry);
+            return;
+        }
+
+        FilerProto.Entry.Builder entryBuilder = entry.toBuilder();
+        FilerProto.FuseAttributes.Builder attributesBuilder = entry.getAttributes().toBuilder();
+
+        attributesBuilder.setFileMode(permissionToMode(permission, entry.getIsDirectory()));
+
+        entryBuilder.setAttributes(attributesBuilder);
+
+        LOG.debug("setPermission path:{} entry:{}", path, entryBuilder);
+
+        filerClient.updateEntry(getParentDirectory(path), entryBuilder.build());
+
+    }
+
+}
diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java
new file mode 100644
index 000000000..90c14c772
--- /dev/null
+++ b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedInputStream.java
@@ -0,0 +1,371 @@
+package seaweed.hdfs;
+
+// based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.FSExceptionMessages;
+import org.apache.hadoop.fs.FSInputStream;
+import org.apache.hadoop.fs.FileSystem.Statistics;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import seaweedfs.client.FilerGrpcClient;
+import seaweedfs.client.FilerProto;
+import seaweedfs.client.SeaweedRead;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.List;
+
+public class SeaweedInputStream extends FSInputStream {
+
+    private static final Logger LOG = LoggerFactory.getLogger(SeaweedInputStream.class);
+
+    private final FilerGrpcClient filerGrpcClient;
+    private final Statistics statistics;
+    private final String path;
+    private final FilerProto.Entry entry;
+    private final List<SeaweedRead.VisibleInterval> visibleIntervalList;
+    private final long contentLength;
+    private final int bufferSize; // default buffer size
+    private final int readAheadQueueDepth; // initialized in constructor
+    private final boolean readAheadEnabled; // whether read-ahead is enabled
+
+    private byte[] buffer = null; // will be initialized on first use
+
+    private long fCursor = 0; // cursor of buffer within file - offset of next byte to read from remote server
+    private long fCursorAfterLastRead = -1;
+    private int bCursor = 0; // cursor of read within buffer - offset of next byte to be returned from buffer
+    private int limit = 0; // offset of next byte to be read into buffer from service (i.e., upper marker+1
+    // of valid bytes in buffer)
+    private boolean closed = false;
+
+    public SeaweedInputStream(
+            final FilerGrpcClient filerGrpcClient,
+            final Statistics statistics,
+            final String path,
+            final FilerProto.Entry entry,
+            final int bufferSize,
+            final int readAheadQueueDepth) {
+        this.filerGrpcClient = filerGrpcClient;
+        this.statistics = statistics;
+        this.path = path;
+        this.entry = entry;
+        this.contentLength = SeaweedRead.totalSize(entry.getChunksList());
+        this.bufferSize = bufferSize;
+        this.readAheadQueueDepth = (readAheadQueueDepth >= 0) ?
readAheadQueueDepth : Runtime.getRuntime().availableProcessors(); + this.readAheadEnabled = true; + + this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(entry.getChunksList()); + + LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList); + + } + + public String getPath() { + return path; + } + + @Override + public int read() throws IOException { + byte[] b = new byte[1]; + int numberOfBytesRead = read(b, 0, 1); + if (numberOfBytesRead < 0) { + return -1; + } else { + return (b[0] & 0xFF); + } + } + + @Override + public synchronized int read(final byte[] b, final int off, final int len) throws IOException { + int currentOff = off; + int currentLen = len; + int lastReadBytes; + int totalReadBytes = 0; + do { + lastReadBytes = readOneBlock(b, currentOff, currentLen); + if (lastReadBytes > 0) { + currentOff += lastReadBytes; + currentLen -= lastReadBytes; + totalReadBytes += lastReadBytes; + } + if (currentLen <= 0 || currentLen > b.length - currentOff) { + break; + } + } while (lastReadBytes > 0); + return totalReadBytes > 0 ? totalReadBytes : lastReadBytes; + } + + private int readOneBlock(final byte[] b, final int off, final int len) throws IOException { + if (closed) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } + + Preconditions.checkNotNull(b); + + if (len == 0) { + return 0; + } + + if (this.available() == 0) { + return -1; + } + + if (off < 0 || len < 0 || len > b.length - off) { + throw new IndexOutOfBoundsException(); + } + + //If buffer is empty, then fill the buffer. + if (bCursor == limit) { + //If EOF, then return -1 + if (fCursor >= contentLength) { + return -1; + } + + long bytesRead = 0; + //reset buffer to initial state - i.e., throw away existing data + bCursor = 0; + limit = 0; + if (buffer == null) { + buffer = new byte[bufferSize]; + } + + // Enable readAhead when reading sequentially + if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) { + bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false); + } else { + bytesRead = readInternal(fCursor, buffer, 0, b.length, true); + } + + if (bytesRead == -1) { + return -1; + } + + limit += bytesRead; + fCursor += bytesRead; + fCursorAfterLastRead = fCursor; + } + + //If there is anything in the buffer, then return lesser of (requested bytes) and (bytes in buffer) + //(bytes returned may be less than requested) + int bytesRemaining = limit - bCursor; + int bytesToRead = Math.min(len, bytesRemaining); + System.arraycopy(buffer, bCursor, b, off, bytesToRead); + bCursor += bytesToRead; + if (statistics != null) { + statistics.incrementBytesRead(bytesToRead); + } + return bytesToRead; + } + + + private int readInternal(final long position, final byte[] b, final int offset, final int length, + final boolean bypassReadAhead) throws IOException { + if (readAheadEnabled && !bypassReadAhead) { + // try reading from read-ahead + if (offset != 0) { + throw new IllegalArgumentException("readahead buffers cannot have non-zero buffer offsets"); + } + int receivedBytes; + + // queue read-aheads + int numReadAheads = this.readAheadQueueDepth; + long nextSize; + long nextOffset = position; + while (numReadAheads > 0 && nextOffset < contentLength) { + nextSize = Math.min((long) bufferSize, contentLength - nextOffset); + ReadBufferManager.getBufferManager().queueReadAhead(this, nextOffset, (int) nextSize); + nextOffset = nextOffset + nextSize; + numReadAheads--; + } + + // try reading from buffers first + receivedBytes = 
ReadBufferManager.getBufferManager().getBlock(this, position, length, b);
+            if (receivedBytes > 0) {
+                return receivedBytes;
+            }
+
+            // got nothing from read-ahead, do our own read now
+            receivedBytes = readRemote(position, b, offset, length);
+            return receivedBytes;
+        } else {
+            return readRemote(position, b, offset, length);
+        }
+    }
+
+    int readRemote(long position, byte[] b, int offset, int length) throws IOException {
+        if (position < 0) {
+            throw new IllegalArgumentException("attempting to read from negative offset");
+        }
+        if (position >= contentLength) {
+            return -1; // Hadoop prefers -1 to EOFException
+        }
+        if (b == null) {
+            throw new IllegalArgumentException("null byte array passed in to read() method");
+        }
+        if (offset >= b.length) {
+            throw new IllegalArgumentException("offset greater than length of array");
+        }
+        if (length < 0) {
+            throw new IllegalArgumentException("requested read length is less than zero");
+        }
+        if (length > (b.length - offset)) {
+            throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer");
+        }
+
+        long bytesRead = SeaweedRead.read(filerGrpcClient, visibleIntervalList, position, b, offset, length);
+        if (bytesRead > Integer.MAX_VALUE) {
+            throw new IOException("Unexpected Content-Length");
+        }
+        return (int) bytesRead;
+    }
+
+    /**
+     * Seek to given position in stream.
+     *
+     * @param n position to seek to
+     * @throws IOException  if there is an error
+     * @throws EOFException if attempting to seek past end of file
+     */
+    @Override
+    public synchronized void seek(long n) throws IOException {
+        if (closed) {
+            throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+        }
+        if (n < 0) {
+            throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK);
+        }
+        if (n > contentLength) {
+            throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF);
+        }
+
+        if (n >= fCursor - limit && n <= fCursor) { // within buffer
+            bCursor = (int) (n - (fCursor - limit));
+            return;
+        }
+
+        // next read will read from here
+        fCursor = n;
+
+        //invalidate buffer
+        limit = 0;
+        bCursor = 0;
+    }
+
+    @Override
+    public synchronized long skip(long n) throws IOException {
+        if (closed) {
+            throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+        }
+        long currentPos = getPos();
+        if (currentPos == contentLength) {
+            if (n > 0) {
+                throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF);
+            }
+        }
+        long newPos = currentPos + n;
+        if (newPos < 0) {
+            newPos = 0;
+            n = newPos - currentPos;
+        }
+        if (newPos > contentLength) {
+            newPos = contentLength;
+            n = newPos - currentPos;
+        }
+        seek(newPos);
+        return n;
+    }
+
+    /**
+     * Return the size of the remaining available bytes
+     * if the size is less than or equal to {@link Integer#MAX_VALUE},
+     * otherwise, return {@link Integer#MAX_VALUE}.
+     * <p>
+     * This is to match the behavior of DFSInputStream.available(),
+     * which some clients may rely on (HBase write-ahead log reading in
+     * particular).
+     */
+    @Override
+    public synchronized int available() throws IOException {
+        if (closed) {
+            throw new IOException(
+                    FSExceptionMessages.STREAM_IS_CLOSED);
+        }
+        final long remaining = this.contentLength - this.getPos();
+        return remaining <= Integer.MAX_VALUE
+                ? (int) remaining : Integer.MAX_VALUE;
+    }
+
+    /**
+     * Returns the length of the file that this stream refers to. Note that the length returned is the length
+     * as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file,
+     * they won't be reflected in the returned length.
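+     * (The underlying filer entry, and hence the length, is captured once when the stream is constructed.)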
+     *
+     * @return length of the file.
+     * @throws IOException if the stream is closed
+     */
+    public long length() throws IOException {
+        if (closed) {
+            throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+        }
+        return contentLength;
+    }
+
+    /**
+     * Return the current offset from the start of the file.
+     *
+     * @throws IOException throws {@link IOException} if there is an error
+     */
+    @Override
+    public synchronized long getPos() throws IOException {
+        if (closed) {
+            throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+        }
+        return fCursor - limit + bCursor;
+    }
+
+    /**
+     * Seeks a different copy of the data. Returns true if
+     * found a new source, false otherwise.
+     *
+     * @throws IOException throws {@link IOException} if there is an error
+     */
+    @Override
+    public boolean seekToNewSource(long l) throws IOException {
+        return false;
+    }
+
+    @Override
+    public synchronized void close() throws IOException {
+        closed = true;
+        buffer = null; // de-reference the buffer so it can be GC'ed sooner
+    }
+
+    /**
+     * Not supported by this stream. Throws {@link UnsupportedOperationException}
+     *
+     * @param readlimit ignored
+     */
+    @Override
+    public synchronized void mark(int readlimit) {
+        throw new UnsupportedOperationException("mark()/reset() not supported on this stream");
+    }
+
+    /**
+     * Not supported by this stream. Throws {@link UnsupportedOperationException}
+     */
+    @Override
+    public synchronized void reset() throws IOException {
+        throw new UnsupportedOperationException("mark()/reset() not supported on this stream");
+    }
+
+    /**
+     * Gets whether mark and reset are supported by {@code SeaweedInputStream}. Always returns false.
+     *
+     * @return always {@code false}
+     */
+    @Override
+    public boolean markSupported() {
+        return false;
+    }
+}
diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedOutputStream.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedOutputStream.java
new file mode 100644
index 000000000..7b488a5da
--- /dev/null
+++ b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedOutputStream.java
@@ -0,0 +1,283 @@
+package seaweed.hdfs;
+
+// adapted from org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.FSExceptionMessages;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import seaweedfs.client.FilerGrpcClient;
+import seaweedfs.client.FilerProto;
+import seaweedfs.client.SeaweedWrite;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.io.OutputStream;
+import java.util.concurrent.*;
+
+import static seaweed.hdfs.SeaweedFileSystemStore.getParentDirectory;
+
+public class SeaweedOutputStream extends OutputStream {
+
+    private static final Logger LOG = LoggerFactory.getLogger(SeaweedOutputStream.class);
+
+    private final FilerGrpcClient filerGrpcClient;
+    private final Path path;
+    private final int bufferSize;
+    private final int maxConcurrentRequestCount;
+    private final ThreadPoolExecutor threadExecutor;
+    private final ExecutorCompletionService<Void> completionService;
+    private FilerProto.Entry.Builder entry;
+    private long position;
+    private boolean closed;
+    private boolean supportFlush = true;
+    private volatile IOException lastError;
+    private long lastFlushOffset;
+    private long lastTotalAppendOffset = 0;
+    private byte[] buffer;
+    private int bufferIndex;
+    private ConcurrentLinkedDeque<WriteOperation> writeOperations;
+    private String replication = "000";
+
+    public
SeaweedOutputStream(FilerGrpcClient filerGrpcClient, final Path path, FilerProto.Entry.Builder entry,
+                               final long position, final int bufferSize, final String replication) {
+        this.filerGrpcClient = filerGrpcClient;
+        this.replication = replication;
+        this.path = path;
+        this.position = position;
+        this.closed = false;
+        this.lastError = null;
+        this.lastFlushOffset = 0;
+        this.bufferSize = bufferSize;
+        this.buffer = new byte[bufferSize];
+        this.bufferIndex = 0;
+        this.writeOperations = new ConcurrentLinkedDeque<>();
+
+        this.maxConcurrentRequestCount = 4 * Runtime.getRuntime().availableProcessors();
+
+        this.threadExecutor
+                = new ThreadPoolExecutor(maxConcurrentRequestCount,
+                maxConcurrentRequestCount,
+                10L,
+                TimeUnit.SECONDS,
+                new LinkedBlockingQueue<Runnable>());
+        this.completionService = new ExecutorCompletionService<>(this.threadExecutor);
+
+        this.entry = entry;
+
+    }
+
+    private synchronized void flushWrittenBytesToServiceInternal(final long offset) throws IOException {
+
+        LOG.debug("SeaweedWrite.writeMeta path: {} entry:{}", path, entry);
+
+        try {
+            SeaweedWrite.writeMeta(filerGrpcClient, getParentDirectory(path), entry);
+        } catch (Exception ex) {
+            throw new IOException(ex);
+        }
+        this.lastFlushOffset = offset;
+    }
+
+    @Override
+    public void write(final int byteVal) throws IOException {
+        write(new byte[]{(byte) (byteVal & 0xFF)});
+    }
+
+    @Override
+    public synchronized void write(final byte[] data, final int off, final int length)
+            throws IOException {
+        maybeThrowLastError();
+
+        Preconditions.checkArgument(data != null, "null data");
+
+        if (off < 0 || length < 0 || length > data.length - off) {
+            throw new IndexOutOfBoundsException();
+        }
+
+        int currentOffset = off;
+        int writableBytes = bufferSize - bufferIndex;
+        int numberOfBytesToWrite = length;
+
+        while (numberOfBytesToWrite > 0) {
+            if (writableBytes <= numberOfBytesToWrite) {
+                System.arraycopy(data, currentOffset, buffer, bufferIndex, writableBytes);
+                bufferIndex += writableBytes;
+                writeCurrentBufferToService();
+                currentOffset += writableBytes;
+                numberOfBytesToWrite = numberOfBytesToWrite - writableBytes;
+            } else {
+                System.arraycopy(data, currentOffset, buffer, bufferIndex, numberOfBytesToWrite);
+                bufferIndex += numberOfBytesToWrite;
+                numberOfBytesToWrite = 0;
+            }
+
+            writableBytes = bufferSize - bufferIndex;
+        }
+    }
+
+    /**
+     * Flushes this output stream and forces any buffered output bytes to be
+     * written out. If any data remains in the payload it is committed to the
+     * service. Data is queued for writing and forced out to the service
+     * before the call returns.
+     */
+    @Override
+    public void flush() throws IOException {
+        if (supportFlush) {
+            flushInternalAsync();
+        }
+    }
+
+    /**
+     * Forces all data in the output stream to be written to SeaweedFS.
+     * Waits until this is complete. Closes access to the stream and
+     * shuts down the upload thread pool.
+     * Any error caught in a worker thread and stored will be rethrown here
+     * after cleanup.
+ */ + @Override + public synchronized void close() throws IOException { + if (closed) { + return; + } + + LOG.debug("close path: {}", path); + try { + flushInternal(); + threadExecutor.shutdown(); + } finally { + lastError = new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + buffer = null; + bufferIndex = 0; + closed = true; + writeOperations.clear(); + if (!threadExecutor.isShutdown()) { + threadExecutor.shutdownNow(); + } + } + } + + private synchronized void writeCurrentBufferToService() throws IOException { + if (bufferIndex == 0) { + return; + } + + final byte[] bytes = buffer; + final int bytesLength = bufferIndex; + + buffer = new byte[bufferSize]; + bufferIndex = 0; + final long offset = position; + position += bytesLength; + + if (threadExecutor.getQueue().size() >= maxConcurrentRequestCount * 2) { + waitForTaskToComplete(); + } + + final Future<Void> job = completionService.submit(new Callable<Void>() { + @Override + public Void call() throws Exception { + // originally: client.append(path, offset, bytes, 0, bytesLength); + SeaweedWrite.writeData(entry, replication, filerGrpcClient, offset, bytes, 0, bytesLength); + return null; + } + }); + + writeOperations.add(new WriteOperation(job, offset, bytesLength)); + + // Try to shrink the queue + shrinkWriteOperationQueue(); + } + + private void waitForTaskToComplete() throws IOException { + boolean completed; + for (completed = false; completionService.poll() != null; completed = true) { + // keep polling until there is no data + } + + if (!completed) { + try { + completionService.take(); + } catch (InterruptedException e) { + lastError = (IOException) new InterruptedIOException(e.toString()).initCause(e); + throw lastError; + } + } + } + + private void maybeThrowLastError() throws IOException { + if (lastError != null) { + throw lastError; + } + } + + /** + * Try to remove the completed write operations from the beginning of write + * operation FIFO queue. 
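+     * Any failure reported by a completed task is stored in lastError and rethrown.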
+ */ + private synchronized void shrinkWriteOperationQueue() throws IOException { + try { + while (writeOperations.peek() != null && writeOperations.peek().task.isDone()) { + writeOperations.peek().task.get(); + lastTotalAppendOffset += writeOperations.peek().length; + writeOperations.remove(); + } + } catch (Exception e) { + lastError = new IOException(e); + throw lastError; + } + } + + private synchronized void flushInternal() throws IOException { + maybeThrowLastError(); + writeCurrentBufferToService(); + flushWrittenBytesToService(); + } + + private synchronized void flushInternalAsync() throws IOException { + maybeThrowLastError(); + writeCurrentBufferToService(); + flushWrittenBytesToServiceAsync(); + } + + private synchronized void flushWrittenBytesToService() throws IOException { + for (WriteOperation writeOperation : writeOperations) { + try { + writeOperation.task.get(); + } catch (Exception ex) { + lastError = new IOException(ex); + throw lastError; + } + } + LOG.debug("flushWrittenBytesToService: {} position:{}", path, position); + flushWrittenBytesToServiceInternal(position); + } + + private synchronized void flushWrittenBytesToServiceAsync() throws IOException { + shrinkWriteOperationQueue(); + + if (this.lastTotalAppendOffset > this.lastFlushOffset) { + this.flushWrittenBytesToServiceInternal(this.lastTotalAppendOffset); + } + } + + private static class WriteOperation { + private final Future<Void> task; + private final long startOffset; + private final long length; + + WriteOperation(final Future<Void> task, final long startOffset, final long length) { + Preconditions.checkNotNull(task, "task"); + Preconditions.checkArgument(startOffset >= 0, "startOffset"); + Preconditions.checkArgument(length >= 0, "length"); + + this.task = task; + this.startOffset = startOffset; + this.length = length; + } + } + +} diff --git a/other/java/hdfs3/dependency-reduced-pom.xml b/other/java/hdfs3/dependency-reduced-pom.xml new file mode 100644 index 000000000..ca53ffd22 --- /dev/null +++ b/other/java/hdfs3/dependency-reduced-pom.xml @@ -0,0 +1,133 @@ +<?xml version="1.0" encoding="UTF-8"?>
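+<!-- generated by the maven-shade-plugin from other/java/hdfs3/pom.xml; not intended to be edited by hand -->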
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <parent>
+ <artifactId>oss-parent</artifactId>
+ <groupId>org.sonatype.oss</groupId>
+ <version>9</version>
+ <relativePath>../pom.xml/pom.xml</relativePath>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.github.chrislusf</groupId>
+ <artifactId>seaweedfs-hadoop3-client</artifactId>
+ <version>${seaweedfs.client.version}</version>
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>7</source>
+ <target>7</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.2.1</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ <exclude>org/slf4j/**</exclude>
+ <exclude>META-INF/maven/org.slf4j/**</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ <transformers>
+ <transformer />
+ </transformers>
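+            <!-- relocations move bundled dependencies into the shaded.* namespace so they cannot clash with Hadoop's own copies -->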
+ <relocations>
+ <relocation>
+ <pattern>com.google</pattern>
+ <shadedPattern>shaded.com.google</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>io.grpc.internal</pattern>
+ <shadedPattern>shaded.io.grpc.internal</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.commons</pattern>
+ <shadedPattern>shaded.org.apache.commons</shadedPattern>
+ <excludes>
+ <exclude>org.apache.hadoop</exclude>
+ <exclude>org.apache.log4j</exclude>
+ </excludes>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.http</pattern>
+ <shadedPattern>shaded.org.apache.http</shadedPattern>
+ </relocation>
+ </relocations>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-gpg-plugin</artifactId>
+ <version>1.5</version>
+ <executions>
+ <execution>
+ <id>sign-artifacts</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>sign</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.sonatype.plugins</groupId>
+ <artifactId>nexus-staging-maven-plugin</artifactId>
+ <version>1.6.7</version>
+ <extensions>true</extensions>
+ <configuration>
+ <serverId>ossrh</serverId>
+ <nexusUrl>https://oss.sonatype.org/</nexusUrl>
+ <autoReleaseAfterClose>true</autoReleaseAfterClose>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-source-plugin</artifactId>
+ <version>2.2.1</version>
+ <executions>
+ <execution>
+ <id>attach-sources</id>
+ <goals>
+ <goal>jar-no-fork</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>2.9.1</version>
+ <executions>
+ <execution>
+ <id>attach-javadocs</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ <distributionManagement>
+ <snapshotRepository>
+ <id>ossrh</id>
+ <url>https://oss.sonatype.org/content/repositories/snapshots</url>
+ </snapshotRepository>
+ </distributionManagement>
+ <properties>
+ <seaweedfs.client.version>1.2.4</seaweedfs.client.version>
+ <hadoop.version>3.1.1</hadoop.version>
+ </properties>
+</project>
diff --git a/other/java/hdfs3/pom.xml b/other/java/hdfs3/pom.xml new file mode 100644 index 000000000..f5207213c --- /dev/null +++ b/other/java/hdfs3/pom.xml @@ -0,0 +1,163 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <properties> + <seaweedfs.client.version>1.2.4</seaweedfs.client.version> + <hadoop.version>3.1.1</hadoop.version> + </properties> + + <groupId>com.github.chrislusf</groupId> + <artifactId>seaweedfs-hadoop3-client</artifactId> + <version>${seaweedfs.client.version}</version> + + <parent> + <groupId>org.sonatype.oss</groupId> + <artifactId>oss-parent</artifactId> + <version>9</version> + </parent> + + <distributionManagement> + <snapshotRepository> + <id>ossrh</id> + <url>https://oss.sonatype.org/content/repositories/snapshots</url> + </snapshotRepository> + </distributionManagement> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>7</source> + <target>7</target> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>3.2.1</version> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + <exclude>org/slf4j/**</exclude> + <exclude>META-INF/maven/org.slf4j/**</exclude> + </excludes> + </filter> + </filters> + <transformers> + <transformer + implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> + </transformers> + <relocations> + <relocation> + <pattern>com.google</pattern> + <shadedPattern>shaded.com.google</shadedPattern> + </relocation> + <relocation> + <pattern>io.grpc.internal</pattern> + <shadedPattern>shaded.io.grpc.internal</shadedPattern> + </relocation> + <relocation> + <pattern>org.apache.commons</pattern> + <shadedPattern>shaded.org.apache.commons</shadedPattern> + <excludes> + <exclude>org.apache.hadoop</exclude> + <exclude>org.apache.log4j</exclude> + </excludes> + </relocation> + <relocation> + <pattern>org.apache.http</pattern> + <shadedPattern>shaded.org.apache.http</shadedPattern> + </relocation> + </relocations> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-gpg-plugin</artifactId> + <version>1.5</version> + <executions> + <execution> + <id>sign-artifacts</id> + <phase>verify</phase> + <goals> + <goal>sign</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.sonatype.plugins</groupId> + <artifactId>nexus-staging-maven-plugin</artifactId> + <version>1.6.7</version> + <extensions>true</extensions> + <configuration> + <serverId>ossrh</serverId> + <nexusUrl>https://oss.sonatype.org/</nexusUrl> + <autoReleaseAfterClose>true</autoReleaseAfterClose> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + <version>2.2.1</version> + <executions> + <execution> + <id>attach-sources</id> + <goals> + <goal>jar-no-fork</goal> + </goals> + </execution> + </executions> + 
</plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <version>2.9.1</version> + <executions> + <execution> + <id>attach-javadocs</id> + <goals> + <goal>jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>com.github.chrislusf</groupId> + <artifactId>seaweedfs-client</artifactId> + <version>${seaweedfs.client.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + </dependency> + </dependencies> + +</project> diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBuffer.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBuffer.java new file mode 100644 index 000000000..926d0b83b --- /dev/null +++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBuffer.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package seaweed.hdfs;
+
+import java.util.concurrent.CountDownLatch;
+
+class ReadBuffer {
+
+    private SeaweedInputStream stream;
+    private long offset; // offset within the file for the buffer
+    private int length; // actual length, set after the buffer is filled
+    private int requestedLength; // requested length of the read
+    private byte[] buffer; // the buffer itself
+    private int bufferindex = -1; // index in the buffers array in Buffer manager
+    private ReadBufferStatus status; // status of the buffer
+    private CountDownLatch latch = null; // signaled when the buffer is done reading, so any client
+    // waiting on this buffer gets unblocked
+
+    // fields to help with eviction logic
+    private long timeStamp = 0; // tick at which buffer became available to read
+    private boolean isFirstByteConsumed = false;
+    private boolean isLastByteConsumed = false;
+    private boolean isAnyByteConsumed = false;
+
+    public SeaweedInputStream getStream() {
+        return stream;
+    }
+
+    public void setStream(SeaweedInputStream stream) {
+        this.stream = stream;
+    }
+
+    public long getOffset() {
+        return offset;
+    }
+
+    public void setOffset(long offset) {
+        this.offset = offset;
+    }
+
+    public int getLength() {
+        return length;
+    }
+
+    public void setLength(int length) {
+        this.length = length;
+    }
+
+    public int getRequestedLength() {
+        return requestedLength;
+    }
+
+    public void setRequestedLength(int requestedLength) {
+        this.requestedLength = requestedLength;
+    }
+
+    public byte[] getBuffer() {
+        return buffer;
+    }
+
+    public void setBuffer(byte[] buffer) {
+        this.buffer = buffer;
+    }
+
+    public int getBufferindex() {
+        return bufferindex;
+    }
+
+    public void setBufferindex(int bufferindex) {
+        this.bufferindex = bufferindex;
+    }
+
+    public ReadBufferStatus getStatus() {
+        return status;
+    }
+
+    public void setStatus(ReadBufferStatus status) {
+        this.status = status;
+    }
+
+    public CountDownLatch getLatch() {
+        return latch;
+    }
+
+    public void setLatch(CountDownLatch latch) {
+        this.latch = latch;
+    }
+
+    public long getTimeStamp() {
+        return timeStamp;
+    }
+
+    public void setTimeStamp(long timeStamp) {
+        this.timeStamp = timeStamp;
+    }
+
+    public boolean isFirstByteConsumed() {
+        return isFirstByteConsumed;
+    }
+
+    public void setFirstByteConsumed(boolean isFirstByteConsumed) {
+        this.isFirstByteConsumed = isFirstByteConsumed;
+    }
+
+    public boolean isLastByteConsumed() {
+        return isLastByteConsumed;
+    }
+
+    public void setLastByteConsumed(boolean isLastByteConsumed) {
+        this.isLastByteConsumed = isLastByteConsumed;
+    }
+
+    public boolean isAnyByteConsumed() {
+        return isAnyByteConsumed;
+    }
+
+    public void setAnyByteConsumed(boolean isAnyByteConsumed) {
+        this.isAnyByteConsumed = isAnyByteConsumed;
+    }
+
+}
diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferManager.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferManager.java
new file mode 100644
index 000000000..5b1e21529
--- /dev/null
+++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferManager.java
@@ -0,0 +1,394 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package seaweed.hdfs;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.Queue;
+import java.util.Stack;
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * The read buffer manager for SeaweedFS read-ahead (adapted from the ABFS client's Read Buffer Manager).
+ */
+final class ReadBufferManager {
+    private static final Logger LOGGER = LoggerFactory.getLogger(ReadBufferManager.class);
+
+    private static final int NUM_BUFFERS = 16;
+    private static final int BLOCK_SIZE = 4 * 1024 * 1024;
+    private static final int NUM_THREADS = 8;
+    private static final int THRESHOLD_AGE_MILLISECONDS = 3000; // have to see if 3 seconds is a good threshold
+
+    private Thread[] threads = new Thread[NUM_THREADS];
+    private byte[][] buffers; // array of byte[] buffers, to hold the data that is read
+    private Stack<Integer> freeList = new Stack<>(); // indices in buffers[] array that are available
+
+    private Queue<ReadBuffer> readAheadQueue = new LinkedList<>(); // queue of requests that are not picked up by any worker thread yet
+    private LinkedList<ReadBuffer> inProgressList = new LinkedList<>(); // requests being processed by worker threads
+    private LinkedList<ReadBuffer> completedReadList = new LinkedList<>(); // buffers available for reading
+    private static final ReadBufferManager BUFFER_MANAGER; // singleton, initialized in static initialization block
+
+    static {
+        BUFFER_MANAGER = new ReadBufferManager();
+        BUFFER_MANAGER.init();
+    }
+
+    static ReadBufferManager getBufferManager() {
+        return BUFFER_MANAGER;
+    }
+
+    private void init() {
+        buffers = new byte[NUM_BUFFERS][];
+        for (int i = 0; i < NUM_BUFFERS; i++) {
+            buffers[i] = new byte[BLOCK_SIZE]; // same buffers are reused. The byte array never goes back to GC
+            freeList.add(i);
+        }
+        for (int i = 0; i < NUM_THREADS; i++) {
+            Thread t = new Thread(new ReadBufferWorker(i));
+            t.setDaemon(true);
+            threads[i] = t;
+            t.setName("SeaweedFS-prefetch-" + i);
+            t.start();
+        }
+        ReadBufferWorker.UNLEASH_WORKERS.countDown();
+    }
+
+    // hide instance constructor
+    private ReadBufferManager() {
+    }
+
+
+    /*
+     *
+     *  SeaweedInputStream-facing methods
+     *
+     */
+
+
+    /**
+     * {@link SeaweedInputStream} calls this method to queue read-aheads.
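+     * A request is ignored if the same offset is already queued, in progress, or completed,
+     * or if no free buffer can be obtained even after attempting an eviction.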
+     *
+     * @param stream          The {@link SeaweedInputStream} for which to do the read-ahead
+     * @param requestedOffset The offset in the file which should be read
+     * @param requestedLength The length to read
+     */
+    void queueReadAhead(final SeaweedInputStream stream, final long requestedOffset, final int requestedLength) {
+        if (LOGGER.isTraceEnabled()) {
+            LOGGER.trace("Start Queueing readAhead for {} offset {} length {}",
+                    stream.getPath(), requestedOffset, requestedLength);
+        }
+        ReadBuffer buffer;
+        synchronized (this) {
+            if (isAlreadyQueued(stream, requestedOffset)) {
+                return; // already queued, do not queue again
+            }
+            if (freeList.isEmpty() && !tryEvict()) {
+                return; // no buffers available, cannot queue anything
+            }
+
+            buffer = new ReadBuffer();
+            buffer.setStream(stream);
+            buffer.setOffset(requestedOffset);
+            buffer.setLength(0);
+            buffer.setRequestedLength(requestedLength);
+            buffer.setStatus(ReadBufferStatus.NOT_AVAILABLE);
+            buffer.setLatch(new CountDownLatch(1));
+
+            Integer bufferIndex = freeList.pop(); // will return a value, since we have checked size > 0 already
+
+            buffer.setBuffer(buffers[bufferIndex]);
+            buffer.setBufferindex(bufferIndex);
+            readAheadQueue.add(buffer);
+            notifyAll();
+        }
+        if (LOGGER.isTraceEnabled()) {
+            LOGGER.trace("Done q-ing readAhead for file {} offset {} buffer idx {}",
+                    stream.getPath(), requestedOffset, buffer.getBufferindex());
+        }
+    }
+
+
+    /**
+     * {@link SeaweedInputStream} calls this method to read any bytes already available in a buffer (thereby saving a
+     * remote read). This returns the bytes if the data already exists in buffer. If there is a buffer that is reading
+     * the requested offset, then this method blocks until that read completes. If the data is queued in a read-ahead
+     * but not picked up by a worker thread yet, then it cancels that read-ahead and reports cache miss. This is because
+     * depending on worker thread availability, the read-ahead may take a while - the calling thread can do its own
+     * read to get the data faster (compared to the read waiting in queue for an indeterminate amount of time).
+     *
+     * @param stream   the file to read bytes for
+     * @param position the offset in the file to do a read for
+     * @param length   the length to read
+     * @param buffer   the buffer to read data into. Note that the buffer will be written into from offset 0.
+     * @return the number of bytes read
+     */
+    int getBlock(final SeaweedInputStream stream, final long position, final int length, final byte[] buffer) {
+        // not synchronized, so have to be careful with locking
+        if (LOGGER.isTraceEnabled()) {
+            LOGGER.trace("getBlock for file {} position {} thread {}",
+                    stream.getPath(), position, Thread.currentThread().getName());
+        }
+
+        waitForProcess(stream, position);
+
+        int bytesRead = 0;
+        synchronized (this) {
+            bytesRead = getBlockFromCompletedQueue(stream, position, length, buffer);
+        }
+        if (bytesRead > 0) {
+            if (LOGGER.isTraceEnabled()) {
+                LOGGER.trace("Done read from Cache for {} position {} length {}",
+                        stream.getPath(), position, bytesRead);
+            }
+            return bytesRead;
+        }
+
+        // otherwise, just say we got nothing - calling thread can do its own read
+        return 0;
+    }
+
+    /*
+     *
+     *  Internal methods
+     *
+     */
+
+    private void waitForProcess(final SeaweedInputStream stream, final long position) {
+        ReadBuffer readBuf;
+        synchronized (this) {
+            clearFromReadAheadQueue(stream, position);
+            readBuf = getFromList(inProgressList, stream, position);
+        }
+        if (readBuf != null) { // if in in-progress queue, then block for it
+            try {
+                if (LOGGER.isTraceEnabled()) {
+                    LOGGER.trace("got a relevant read buffer for file {} offset {} buffer idx {}",
+                            stream.getPath(), readBuf.getOffset(), readBuf.getBufferindex());
+                }
+                readBuf.getLatch().await(); // blocking wait on the caller stream's thread
+                // Note on correctness: readBuf gets out of inProgressList only in 1 place: after worker thread
+                // is done processing it (in doneReading). There, the latch is set after removing the buffer from
+                // inProgressList. So this latch is safe to be outside the synchronized block.
+                // Putting it in synchronized would result in a deadlock, since this thread would be holding the lock
+                // while waiting, so no one will be able to change any state. If this becomes more complex in the future,
+                // then the latch can be removed and replaced with wait/notify whenever inProgressList is touched.
+            } catch (InterruptedException ex) {
+                Thread.currentThread().interrupt();
+            }
+            if (LOGGER.isTraceEnabled()) {
+                LOGGER.trace("latch done for file {} buffer idx {} length {}",
+                        stream.getPath(), readBuf.getBufferindex(), readBuf.getLength());
+            }
+        }
+    }
+
+    /**
+     * If any buffer in the completed list can be reclaimed then reclaim it and return the buffer to free list.
+     * The objective is to find just one buffer - there is no advantage to evicting more than one.
+     *
+     * @return whether the eviction succeeded - i.e., were we able to free up one buffer
+     */
+    private synchronized boolean tryEvict() {
+        ReadBuffer nodeToEvict = null;
+        if (completedReadList.size() <= 0) {
+            return false; // there are no evict-able buffers
+        }
+
+        // first, try buffers where all bytes have been consumed (approximated as first and last bytes consumed)
+        for (ReadBuffer buf : completedReadList) {
+            if (buf.isFirstByteConsumed() && buf.isLastByteConsumed()) {
+                nodeToEvict = buf;
+                break;
+            }
+        }
+        if (nodeToEvict != null) {
+            return evict(nodeToEvict);
+        }
+
+        // next, try buffers where any bytes have been consumed (may be a bad idea?
have to experiment and see) + for (ReadBuffer buf : completedReadList) { + if (buf.isAnyByteConsumed()) { + nodeToEvict = buf; + break; + } + } + + if (nodeToEvict != null) { + return evict(nodeToEvict); + } + + // next, try any old nodes that have not been consumed + long earliestBirthday = Long.MAX_VALUE; + for (ReadBuffer buf : completedReadList) { + if (buf.getTimeStamp() < earliestBirthday) { + nodeToEvict = buf; + earliestBirthday = buf.getTimeStamp(); + } + } + if ((currentTimeMillis() - earliestBirthday > THRESHOLD_AGE_MILLISECONDS) && (nodeToEvict != null)) { + return evict(nodeToEvict); + } + + // nothing can be evicted + return false; + } + + private boolean evict(final ReadBuffer buf) { + freeList.push(buf.getBufferindex()); + completedReadList.remove(buf); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("Evicting buffer idx {}; was used for file {} offset {} length {}", + buf.getBufferindex(), buf.getStream().getPath(), buf.getOffset(), buf.getLength()); + } + return true; + } + + private boolean isAlreadyQueued(final SeaweedInputStream stream, final long requestedOffset) { + // returns true if any part of the buffer is already queued + return (isInList(readAheadQueue, stream, requestedOffset) + || isInList(inProgressList, stream, requestedOffset) + || isInList(completedReadList, stream, requestedOffset)); + } + + private boolean isInList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) { + return (getFromList(list, stream, requestedOffset) != null); + } + + private ReadBuffer getFromList(final Collection<ReadBuffer> list, final SeaweedInputStream stream, final long requestedOffset) { + for (ReadBuffer buffer : list) { + if (buffer.getStream() == stream) { + if (buffer.getStatus() == ReadBufferStatus.AVAILABLE + && requestedOffset >= buffer.getOffset() + && requestedOffset < buffer.getOffset() + buffer.getLength()) { + return buffer; + } else if (requestedOffset >= buffer.getOffset() + && requestedOffset < buffer.getOffset() + buffer.getRequestedLength()) { + return buffer; + } + } + } + return null; + } + + private void clearFromReadAheadQueue(final SeaweedInputStream stream, final long requestedOffset) { + ReadBuffer buffer = getFromList(readAheadQueue, stream, requestedOffset); + if (buffer != null) { + readAheadQueue.remove(buffer); + notifyAll(); // lock is held in calling method + freeList.push(buffer.getBufferindex()); + } + } + + private int getBlockFromCompletedQueue(final SeaweedInputStream stream, final long position, final int length, + final byte[] buffer) { + ReadBuffer buf = getFromList(completedReadList, stream, position); + if (buf == null || position >= buf.getOffset() + buf.getLength()) { + return 0; + } + int cursor = (int) (position - buf.getOffset()); + int availableLengthInBuffer = buf.getLength() - cursor; + int lengthToCopy = Math.min(length, availableLengthInBuffer); + System.arraycopy(buf.getBuffer(), cursor, buffer, 0, lengthToCopy); + if (cursor == 0) { + buf.setFirstByteConsumed(true); + } + if (cursor + lengthToCopy == buf.getLength()) { + buf.setLastByteConsumed(true); + } + buf.setAnyByteConsumed(true); + return lengthToCopy; + } + + /* + * + * ReadBufferWorker-thread-facing methods + * + */ + + /** + * ReadBufferWorker thread calls this to get the next buffer that it should work on. 
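+     * Blocks until a read-ahead request is queued.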
+     *
+     * @return {@link ReadBuffer}
+     * @throws InterruptedException if thread is interrupted
+     */
+    ReadBuffer getNextBlockToRead() throws InterruptedException {
+        ReadBuffer buffer = null;
+        synchronized (this) {
+            //buffer = readAheadQueue.take();  // blocking method
+            while (readAheadQueue.size() == 0) {
+                wait();
+            }
+            buffer = readAheadQueue.remove();
+            notifyAll();
+            if (buffer == null) {
+                return null; // should never happen
+            }
+            buffer.setStatus(ReadBufferStatus.READING_IN_PROGRESS);
+            inProgressList.add(buffer);
+        }
+        if (LOGGER.isTraceEnabled()) {
+            LOGGER.trace("ReadBufferWorker picked file {} for offset {}",
+                    buffer.getStream().getPath(), buffer.getOffset());
+        }
+        return buffer;
+    }
+
+    /**
+     * ReadBufferWorker thread calls this method to post completion.
+     *
+     * @param buffer            the buffer whose read was completed
+     * @param result            the {@link ReadBufferStatus} after the read operation in the worker thread
+     * @param bytesActuallyRead the number of bytes that the worker thread was actually able to read
+     */
+    void doneReading(final ReadBuffer buffer, final ReadBufferStatus result, final int bytesActuallyRead) {
+        if (LOGGER.isTraceEnabled()) {
+            LOGGER.trace("ReadBufferWorker completed file {} for offset {} bytes {}",
+                    buffer.getStream().getPath(), buffer.getOffset(), bytesActuallyRead);
+        }
+        synchronized (this) {
+            inProgressList.remove(buffer);
+            if (result == ReadBufferStatus.AVAILABLE && bytesActuallyRead > 0) {
+                buffer.setStatus(ReadBufferStatus.AVAILABLE);
+                buffer.setTimeStamp(currentTimeMillis());
+                buffer.setLength(bytesActuallyRead);
+                completedReadList.add(buffer);
+            } else {
+                freeList.push(buffer.getBufferindex());
+                // buffer should go out of scope after the end of the calling method in ReadBufferWorker, and eligible for GC
+            }
+        }
+        //outside the synchronized, since anyone receiving a wake-up from the latch must see safe-published results
+        buffer.getLatch().countDown(); // wake up waiting threads (if any)
+    }
+
+    /**
+     * Similar to System.currentTimeMillis, except implemented with System.nanoTime().
+     * System.currentTimeMillis can go backwards when system clock is changed (e.g., with NTP time synchronization),
+     * making it unsuitable for measuring time intervals. nanoTime is strictly monotonically increasing per CPU core.
+     * Note: it is not monotonic across sockets, and even within a CPU, it's only the
+     * more recent parts which share a clock across all cores.
+     *
+     * @return current time in milliseconds
+     */
+    private long currentTimeMillis() {
+        return System.nanoTime() / 1000 / 1000;
+    }
+}
diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferStatus.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferStatus.java
new file mode 100644
index 000000000..d63674977
--- /dev/null
+++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferStatus.java
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package seaweed.hdfs;
+
+/**
+ * The status of a read-ahead buffer (adapted from the ABFS client).
+ */
+public enum ReadBufferStatus {
+    NOT_AVAILABLE,       // buffers sitting in the read-ahead queue have this status
+    READING_IN_PROGRESS, // reading is in progress on this buffer. Buffer should be in inProgressList
+    AVAILABLE,           // data is available in buffer. It should be in completedList
+    READ_FAILED          // read completed, but failed.
+}
diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferWorker.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferWorker.java
new file mode 100644
index 000000000..6ffbc4644
--- /dev/null
+++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/ReadBufferWorker.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package seaweed.hdfs;
+
+import java.util.concurrent.CountDownLatch;
+
+class ReadBufferWorker implements Runnable {
+
+    protected static final CountDownLatch UNLEASH_WORKERS = new CountDownLatch(1);
+    private int id;
+
+    ReadBufferWorker(final int id) {
+        this.id = id;
+    }
+
+    /**
+     * return the ID of ReadBufferWorker.
+     */
+    public int getId() {
+        return this.id;
+    }
+
+    /**
+     * Waits until a buffer becomes available in ReadAheadQueue.
+     * Once a buffer becomes available, reads the file specified in it and then posts results back to buffer manager.
+     * Rinse and repeat. Forever.
+     */
+    public void run() {
+        try {
+            UNLEASH_WORKERS.await();
+        } catch (InterruptedException ex) {
+            Thread.currentThread().interrupt();
+        }
+        ReadBufferManager bufferManager = ReadBufferManager.getBufferManager();
+        ReadBuffer buffer;
+        while (true) {
+            try {
+                buffer = bufferManager.getNextBlockToRead(); // blocks, until a buffer is available for this thread
+            } catch (InterruptedException ex) {
+                Thread.currentThread().interrupt();
+                return;
+            }
+            if (buffer != null) {
+                try {
+                    // do the actual read, from the file.
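+                    // readRemote() bypasses the read-ahead path, so a worker never re-enters this queue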
+ int bytesRead = buffer.getStream().readRemote(buffer.getOffset(), buffer.getBuffer(), 0, buffer.getRequestedLength()); + bufferManager.doneReading(buffer, ReadBufferStatus.AVAILABLE, bytesRead); // post result back to ReadBufferManager + } catch (Exception ex) { + bufferManager.doneReading(buffer, ReadBufferStatus.READ_FAILED, 0); + } + } + } + } +} diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java new file mode 100644 index 000000000..c12da8261 --- /dev/null +++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java @@ -0,0 +1,620 @@ +package seaweed.hdfs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Progressable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; + +public class SeaweedFileSystem extends FileSystem { + + public static final int FS_SEAWEED_DEFAULT_PORT = 8888; + public static final String FS_SEAWEED_FILER_HOST = "fs.seaweed.filer.host"; + public static final String FS_SEAWEED_FILER_PORT = "fs.seaweed.filer.port"; + + private static final Logger LOG = LoggerFactory.getLogger(SeaweedFileSystem.class); + private static int BUFFER_SIZE = 16 * 1024 * 1024; + + private URI uri; + private Path workingDirectory = new Path("/"); + private SeaweedFileSystemStore seaweedFileSystemStore; + + public URI getUri() { + return uri; + } + + public String getScheme() { + return "seaweedfs"; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { // get + super.initialize(uri, conf); + + // get host information from uri (overrides info in conf) + String host = uri.getHost(); + host = (host == null) ? conf.get(FS_SEAWEED_FILER_HOST, "localhost") : host; + if (host == null) { + throw new IOException("Invalid host specified"); + } + conf.set(FS_SEAWEED_FILER_HOST, host); + + // get port information from uri, (overrides info in conf) + int port = uri.getPort(); + port = (port == -1) ? 
FS_SEAWEED_DEFAULT_PORT : port; + conf.setInt(FS_SEAWEED_FILER_PORT, port); + + conf.setInt(IO_FILE_BUFFER_SIZE_KEY, BUFFER_SIZE); + + setConf(conf); + this.uri = uri; + + seaweedFileSystemStore = new SeaweedFileSystemStore(host, port); + + } + + @Override + public FSDataInputStream open(Path path, int bufferSize) throws IOException { + + LOG.debug("open path: {} bufferSize:{}", path, bufferSize); + + path = qualify(path); + + try { + InputStream inputStream = seaweedFileSystemStore.openFileForRead(path, statistics, bufferSize); + return new FSDataInputStream(inputStream); + } catch (Exception ex) { + LOG.warn("open path: {} bufferSize:{}", path, bufferSize, ex); + return null; + } + } + + @Override + public FSDataOutputStream create(Path path, FsPermission permission, final boolean overwrite, final int bufferSize, + final short replication, final long blockSize, final Progressable progress) throws IOException { + + LOG.debug("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize); + + path = qualify(path); + + try { + String replicaPlacement = String.format("%03d", replication - 1); + OutputStream outputStream = seaweedFileSystemStore.createFile(path, overwrite, permission, bufferSize, replicaPlacement); + return new FSDataOutputStream(outputStream, statistics); + } catch (Exception ex) { + LOG.warn("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize, ex); + return null; + } + } + + /** + * {@inheritDoc} + * @throws FileNotFoundException if the parent directory is not present -or + * is not a directory. + */ + @Override + public FSDataOutputStream createNonRecursive(Path path, + FsPermission permission, + EnumSet<CreateFlag> flags, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException { + Path parent = path.getParent(); + if (parent != null) { + // expect this to raise an exception if there is no parent + if (!getFileStatus(parent).isDirectory()) { + throw new FileAlreadyExistsException("Not a directory: " + parent); + } + } + return create(path, permission, + flags.contains(CreateFlag.OVERWRITE), bufferSize, + replication, blockSize, progress); + } + + @Override + public FSDataOutputStream append(Path path, int bufferSize, Progressable progressable) throws IOException { + + LOG.debug("append path: {} bufferSize:{}", path, bufferSize); + + path = qualify(path); + try { + OutputStream outputStream = seaweedFileSystemStore.createFile(path, false, null, bufferSize, ""); + return new FSDataOutputStream(outputStream, statistics); + } catch (Exception ex) { + LOG.warn("append path: {} bufferSize:{}", path, bufferSize, ex); + return null; + } + } + + @Override + public boolean rename(Path src, Path dst) { + + LOG.debug("rename path: {} => {}", src, dst); + + if (src.isRoot()) { + return false; + } + + if (src.equals(dst)) { + return true; + } + FileStatus dstFileStatus = getFileStatus(dst); + + String sourceFileName = src.getName(); + Path adjustedDst = dst; + + if (dstFileStatus != null) { + if (!dstFileStatus.isDirectory()) { + return false; + } + adjustedDst = new Path(dst, sourceFileName); + } + + Path qualifiedSrcPath = qualify(src); + Path qualifiedDstPath = qualify(adjustedDst); + + seaweedFileSystemStore.rename(qualifiedSrcPath, qualifiedDstPath); + return true; + } + + @Override + public boolean delete(Path path, boolean recursive) { + + LOG.debug("delete path: {} recursive:{}", path, recursive); + + path = qualify(path); + + FileStatus fileStatus = getFileStatus(path); + + if (fileStatus == null) { 
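+            // no entry found for the path: nothing to delete, report success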
+ return true; + } + + return seaweedFileSystemStore.deleteEntries(path, fileStatus.isDirectory(), recursive); + + } + + @Override + public FileStatus[] listStatus(Path path) throws IOException { + + LOG.debug("listStatus path: {}", path); + + path = qualify(path); + + return seaweedFileSystemStore.listEntries(path); + } + + @Override + public Path getWorkingDirectory() { + return workingDirectory; + } + + @Override + public void setWorkingDirectory(Path path) { + if (path.isAbsolute()) { + workingDirectory = path; + } else { + workingDirectory = new Path(workingDirectory, path); + } + } + + @Override + public boolean mkdirs(Path path, FsPermission fsPermission) throws IOException { + + LOG.debug("mkdirs path: {}", path); + + path = qualify(path); + + FileStatus fileStatus = getFileStatus(path); + + if (fileStatus == null) { + + UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); + return seaweedFileSystemStore.createDirectory(path, currentUser, + fsPermission == null ? FsPermission.getDirDefault() : fsPermission, + FsPermission.getUMask(getConf())); + + } + + if (fileStatus.isDirectory()) { + return true; + } else { + throw new FileAlreadyExistsException("Path is a file: " + path); + } + } + + @Override + public FileStatus getFileStatus(Path path) { + + LOG.debug("getFileStatus path: {}", path); + + path = qualify(path); + + return seaweedFileSystemStore.getFileStatus(path); + } + + /** + * Set owner of a path (i.e. a file or a directory). + * The parameters owner and group cannot both be null. + * + * @param path The path + * @param owner If it is null, the original username remains unchanged. + * @param group If it is null, the original groupname remains unchanged. + */ + @Override + public void setOwner(Path path, final String owner, final String group) + throws IOException { + LOG.debug("setOwner path: {}", path); + path = qualify(path); + + seaweedFileSystemStore.setOwner(path, owner, group); + } + + + /** + * Set permission of a path. + * + * @param path The path + * @param permission Access permission + */ + @Override + public void setPermission(Path path, final FsPermission permission) throws IOException { + LOG.debug("setPermission path: {}", path); + + if (permission == null) { + throw new IllegalArgumentException("The permission can't be null"); + } + + path = qualify(path); + + seaweedFileSystemStore.setPermission(path, permission); + } + + Path qualify(Path path) { + return path.makeQualified(uri, workingDirectory); + } + + /** + * Concat existing files together. + * + * @param trg the path to the target destination. + * @param psrcs the paths to the sources to use for the concatenation. + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default). + */ + @Override + public void concat(final Path trg, final Path[] psrcs) throws IOException { + throw new UnsupportedOperationException("Not implemented by the " + + getClass().getSimpleName() + " FileSystem implementation"); + } + + /** + * Truncate the file in the indicated path to the indicated size. 
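+   * (This client does not implement truncate; the method always throws UnsupportedOperationException.)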
+ * <ul> + * <li>Fails if path is a directory.</li> + * <li>Fails if path does not exist.</li> + * <li>Fails if path is not closed.</li> + * <li>Fails if new size is greater than current size.</li> + * </ul> + * + * @param f The path to the file to be truncated + * @param newLength The size the file is to be truncated to + * @return <code>true</code> if the file has been truncated to the desired + * <code>newLength</code> and is immediately available to be reused for + * write operations such as <code>append</code>, or + * <code>false</code> if a background process of adjusting the length of + * the last block has been started, and clients should wait for it to + * complete before proceeding with further file updates. + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default). + */ + @Override + public boolean truncate(Path f, long newLength) throws IOException { + throw new UnsupportedOperationException("Not implemented by the " + + getClass().getSimpleName() + " FileSystem implementation"); + } + + @Override + public void createSymlink(final Path target, final Path link, + final boolean createParent) throws AccessControlException, + FileAlreadyExistsException, FileNotFoundException, + ParentNotDirectoryException, UnsupportedFileSystemException, + IOException { + // Supporting filesystems should override this method + throw new UnsupportedOperationException( + "Filesystem does not support symlinks!"); + } + + public boolean supportsSymlinks() { + return false; + } + + /** + * Create a snapshot. + * + * @param path The directory where snapshots will be taken. + * @param snapshotName The name of the snapshot + * @return the snapshot path. + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + */ + @Override + public Path createSnapshot(Path path, String snapshotName) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support createSnapshot"); + } + + /** + * Rename a snapshot. + * + * @param path The directory path where the snapshot was taken + * @param snapshotOldName Old name of the snapshot + * @param snapshotNewName New name of the snapshot + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void renameSnapshot(Path path, String snapshotOldName, + String snapshotNewName) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support renameSnapshot"); + } + + /** + * Delete a snapshot of a directory. + * + * @param path The directory that the to-be-deleted snapshot belongs to + * @param snapshotName The name of the snapshot + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void deleteSnapshot(Path path, String snapshotName) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support deleteSnapshot"); + } + + /** + * Modifies ACL entries of files and directories. This method can add new ACL + * entries or modify the permissions on existing ACL entries. All existing + * ACL entries that are not specified in this call are retained without + * changes. (Modifications are merged into the current ACL.) 
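+   * This client does not implement ACL operations; modifyAclEntries always throws UnsupportedOperationException.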
+ * + * @param path Path to modify + * @param aclSpec List<AclEntry> describing modifications + * @throws IOException if an ACL could not be modified + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void modifyAclEntries(Path path, List<AclEntry> aclSpec) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support modifyAclEntries"); + } + + /** + * Removes ACL entries from files and directories. Other ACL entries are + * retained. + * + * @param path Path to modify + * @param aclSpec List describing entries to remove + * @throws IOException if an ACL could not be modified + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void removeAclEntries(Path path, List<AclEntry> aclSpec) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support removeAclEntries"); + } + + /** + * Removes all default ACL entries from files and directories. + * + * @param path Path to modify + * @throws IOException if an ACL could not be modified + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void removeDefaultAcl(Path path) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support removeDefaultAcl"); + } + + /** + * Removes all but the base ACL entries of files and directories. The entries + * for user, group, and others are retained for compatibility with permission + * bits. + * + * @param path Path to modify + * @throws IOException if an ACL could not be removed + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void removeAcl(Path path) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support removeAcl"); + } + + /** + * Fully replaces ACL of files and directories, discarding all existing + * entries. + * + * @param path Path to modify + * @param aclSpec List describing modifications, which must include entries + * for user, group, and others for compatibility with permission bits. + * @throws IOException if an ACL could not be modified + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void setAcl(Path path, List<AclEntry> aclSpec) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support setAcl"); + } + + /** + * Gets the ACL of a file or directory. + * + * @param path Path to get + * @return AclStatus describing the ACL of the file or directory + * @throws IOException if an ACL could not be read + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public AclStatus getAclStatus(Path path) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support getAclStatus"); + } + + /** + * Set an xattr of a file or directory. + * The name must be prefixed with the namespace followed by ".". For example, + * "user.attr". + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to modify + * @param name xattr name. + * @param value xattr value. 
+ * @param flag xattr set flag + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void setXAttr(Path path, String name, byte[] value, + EnumSet<XAttrSetFlag> flag) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support setXAttr"); + } + + /** + * Get an xattr name and value for a file or directory. + * The name must be prefixed with the namespace followed by ".". For example, + * "user.attr". + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to get extended attribute + * @param name xattr name. + * @return byte[] xattr value. + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public byte[] getXAttr(Path path, String name) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support getXAttr"); + } + + /** + * Get all of the xattr name/value pairs for a file or directory. + * Only those xattrs which the logged-in user has permissions to view + * are returned. + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to get extended attributes + * @return Map describing the XAttrs of the file or directory + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public Map<String, byte[]> getXAttrs(Path path) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support getXAttrs"); + } + + /** + * Get all of the xattrs name/value pairs for a file or directory. + * Only those xattrs which the logged-in user has permissions to view + * are returned. + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to get extended attributes + * @param names XAttr names. + * @return Map describing the XAttrs of the file or directory + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public Map<String, byte[]> getXAttrs(Path path, List<String> names) + throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support getXAttrs"); + } + + /** + * Get all of the xattr names for a file or directory. + * Only those xattr names which the logged-in user has permissions to view + * are returned. + * <p> + * Refer to the HDFS extended attributes user documentation for details. + * + * @param path Path to get extended attributes + * @return List{@literal <String>} of the XAttr names of the file or directory + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public List<String> listXAttrs(Path path) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support listXAttrs"); + } + + /** + * Remove an xattr of a file or directory. + * The name must be prefixed with the namespace followed by ".". For example, + * "user.attr". + * <p> + * Refer to the HDFS extended attributes user documentation for details. 
+ * + * @param path Path to remove extended attribute + * @param name xattr name + * @throws IOException IO failure + * @throws UnsupportedOperationException if the operation is unsupported + * (default outcome). + */ + @Override + public void removeXAttr(Path path, String name) throws IOException { + throw new UnsupportedOperationException(getClass().getSimpleName() + + " doesn't support removeXAttr"); + } + +} diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java new file mode 100644 index 000000000..774c090e8 --- /dev/null +++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java @@ -0,0 +1,270 @@ +package seaweed.hdfs; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import seaweedfs.client.FilerClient; +import seaweedfs.client.FilerGrpcClient; +import seaweedfs.client.FilerProto; +import seaweedfs.client.SeaweedRead; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class SeaweedFileSystemStore { + + private static final Logger LOG = LoggerFactory.getLogger(SeaweedFileSystemStore.class); + + private FilerGrpcClient filerGrpcClient; + private FilerClient filerClient; + + public SeaweedFileSystemStore(String host, int port) { + int grpcPort = 10000 + port; + filerGrpcClient = new FilerGrpcClient(host, grpcPort); + filerClient = new FilerClient(filerGrpcClient); + } + + public static String getParentDirectory(Path path) { + return path.isRoot() ? 
"/" : path.getParent().toUri().getPath(); + } + + static int permissionToMode(FsPermission permission, boolean isDirectory) { + int p = permission.toShort(); + if (isDirectory) { + p = p | 1 << 31; + } + return p; + } + + public boolean createDirectory(final Path path, UserGroupInformation currentUser, + final FsPermission permission, final FsPermission umask) { + + LOG.debug("createDirectory path: {} permission: {} umask: {}", + path, + permission, + umask); + + return filerClient.mkdirs( + path.toUri().getPath(), + permissionToMode(permission, true), + currentUser.getUserName(), + currentUser.getGroupNames() + ); + } + + public FileStatus[] listEntries(final Path path) { + LOG.debug("listEntries path: {}", path); + + List<FileStatus> fileStatuses = new ArrayList<FileStatus>(); + + List<FilerProto.Entry> entries = filerClient.listEntries(path.toUri().getPath()); + + for (FilerProto.Entry entry : entries) { + + FileStatus fileStatus = doGetFileStatus(new Path(path, entry.getName()), entry); + + fileStatuses.add(fileStatus); + } + return fileStatuses.toArray(new FileStatus[0]); + } + + public FileStatus getFileStatus(final Path path) { + + FilerProto.Entry entry = lookupEntry(path); + if (entry == null) { + return null; + } + LOG.debug("doGetFileStatus path:{} entry:{}", path, entry); + + FileStatus fileStatus = doGetFileStatus(path, entry); + return fileStatus; + } + + public boolean deleteEntries(final Path path, boolean isDirectory, boolean recursive) { + LOG.debug("deleteEntries path: {} isDirectory {} recursive: {}", + path, + String.valueOf(isDirectory), + String.valueOf(recursive)); + + if (path.isRoot()) { + return true; + } + + if (recursive && isDirectory) { + List<FilerProto.Entry> entries = filerClient.listEntries(path.toUri().getPath()); + for (FilerProto.Entry entry : entries) { + deleteEntries(new Path(path, entry.getName()), entry.getIsDirectory(), true); + } + } + + return filerClient.deleteEntry(getParentDirectory(path), path.getName(), true, recursive, true); + } + + private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) { + FilerProto.FuseAttributes attributes = entry.getAttributes(); + long length = SeaweedRead.totalSize(entry.getChunksList()); + boolean isDir = entry.getIsDirectory(); + int block_replication = 1; + int blocksize = 512; + long modification_time = attributes.getMtime() * 1000; // milliseconds + long access_time = 0; + FsPermission permission = FsPermission.createImmutable((short) attributes.getFileMode()); + String owner = attributes.getUserName(); + String group = attributes.getGroupNameCount() > 0 ? 
attributes.getGroupName(0) : ""; + return new FileStatus(length, isDir, block_replication, blocksize, + modification_time, access_time, permission, owner, group, null, path); + } + + private FilerProto.Entry lookupEntry(Path path) { + + return filerClient.lookupEntry(getParentDirectory(path), path.getName()); + + } + + public void rename(Path source, Path destination) { + + LOG.debug("rename source: {} destination:{}", source, destination); + + if (source.isRoot()) { + return; + } + LOG.info("rename source: {} destination:{}", source, destination); + FilerProto.Entry entry = lookupEntry(source); + if (entry == null) { + LOG.warn("rename non-existing source: {}", source); + return; + } + filerClient.mv(source.toUri().getPath(), destination.toUri().getPath()); + } + + public OutputStream createFile(final Path path, + final boolean overwrite, + FsPermission permission, + int bufferSize, + String replication) throws IOException { + + permission = permission == null ? FsPermission.getFileDefault() : permission; + + LOG.debug("createFile path: {} overwrite: {} permission: {}", + path, + overwrite, + permission.toString()); + + UserGroupInformation userGroupInformation = UserGroupInformation.getCurrentUser(); + long now = System.currentTimeMillis() / 1000L; + + FilerProto.Entry.Builder entry = null; + long writePosition = 0; + if (!overwrite) { + FilerProto.Entry existingEntry = lookupEntry(path); + LOG.debug("createFile merged entry path:{} existingEntry:{}", path, existingEntry); + if (existingEntry != null) { + entry = FilerProto.Entry.newBuilder(); + entry.mergeFrom(existingEntry); + entry.getAttributesBuilder().setMtime(now); + LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry); + writePosition = SeaweedRead.totalSize(existingEntry.getChunksList()); + replication = existingEntry.getAttributes().getReplication(); + } + } + if (entry == null) { + entry = FilerProto.Entry.newBuilder() + .setName(path.getName()) + .setIsDirectory(false) + .setAttributes(FilerProto.FuseAttributes.newBuilder() + .setFileMode(permissionToMode(permission, false)) + .setReplication(replication) + .setCrtime(now) + .setMtime(now) + .setUserName(userGroupInformation.getUserName()) + .clearGroupName() + .addAllGroupName(Arrays.asList(userGroupInformation.getGroupNames())) + ); + } + + return new SeaweedOutputStream(filerGrpcClient, path, entry, writePosition, bufferSize, replication); + + } + + public InputStream openFileForRead(final Path path, FileSystem.Statistics statistics, + int bufferSize) throws IOException { + + LOG.debug("openFileForRead path:{} bufferSize:{}", path, bufferSize); + + int readAheadQueueDepth = 2; + FilerProto.Entry entry = lookupEntry(path); + + if (entry == null) { + throw new FileNotFoundException("read non-exist file " + path); + } + + return new SeaweedInputStream(filerGrpcClient, + statistics, + path.toUri().getPath(), + entry, + bufferSize, + readAheadQueueDepth); + } + + public void setOwner(Path path, String owner, String group) { + + LOG.debug("setOwner path:{} owner:{} group:{}", path, owner, group); + + FilerProto.Entry entry = lookupEntry(path); + if (entry == null) { + LOG.debug("setOwner path:{} entry:{}", path, entry); + return; + } + + FilerProto.Entry.Builder entryBuilder = entry.toBuilder(); + FilerProto.FuseAttributes.Builder attributesBuilder = entry.getAttributes().toBuilder(); + + if (owner != null) { + attributesBuilder.setUserName(owner); + } + if (group != null) { + attributesBuilder.clearGroupName(); + 
attributesBuilder.addGroupName(group); + } + + entryBuilder.setAttributes(attributesBuilder); + + LOG.debug("setOwner path:{} entry:{}", path, entryBuilder); + + filerClient.updateEntry(getParentDirectory(path), entryBuilder.build()); + + } + + public void setPermission(Path path, FsPermission permission) { + + LOG.debug("setPermission path:{} permission:{}", path, permission); + + FilerProto.Entry entry = lookupEntry(path); + if (entry == null) { + LOG.debug("setPermission path:{} entry:{}", path, entry); + return; + } + + FilerProto.Entry.Builder entryBuilder = entry.toBuilder(); + FilerProto.FuseAttributes.Builder attributesBuilder = entry.getAttributes().toBuilder(); + + attributesBuilder.setFileMode(permissionToMode(permission, entry.getIsDirectory())); + + entryBuilder.setAttributes(attributesBuilder); + + LOG.debug("setPermission path:{} entry:{}", path, entryBuilder); + + filerClient.updateEntry(getParentDirectory(path), entryBuilder.build()); + + } + +} diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java new file mode 100644 index 000000000..90c14c772 --- /dev/null +++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedInputStream.java @@ -0,0 +1,371 @@ +package seaweed.hdfs; + +// based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream + +import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.FSExceptionMessages; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileSystem.Statistics; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import seaweedfs.client.FilerGrpcClient; +import seaweedfs.client.FilerProto; +import seaweedfs.client.SeaweedRead; + +import java.io.EOFException; +import java.io.IOException; +import java.util.List; + +public class SeaweedInputStream extends FSInputStream { + + private static final Logger LOG = LoggerFactory.getLogger(SeaweedInputStream.class); + + private final FilerGrpcClient filerGrpcClient; + private final Statistics statistics; + private final String path; + private final FilerProto.Entry entry; + private final List<SeaweedRead.VisibleInterval> visibleIntervalList; + private final long contentLength; + private final int bufferSize; // default buffer size + private final int readAheadQueueDepth; // initialized in constructor + private final boolean readAheadEnabled; // whether enable readAhead; + + private byte[] buffer = null; // will be initialized on first use + + private long fCursor = 0; // cursor of buffer within file - offset of next byte to read from remote server + private long fCursorAfterLastRead = -1; + private int bCursor = 0; // cursor of read within buffer - offset of next byte to be returned from buffer + private int limit = 0; // offset of next byte to be read into buffer from service (i.e., upper marker+1 + // of valid bytes in buffer) + private boolean closed = false; + + public SeaweedInputStream( + final FilerGrpcClient filerGrpcClient, + final Statistics statistics, + final String path, + final FilerProto.Entry entry, + final int bufferSize, + final int readAheadQueueDepth) { + this.filerGrpcClient = filerGrpcClient; + this.statistics = statistics; + this.path = path; + this.entry = entry; + this.contentLength = SeaweedRead.totalSize(entry.getChunksList()); + this.bufferSize = bufferSize; + this.readAheadQueueDepth = (readAheadQueueDepth >= 0) ? 
readAheadQueueDepth : Runtime.getRuntime().availableProcessors(); + this.readAheadEnabled = true; + + this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(entry.getChunksList()); + + LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList); + + } + + public String getPath() { + return path; + } + + @Override + public int read() throws IOException { + byte[] b = new byte[1]; + int numberOfBytesRead = read(b, 0, 1); + if (numberOfBytesRead < 0) { + return -1; + } else { + return (b[0] & 0xFF); + } + } + + @Override + public synchronized int read(final byte[] b, final int off, final int len) throws IOException { + int currentOff = off; + int currentLen = len; + int lastReadBytes; + int totalReadBytes = 0; + do { + lastReadBytes = readOneBlock(b, currentOff, currentLen); + if (lastReadBytes > 0) { + currentOff += lastReadBytes; + currentLen -= lastReadBytes; + totalReadBytes += lastReadBytes; + } + if (currentLen <= 0 || currentLen > b.length - currentOff) { + break; + } + } while (lastReadBytes > 0); + return totalReadBytes > 0 ? totalReadBytes : lastReadBytes; + } + + private int readOneBlock(final byte[] b, final int off, final int len) throws IOException { + if (closed) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } + + Preconditions.checkNotNull(b); + + if (len == 0) { + return 0; + } + + if (this.available() == 0) { + return -1; + } + + if (off < 0 || len < 0 || len > b.length - off) { + throw new IndexOutOfBoundsException(); + } + + //If buffer is empty, then fill the buffer. + if (bCursor == limit) { + //If EOF, then return -1 + if (fCursor >= contentLength) { + return -1; + } + + long bytesRead = 0; + //reset buffer to initial state - i.e., throw away existing data + bCursor = 0; + limit = 0; + if (buffer == null) { + buffer = new byte[bufferSize]; + } + + // Enable readAhead when reading sequentially + if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) { + bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false); + } else { + bytesRead = readInternal(fCursor, buffer, 0, b.length, true); + } + + if (bytesRead == -1) { + return -1; + } + + limit += bytesRead; + fCursor += bytesRead; + fCursorAfterLastRead = fCursor; + } + + //If there is anything in the buffer, then return lesser of (requested bytes) and (bytes in buffer) + //(bytes returned may be less than requested) + int bytesRemaining = limit - bCursor; + int bytesToRead = Math.min(len, bytesRemaining); + System.arraycopy(buffer, bCursor, b, off, bytesToRead); + bCursor += bytesToRead; + if (statistics != null) { + statistics.incrementBytesRead(bytesToRead); + } + return bytesToRead; + } + + + private int readInternal(final long position, final byte[] b, final int offset, final int length, + final boolean bypassReadAhead) throws IOException { + if (readAheadEnabled && !bypassReadAhead) { + // try reading from read-ahead + if (offset != 0) { + throw new IllegalArgumentException("readahead buffers cannot have non-zero buffer offsets"); + } + int receivedBytes; + + // queue read-aheads + int numReadAheads = this.readAheadQueueDepth; + long nextSize; + long nextOffset = position; + while (numReadAheads > 0 && nextOffset < contentLength) { + nextSize = Math.min((long) bufferSize, contentLength - nextOffset); + ReadBufferManager.getBufferManager().queueReadAhead(this, nextOffset, (int) nextSize); + nextOffset = nextOffset + nextSize; + numReadAheads--; + } + + // try reading from buffers first + receivedBytes = 
ReadBufferManager.getBufferManager().getBlock(this, position, length, b); + if (receivedBytes > 0) { + return receivedBytes; + } + + // got nothing from read-ahead, do our own read now + receivedBytes = readRemote(position, b, offset, length); + return receivedBytes; + } else { + return readRemote(position, b, offset, length); + } + } + + int readRemote(long position, byte[] b, int offset, int length) throws IOException { + if (position < 0) { + throw new IllegalArgumentException("attempting to read from negative offset"); + } + if (position >= contentLength) { + return -1; // Hadoop prefers -1 to EOFException + } + if (b == null) { + throw new IllegalArgumentException("null byte array passed in to read() method"); + } + if (offset >= b.length) { + throw new IllegalArgumentException("offset greater than length of array"); + } + if (length < 0) { + throw new IllegalArgumentException("requested read length is less than zero"); + } + if (length > (b.length - offset)) { + throw new IllegalArgumentException("requested read length is more than will fit after requested offset in buffer"); + } + + long bytesRead = SeaweedRead.read(filerGrpcClient, visibleIntervalList, position, b, offset, length); + if (bytesRead > Integer.MAX_VALUE) { + throw new IOException("Unexpected Content-Length"); + } + return (int) bytesRead; + } + + /** + * Seek to given position in stream. + * + * @param n position to seek to + * @throws IOException if there is an error + * @throws EOFException if attempting to seek past end of file + */ + @Override + public synchronized void seek(long n) throws IOException { + if (closed) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } + if (n < 0) { + throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); + } + if (n > contentLength) { + throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); + } + + if (n >= fCursor - limit && n <= fCursor) { // within buffer + bCursor = (int) (n - (fCursor - limit)); + return; + } + + // next read will read from here + fCursor = n; + + //invalidate buffer + limit = 0; + bCursor = 0; + } + + @Override + public synchronized long skip(long n) throws IOException { + if (closed) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } + long currentPos = getPos(); + if (currentPos == contentLength) { + if (n > 0) { + throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); + } + } + long newPos = currentPos + n; + if (newPos < 0) { + newPos = 0; + n = newPos - currentPos; + } + if (newPos > contentLength) { + newPos = contentLength; + n = newPos - currentPos; + } + seek(newPos); + return n; + } + + /** + * Return the size of the remaining available bytes + * if the size is less than or equal to {@link Integer#MAX_VALUE}, + * otherwise, return {@link Integer#MAX_VALUE}. + * <p> + * This is to match the behavior of DFSInputStream.available(), + * which some clients may rely on (HBase write-ahead log reading in + * particular). + */ + @Override + public synchronized int available() throws IOException { + if (closed) { + throw new IOException( + FSExceptionMessages.STREAM_IS_CLOSED); + } + final long remaining = this.contentLength - this.getPos(); + return remaining <= Integer.MAX_VALUE + ? (int) remaining : Integer.MAX_VALUE; + } + + /** + * Returns the length of the file that this stream refers to. Note that the length returned is the length + * as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, + * they wont be reflected in the returned length. 
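+   * (The length is computed once, from the entry's chunk list, when the stream is constructed.)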
+   *
+   * @return length of the file.
+   * @throws IOException if the stream is closed
+   */
+  public long length() throws IOException {
+    if (closed) {
+      throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+    }
+    return contentLength;
+  }
+
+  /**
+   * Returns the current offset from the start of the file.
+   *
+   * @throws IOException throws {@link IOException} if there is an error
+   */
+  @Override
+  public synchronized long getPos() throws IOException {
+    if (closed) {
+      throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+    }
+    return fCursor - limit + bCursor;
+  }
+
+  /**
+   * Seeks a different copy of the data. Returns true if
+   * found a new source, false otherwise.
+   *
+   * @throws IOException throws {@link IOException} if there is an error
+   */
+  @Override
+  public boolean seekToNewSource(long l) throws IOException {
+    return false;
+  }
+
+  @Override
+  public synchronized void close() throws IOException {
+    closed = true;
+    buffer = null; // de-reference the buffer so it can be GC'ed sooner
+  }
+
+  /**
+   * Not supported by this stream. Throws {@link UnsupportedOperationException}.
+   *
+   * @param readlimit ignored
+   */
+  @Override
+  public synchronized void mark(int readlimit) {
+    throw new UnsupportedOperationException("mark()/reset() not supported on this stream");
+  }
+
+  /**
+   * Not supported by this stream. Throws {@link UnsupportedOperationException}.
+   */
+  @Override
+  public synchronized void reset() throws IOException {
+    throw new UnsupportedOperationException("mark()/reset() not supported on this stream");
+  }
+
+  /**
+   * Gets whether mark and reset are supported by this stream. Always returns false.
+   *
+   * @return always {@code false}
+   */
+  @Override
+  public boolean markSupported() {
+    return false;
+  }
+}
diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedOutputStream.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedOutputStream.java
new file mode 100644
index 000000000..4f307ff96
--- /dev/null
+++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedOutputStream.java
@@ -0,0 +1,335 @@
+package seaweed.hdfs;
+
+// adapted from org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.FSExceptionMessages;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.StreamCapabilities;
+import org.apache.hadoop.fs.Syncable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import seaweedfs.client.FilerGrpcClient;
+import seaweedfs.client.FilerProto;
+import seaweedfs.client.SeaweedWrite;
+
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.io.OutputStream;
+import java.util.Locale;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentLinkedDeque;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import static seaweed.hdfs.SeaweedFileSystemStore.getParentDirectory;
+
+public class SeaweedOutputStream extends OutputStream implements Syncable, StreamCapabilities {
+
+  private static final Logger LOG = LoggerFactory.getLogger(SeaweedOutputStream.class);
+
+  private final FilerGrpcClient filerGrpcClient;
+  private final Path path;
+  private final int bufferSize;
+  private final int maxConcurrentRequestCount;
+  private final ThreadPoolExecutor threadExecutor;
+  private final ExecutorCompletionService<Void>
completionService; + private FilerProto.Entry.Builder entry; + private long position; + private boolean closed; + private boolean supportFlush = true; + private volatile IOException lastError; + private long lastFlushOffset; + private long lastTotalAppendOffset = 0; + private byte[] buffer; + private int bufferIndex; + private ConcurrentLinkedDeque<WriteOperation> writeOperations; + private String replication = "000"; + + public SeaweedOutputStream(FilerGrpcClient filerGrpcClient, final Path path, FilerProto.Entry.Builder entry, + final long position, final int bufferSize, final String replication) { + this.filerGrpcClient = filerGrpcClient; + this.replication = replication; + this.path = path; + this.position = position; + this.closed = false; + this.lastError = null; + this.lastFlushOffset = 0; + this.bufferSize = bufferSize; + this.buffer = new byte[bufferSize]; + this.bufferIndex = 0; + this.writeOperations = new ConcurrentLinkedDeque<>(); + + this.maxConcurrentRequestCount = 4 * Runtime.getRuntime().availableProcessors(); + + this.threadExecutor + = new ThreadPoolExecutor(maxConcurrentRequestCount, + maxConcurrentRequestCount, + 10L, + TimeUnit.SECONDS, + new LinkedBlockingQueue<Runnable>()); + this.completionService = new ExecutorCompletionService<>(this.threadExecutor); + + this.entry = entry; + + } + + private synchronized void flushWrittenBytesToServiceInternal(final long offset) throws IOException { + + LOG.debug("SeaweedWrite.writeMeta path: {} entry:{}", path, entry); + + try { + SeaweedWrite.writeMeta(filerGrpcClient, getParentDirectory(path), entry); + } catch (Exception ex) { + throw new IOException(ex); + } + this.lastFlushOffset = offset; + } + + @Override + public void write(final int byteVal) throws IOException { + write(new byte[]{(byte) (byteVal & 0xFF)}); + } + + @Override + public synchronized void write(final byte[] data, final int off, final int length) + throws IOException { + maybeThrowLastError(); + + Preconditions.checkArgument(data != null, "null data"); + + if (off < 0 || length < 0 || length > data.length - off) { + throw new IndexOutOfBoundsException(); + } + + int currentOffset = off; + int writableBytes = bufferSize - bufferIndex; + int numberOfBytesToWrite = length; + + while (numberOfBytesToWrite > 0) { + if (writableBytes <= numberOfBytesToWrite) { + System.arraycopy(data, currentOffset, buffer, bufferIndex, writableBytes); + bufferIndex += writableBytes; + writeCurrentBufferToService(); + currentOffset += writableBytes; + numberOfBytesToWrite = numberOfBytesToWrite - writableBytes; + } else { + System.arraycopy(data, currentOffset, buffer, bufferIndex, numberOfBytesToWrite); + bufferIndex += numberOfBytesToWrite; + numberOfBytesToWrite = 0; + } + + writableBytes = bufferSize - bufferIndex; + } + } + + /** + * Flushes this output stream and forces any buffered output bytes to be + * written out. If any data remains in the payload it is committed to the + * service. Data is queued for writing and forced out to the service + * before the call returns. + */ + @Override + public void flush() throws IOException { + if (supportFlush) { + flushInternalAsync(); + } + } + + /** + * Similar to posix fsync, flush out the data in client's user buffer + * all the way to the disk device (but the disk may have it in its cache). + * + * @throws IOException if error occurs + */ + @Override + public void hsync() throws IOException { + if (supportFlush) { + flushInternal(); + } + } + + /** + * Flush out the data in client's user buffer. 
After the return of
+   * this call, new readers will see the data.
+   *
+   * @throws IOException if any error occurs
+   */
+  @Override
+  public void hflush() throws IOException {
+    if (supportFlush) {
+      flushInternal();
+    }
+  }
+
+  /**
+   * Query the stream for a specific capability.
+   *
+   * @param capability string to query the stream support for.
+   * @return true for hsync and hflush.
+   */
+  @Override
+  public boolean hasCapability(String capability) {
+    switch (capability.toLowerCase(Locale.ENGLISH)) {
+      case StreamCapabilities.HSYNC:
+      case StreamCapabilities.HFLUSH:
+        return supportFlush;
+      default:
+        return false;
+    }
+  }
+
+  /**
+   * Force all data in the output stream to be written to SeaweedFS.
+   * Wait to return until this is complete. Close the access to the stream and
+   * shut down the upload thread pool.
+   * Any error caught in the worker threads and stored will be rethrown here
+   * after cleanup.
+   */
+  @Override
+  public synchronized void close() throws IOException {
+    if (closed) {
+      return;
+    }
+
+    LOG.debug("close path: {}", path);
+    try {
+      flushInternal();
+      threadExecutor.shutdown();
+    } finally {
+      lastError = new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+      buffer = null;
+      bufferIndex = 0;
+      closed = true;
+      writeOperations.clear();
+      if (!threadExecutor.isShutdown()) {
+        threadExecutor.shutdownNow();
+      }
+    }
+  }
+
+  private synchronized void writeCurrentBufferToService() throws IOException {
+    if (bufferIndex == 0) {
+      return;
+    }
+
+    final byte[] bytes = buffer;
+    final int bytesLength = bufferIndex;
+
+    buffer = new byte[bufferSize];
+    bufferIndex = 0;
+    final long offset = position;
+    position += bytesLength;
+
+    if (threadExecutor.getQueue().size() >= maxConcurrentRequestCount * 2) {
+      waitForTaskToComplete();
+    }
+
+    final Future<Void> job = completionService.submit(new Callable<Void>() {
+      @Override
+      public Void call() throws Exception {
+        // originally: client.append(path, offset, bytes, 0, bytesLength);
+        SeaweedWrite.writeData(entry, replication, filerGrpcClient, offset, bytes, 0, bytesLength);
+        return null;
+      }
+    });
+
+    writeOperations.add(new WriteOperation(job, offset, bytesLength));
+
+    // Try to shrink the queue
+    shrinkWriteOperationQueue();
+  }
+
+  private void waitForTaskToComplete() throws IOException {
+    boolean completed;
+    for (completed = false; completionService.poll() != null; completed = true) {
+      // keep polling until there is no data
+    }
+
+    if (!completed) {
+      try {
+        completionService.take();
+      } catch (InterruptedException e) {
+        lastError = (IOException) new InterruptedIOException(e.toString()).initCause(e);
+        throw lastError;
+      }
+    }
+  }
+
+  private void maybeThrowLastError() throws IOException {
+    if (lastError != null) {
+      throw lastError;
+    }
+  }
+
+  /**
+   * Try to remove the completed write operations from the beginning of the write
+   * operation FIFO queue.
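+   * Any exception thrown by a completed task is wrapped in an IOException, stored in lastError, and rethrown.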
+ */ + private synchronized void shrinkWriteOperationQueue() throws IOException { + try { + while (writeOperations.peek() != null && writeOperations.peek().task.isDone()) { + writeOperations.peek().task.get(); + lastTotalAppendOffset += writeOperations.peek().length; + writeOperations.remove(); + } + } catch (Exception e) { + lastError = new IOException(e); + throw lastError; + } + } + + private synchronized void flushInternal() throws IOException { + maybeThrowLastError(); + writeCurrentBufferToService(); + flushWrittenBytesToService(); + } + + private synchronized void flushInternalAsync() throws IOException { + maybeThrowLastError(); + writeCurrentBufferToService(); + flushWrittenBytesToServiceAsync(); + } + + private synchronized void flushWrittenBytesToService() throws IOException { + for (WriteOperation writeOperation : writeOperations) { + try { + writeOperation.task.get(); + } catch (Exception ex) { + lastError = new IOException(ex); + throw lastError; + } + } + LOG.debug("flushWrittenBytesToService: {} position:{}", path, position); + flushWrittenBytesToServiceInternal(position); + } + + private synchronized void flushWrittenBytesToServiceAsync() throws IOException { + shrinkWriteOperationQueue(); + + if (this.lastTotalAppendOffset > this.lastFlushOffset) { + this.flushWrittenBytesToServiceInternal(this.lastTotalAppendOffset); + } + } + + private static class WriteOperation { + private final Future<Void> task; + private final long startOffset; + private final long length; + + WriteOperation(final Future<Void> task, final long startOffset, final long length) { + Preconditions.checkNotNull(task, "task"); + Preconditions.checkArgument(startOffset >= 0, "startOffset"); + Preconditions.checkArgument(length >= 0, "length"); + + this.task = task; + this.startOffset = startOffset; + this.length = length; + } + } + +} diff --git a/other/java/s3copy/copier/pom.xml b/other/java/s3copier/pom.xml index f8cb9e91c..f8cb9e91c 100644 --- a/other/java/s3copy/copier/pom.xml +++ b/other/java/s3copier/pom.xml diff --git a/other/java/s3copy/copier/src/main/java/com/seaweedfs/s3/HighLevelMultipartUpload.java b/other/java/s3copier/src/main/java/com/seaweedfs/s3/HighLevelMultipartUpload.java index b86df95a0..b86df95a0 100644 --- a/other/java/s3copy/copier/src/main/java/com/seaweedfs/s3/HighLevelMultipartUpload.java +++ b/other/java/s3copier/src/main/java/com/seaweedfs/s3/HighLevelMultipartUpload.java diff --git a/other/java/s3copy/copier/src/main/java/com/seaweedfs/s3/PutObject.java b/other/java/s3copier/src/main/java/com/seaweedfs/s3/PutObject.java index 021d01e34..c310f0162 100644 --- a/other/java/s3copy/copier/src/main/java/com/seaweedfs/s3/PutObject.java +++ b/other/java/s3copier/src/main/java/com/seaweedfs/s3/PutObject.java @@ -12,13 +12,21 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.util.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import java.io.File; +import java.io.IOException; /** * Hello world! 
*/ public class PutObject { + + private static Log log = LogFactory.getLog(PutObject.class); + public static void main(String[] args) { AWSCredentials credentials = new BasicAWSCredentials("ANY-ACCESSKEYID", "ANY-SECRETACCESSKEY"); @@ -39,10 +47,11 @@ public class PutObject { String fileObjKeyName = "fileObject2"; String fileName = args[0]; + String stringContent = "Uploaded String Object v3"; try { // Upload a text string as a new object. - s3Client.putObject(bucketName, stringObjKeyName, "Uploaded String Object v3"); + s3Client.putObject(bucketName, stringObjKeyName, stringContent); // Upload a file as a new object with ContentType and title specified. PutObjectRequest request = new PutObjectRequest(bucketName, fileObjKeyName, new File(fileName)); @@ -51,6 +60,32 @@ public class PutObject { metadata.addUserMetadata("x-amz-meta-title", "someTitle"); request.setMetadata(metadata); s3Client.putObject(request); + + // test reads + S3Object written = s3Client.getObject(bucketName, stringObjKeyName); + try { + String expected = IOUtils.toString(written.getObjectContent()); + + if (!stringContent.equals(expected)){ + System.out.println("Failed to put and get back the content!"); + } + + } catch (IOException e) { + throw new SdkClientException("Error streaming content from S3 during download"); + } finally { + IOUtils.closeQuietly(written, log); + } + + // test deletes + s3Client.deleteObject(bucketName, stringObjKeyName); + + + // delete bucket + String tmpBucket = "tmpbucket"; + s3Client.createBucket(tmpBucket); + s3Client.putObject(tmpBucket, stringObjKeyName, stringContent); + s3Client.deleteBucket(tmpBucket); + } catch (AmazonServiceException e) { // The call was transmitted successfully, but Amazon S3 couldn't process // it, so it returned an error response. 
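For reference, a minimal standalone sketch of the put/get/delete round-trip that the updated PutObject exercises against a SeaweedFS S3 gateway. The endpoint http://localhost:8333 (the default `weed s3` port), the placeholder credentials (mirroring the ones in the patch), and the class name RoundTripSketch are assumptions for illustration, not part of the patch:

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.util.IOUtils;

public class RoundTripSketch {
    public static void main(String[] args) throws Exception {
        // placeholder credentials, as in the patch; the gateway endpoint is an assumption
        AmazonS3 s3 = AmazonS3ClientBuilder.standard()
            .withCredentials(new AWSStaticCredentialsProvider(
                new BasicAWSCredentials("ANY-ACCESSKEYID", "ANY-SECRETACCESSKEY")))
            .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(
                "http://localhost:8333", "us-east-1")) // assumed local `weed s3` gateway
            .enablePathStyleAccess() // bucket-in-path addressing for a non-AWS endpoint
            .build();

        String bucket = "testbucket";
        String key = "roundtrip";

        s3.createBucket(bucket);
        s3.putObject(bucket, key, "hello");

        // read the object back and verify the content survived the round trip
        String readBack = IOUtils.toString(s3.getObject(bucket, key).getObjectContent());
        System.out.println("hello".equals(readBack) ? "round-trip ok" : "mismatch: " + readBack);

        // clean up, mirroring the delete-object and delete-bucket paths the patch tests
        s3.deleteObject(bucket, key);
        s3.deleteBucket(bucket);
    }
}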
diff --git a/other/java/s3copy/copier/src/test/java/com/seaweedfs/s3/PutObjectTest.java b/other/java/s3copier/src/test/java/com/seaweedfs/s3/PutObjectTest.java index 0404dab60..0404dab60 100644 --- a/other/java/s3copy/copier/src/test/java/com/seaweedfs/s3/PutObjectTest.java +++ b/other/java/s3copier/src/test/java/com/seaweedfs/s3/PutObjectTest.java diff --git a/unmaintained/change_superblock/change_superblock.go b/unmaintained/change_superblock/change_superblock.go index 779580a9b..afe651c4e 100644 --- a/unmaintained/change_superblock/change_superblock.go +++ b/unmaintained/change_superblock/change_superblock.go @@ -8,7 +8,9 @@ import ( "strconv" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" ) var ( @@ -46,9 +48,10 @@ func main() { if err != nil { glog.Fatalf("Open Volume Data File [ERROR]: %v", err) } - defer datFile.Close() + datBackend := backend.NewDiskFile(datFile) + defer datBackend.Close() - superBlock, err := storage.ReadSuperBlock(datFile) + superBlock, err := super_block.ReadSuperBlock(datBackend) if err != nil { glog.Fatalf("cannot parse existing super block: %v", err) @@ -60,7 +63,7 @@ func main() { hasChange := false if *targetReplica != "" { - replica, err := storage.NewReplicaPlacementFromString(*targetReplica) + replica, err := super_block.NewReplicaPlacementFromString(*targetReplica) if err != nil { glog.Fatalf("cannot parse target replica %s: %v", *targetReplica, err) @@ -73,7 +76,7 @@ func main() { } if *targetTTL != "" { - ttl, err := storage.ReadTTL(*targetTTL) + ttl, err := needle.ReadTTL(*targetTTL) if err != nil { glog.Fatalf("cannot parse target ttl %s: %v", *targetTTL, err) diff --git a/unmaintained/compact_leveldb/compact_leveldb.go b/unmaintained/compact_leveldb/compact_leveldb.go new file mode 100644 index 000000000..317356c3f --- /dev/null +++ b/unmaintained/compact_leveldb/compact_leveldb.go @@ -0,0 +1,35 @@ +package main + +import ( + "flag" + "log" + + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + dir = flag.String("dir", ".", "data directory to store leveldb files") +) + +func main() { + + flag.Parse() + + opts := &opt.Options{ + BlockCacheCapacity: 32 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 16 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 10, + OpenFilesCacheCapacity: -1, + } + + db, err := leveldb.OpenFile(*dir, opts) + if err != nil { + log.Fatal(err) + } + defer db.Close() + if err := db.CompactRange(util.Range{}); err != nil { + log.Fatal(err) + } +} diff --git a/unmaintained/fix_dat/fix_dat.go b/unmaintained/fix_dat/fix_dat.go index 90287ecd6..d6110d870 100644 --- a/unmaintained/fix_dat/fix_dat.go +++ b/unmaintained/fix_dat/fix_dat.go @@ -9,9 +9,11 @@ import ( "strconv" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/storage" - "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" ) var ( @@ -43,11 +45,13 @@ func main() { glog.Fatalf("Read Volume Index %v", err) } defer indexFile.Close() - datFile, err := 
os.OpenFile(path.Join(*fixVolumePath, fileName+".dat"), os.O_RDONLY, 0644) + datFileName := path.Join(*fixVolumePath, fileName+".dat") + datFile, err := os.OpenFile(datFileName, os.O_RDONLY, 0644) if err != nil { glog.Fatalf("Read Volume Data %v", err) } - defer datFile.Close() + datBackend := backend.NewDiskFile(datFile) + defer datBackend.Close() newDatFile, err := os.Create(path.Join(*fixVolumePath, fileName+".dat_fixed")) if err != nil { @@ -55,21 +59,21 @@ func main() { } defer newDatFile.Close() - superBlock, err := storage.ReadSuperBlock(datFile) + superBlock, err := super_block.ReadSuperBlock(datBackend) if err != nil { glog.Fatalf("Read Volume Data superblock %v", err) } newDatFile.Write(superBlock.Bytes()) - iterateEntries(datFile, indexFile, func(n *storage.Needle, offset int64) { + iterateEntries(datBackend, indexFile, func(n *needle.Needle, offset int64) { fmt.Printf("needle id=%v name=%s size=%d dataSize=%d\n", n.Id, string(n.Name), n.Size, n.DataSize) - s, _, e := n.Append(newDatFile, superBlock.Version()) + _, s, _, e := n.Append(datBackend, superBlock.Version) fmt.Printf("size %d error %v\n", s, e) }) } -func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needle, offset int64)) { +func iterateEntries(datBackend backend.BackendStorageFile, idxFile *os.File, visitNeedle func(n *needle.Needle, offset int64)) { // start to read index file var readerOffset int64 bytes := make([]byte, 16) @@ -77,14 +81,14 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl readerOffset += int64(count) // start to read dat file - superblock, err := storage.ReadSuperBlock(datFile) + superBlock, err := super_block.ReadSuperBlock(datBackend) if err != nil { fmt.Printf("cannot read dat file super block: %v", err) return } - offset := int64(superblock.BlockSize()) - version := superblock.Version() - n, rest, err := storage.ReadNeedleHeader(datFile, version, offset) + offset := int64(superBlock.BlockSize()) + version := superBlock.Version + n, _, rest, err := needle.ReadNeedleHeader(datBackend, version, offset) if err != nil { fmt.Printf("cannot read needle header: %v", err) return @@ -106,7 +110,7 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl fmt.Printf("key: %d offsetFromIndex %d n.Size %d sizeFromIndex:%d\n", key, offsetFromIndex, n.Size, sizeFromIndex) - rest = storage.NeedleBodyLength(sizeFromIndex, version) + rest = needle.NeedleBodyLength(sizeFromIndex, version) func() { defer func() { @@ -114,7 +118,7 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl fmt.Println("Recovered in f", r) } }() - if err = n.ReadNeedleBody(datFile, version, offset+int64(types.NeedleEntrySize), rest); err != nil { + if _, err = n.ReadNeedleBody(datBackend, version, offset+int64(types.NeedleHeaderSize), rest); err != nil { fmt.Printf("cannot read needle body: offset %d body %d %v\n", offset, rest, err) } }() @@ -124,9 +128,9 @@ func iterateEntries(datFile, idxFile *os.File, visitNeedle func(n *storage.Needl } visitNeedle(n, offset) - offset += types.NeedleEntrySize + rest + offset += types.NeedleHeaderSize + rest //fmt.Printf("==> new entry offset %d\n", offset) - if n, rest, err = storage.ReadNeedleHeader(datFile, version, offset); err != nil { + if n, _, rest, err = needle.ReadNeedleHeader(datBackend, version, offset); err != nil { if err == io.EOF { return } diff --git a/unmaintained/load_test/load_test_leveldb/load_test_leveldb.go 
b/unmaintained/load_test/load_test_leveldb/load_test_leveldb.go new file mode 100644 index 000000000..43dfb0e21 --- /dev/null +++ b/unmaintained/load_test/load_test_leveldb/load_test_leveldb.go @@ -0,0 +1,155 @@ +package load_test_leveldb + +import ( + "crypto/md5" + "flag" + "fmt" + "io" + "log" + "math/rand" + "os" + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +var ( + dir = flag.String("dir", "./t", "directory to store level db files") + useHash = flag.Bool("isHash", false, "hash the path as the key") + dbCount = flag.Int("dbCount", 1, "the number of leveldb") +) + +func main() { + + flag.Parse() + + totalTenants := 300 + totalYears := 3 + + opts := &opt.Options{ + BlockCacheCapacity: 32 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 16 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 4, + } + + var dbs []*leveldb.DB + var chans []chan string + for d := 0; d < *dbCount; d++ { + dbFolder := fmt.Sprintf("%s/%02d", *dir, d) + os.MkdirAll(dbFolder, 0755) + db, err := leveldb.OpenFile(dbFolder, opts) + if err != nil { + log.Printf("filer store open dir %s: %v", *dir, err) + return + } + dbs = append(dbs, db) + chans = append(chans, make(chan string, 1024)) + } + + var wg sync.WaitGroup + for d := 0; d < *dbCount; d++ { + wg.Add(1) + go func(d int) { + defer wg.Done() + + ch := chans[d] + db := dbs[d] + + for p := range ch { + if *useHash { + insertAsHash(db, p) + } else { + insertAsFullPath(db, p) + } + } + }(d) + } + + counter := int64(0) + lastResetTime := time.Now() + + r := rand.New(rand.NewSource(35)) + + for y := 0; y < totalYears; y++ { + for m := 0; m < 12; m++ { + for d := 0; d < 31; d++ { + for h := 0; h < 24; h++ { + for min := 0; min < 60; min++ { + for i := 0; i < totalTenants; i++ { + p := fmt.Sprintf("tenent%03d/%4d/%02d/%02d/%02d/%02d", i, 2015+y, 1+m, 1+d, h, min) + + x := r.Intn(*dbCount) + + chans[x] <- p + + counter++ + } + + t := time.Now() + if lastResetTime.Add(time.Second).Before(t) { + p := fmt.Sprintf("%4d/%02d/%02d/%02d/%02d", 2015+y, 1+m, 1+d, h, min) + fmt.Printf("%s = %4d put/sec\n", p, counter) + counter = 0 + lastResetTime = t + } + } + } + } + } + } + + for d := 0; d < *dbCount; d++ { + close(chans[d]) + } + + wg.Wait() + +} + +func insertAsFullPath(db *leveldb.DB, p string) { + _, getErr := db.Get([]byte(p), nil) + if getErr == leveldb.ErrNotFound { + putErr := db.Put([]byte(p), []byte(p), nil) + if putErr != nil { + log.Printf("failed to put %s", p) + } + } +} + +func insertAsHash(db *leveldb.DB, p string) { + key := fmt.Sprintf("%d:%s", hashToLong(p), p) + _, getErr := db.Get([]byte(key), nil) + if getErr == leveldb.ErrNotFound { + putErr := db.Put([]byte(key), []byte(p), nil) + if putErr != nil { + log.Printf("failed to put %s", p) + } + } +} + +func hashToLong(dir string) (v int64) { + h := md5.New() + io.WriteString(h, dir) + + b := h.Sum(nil) + + v += int64(b[0]) + v <<= 8 + v += int64(b[1]) + v <<= 8 + v += int64(b[2]) + v <<= 8 + v += int64(b[3]) + v <<= 8 + v += int64(b[4]) + v <<= 8 + v += int64(b[5]) + v <<= 8 + v += int64(b[6]) + v <<= 8 + v += int64(b[7]) + + return +} diff --git a/unmaintained/remove_duplicate_fids/remove_duplicate_fids.go b/unmaintained/remove_duplicate_fids/remove_duplicate_fids.go new file mode 100644 index 000000000..84173a663 --- /dev/null +++ b/unmaintained/remove_duplicate_fids/remove_duplicate_fids.go @@ -0,0 +1,95 @@ +package main + +import ( + "flag" + "fmt" + "os" + "path/filepath" + + 
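load_test_leveldb's hashToLong folds the first eight md5 bytes into an int64 with a chain of shifts and adds, and prefixes that hash to the path key to spread lexicographically adjacent paths across the keyspace. An equivalent one-liner (a sketch, not part of the patch) uses encoding/binary, which makes the big-endian intent explicit:

    package main

    import (
        "crypto/md5"
        "encoding/binary"
        "fmt"
        "io"
    )

    // hashToLong produces the same bit pattern as the shift-and-add
    // version in load_test_leveldb.go: md5 digest bytes 0..7, big-endian.
    func hashToLong(dir string) int64 {
        h := md5.New()
        io.WriteString(h, dir)
        return int64(binary.BigEndian.Uint64(h.Sum(nil)[:8]))
    }

    func main() {
        fmt.Println(hashToLong("tenent001/2015/01/01/00/00")) // key prefix for a sample path
    }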
"github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" +) + +var ( + volumePath = flag.String("dir", "/tmp", "data directory to store files") + volumeCollection = flag.String("collection", "", "the volume collection name") + volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.") +) + +func Checksum(n *needle.Needle) string { + return fmt.Sprintf("%s%x", n.Id, n.Cookie) +} + +type VolumeFileScanner4SeeDat struct { + version needle.Version + block super_block.SuperBlock + + dir string + hashes map[string]bool + dat *os.File + datBackend backend.BackendStorageFile +} + +func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock super_block.SuperBlock) error { + scanner.version = superBlock.Version + scanner.block = superBlock + return nil + +} +func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool { + return true +} + +func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { + + if scanner.datBackend == nil { + newFileName := filepath.Join(*volumePath, "dat_fixed") + newDatFile, err := os.Create(newFileName) + if err != nil { + glog.Fatalf("Write New Volume Data %v", err) + } + scanner.datBackend = backend.NewDiskFile(newDatFile) + scanner.datBackend.WriteAt(scanner.block.Bytes(), 0) + } + + checksum := Checksum(n) + + if scanner.hashes[checksum] { + glog.V(0).Infof("duplicate checksum:%s fid:%d,%s%x @ offset:%d", checksum, *volumeId, n.Id, n.Cookie, offset) + return nil + } + scanner.hashes[checksum] = true + + _, s, _, e := n.Append(scanner.datBackend, scanner.version) + fmt.Printf("size %d error %v\n", s, e) + + return nil +} + +func main() { + flag.Parse() + + vid := needle.VolumeId(*volumeId) + + outpath, _ := filepath.Abs(filepath.Dir(os.Args[0])) + + scanner := &VolumeFileScanner4SeeDat{ + dir: filepath.Join(outpath, "out"), + hashes: map[string]bool{}, + } + + if _, err := os.Stat(scanner.dir); err != nil { + if err := os.MkdirAll(scanner.dir, os.ModePerm); err != nil { + glog.Fatalf("could not create output dir : %s", err) + } + } + + err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner) + if err != nil { + glog.Fatalf("Reading Volume File [ERROR] %s\n", err) + } + +} diff --git a/unmaintained/repeated_vacuum/repeated_vacuum.go b/unmaintained/repeated_vacuum/repeated_vacuum.go new file mode 100644 index 000000000..96d4ccdf6 --- /dev/null +++ b/unmaintained/repeated_vacuum/repeated_vacuum.go @@ -0,0 +1,60 @@ +package main + +import ( + "flag" + "fmt" + "log" + "math/rand" + + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + master = flag.String("master", "127.0.0.1:9333", "the master server") + repeat = flag.Int("n", 5, "repeat how many times") + garbageThreshold = flag.Float64("garbageThreshold", 0.3, "garbageThreshold") +) + +func main() { + flag.Parse() + + util.LoadConfiguration("security", false) + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + + genFile(grpcDialOption, 0) + + for i := 0; i < *repeat; i++ { + // create 2 files, and delete one of them + + assignResult, targetUrl := 
genFile(grpcDialOption, i) + + util.Delete(targetUrl, string(assignResult.Auth)) + + println("vacuum", i, "threshold", *garbageThreshold) + util.Get(fmt.Sprintf("http://%s/vol/vacuum?garbageThreshold=%f", *master, *garbageThreshold)) + + } + +} + +func genFile(grpcDialOption grpc.DialOption, i int) (*operation.AssignResult, string) { + assignResult, err := operation.Assign(*master, grpcDialOption, &operation.VolumeAssignRequest{Count: 1}) + if err != nil { + log.Fatalf("assign: %v", err) + } + + data := make([]byte, 1024) + rand.Read(data) + + targetUrl := fmt.Sprintf("http://%s/%s", assignResult.Url, assignResult.Fid) + + _, err = operation.UploadData(targetUrl, fmt.Sprintf("test%d", i), false, data, false, "bench/test", nil, assignResult.Auth) + if err != nil { + log.Fatalf("upload: %v", err) + } + return assignResult, targetUrl +} diff --git a/unmaintained/see_dat/see_dat.go b/unmaintained/see_dat/see_dat.go new file mode 100644 index 000000000..efc58e751 --- /dev/null +++ b/unmaintained/see_dat/see_dat.go @@ -0,0 +1,48 @@ +package main + +import ( + "flag" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" +) + +var ( + volumePath = flag.String("dir", "/tmp", "data directory to store files") + volumeCollection = flag.String("collection", "", "the volume collection name") + volumeId = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.") +) + +type VolumeFileScanner4SeeDat struct { + version needle.Version +} + +func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock super_block.SuperBlock) error { + scanner.version = superBlock.Version + return nil + +} +func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool { + return true +} + +func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { + t := time.Unix(int64(n.AppendAtNs)/int64(time.Second), int64(n.AppendAtNs)%int64(time.Second)) + glog.V(0).Infof("%d,%s%x offset %d size %d cookie %x appendedAt %v", *volumeId, n.Id, n.Cookie, offset, n.Size, n.Cookie, t) + return nil +} + +func main() { + flag.Parse() + + vid := needle.VolumeId(*volumeId) + + scanner := &VolumeFileScanner4SeeDat{} + err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner) + if err != nil { + glog.Fatalf("Reading Volume File [ERROR] %s\n", err) + } +} diff --git a/unmaintained/see_idx/see_idx.go b/unmaintained/see_idx/see_idx.go index 9e8133dde..777af1821 100644 --- a/unmaintained/see_idx/see_idx.go +++ b/unmaintained/see_idx/see_idx.go @@ -2,13 +2,13 @@ package main import ( "flag" + "fmt" "os" "path" "strconv" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/storage" - "fmt" + "github.com/chrislusf/seaweedfs/weed/storage/idx" "github.com/chrislusf/seaweedfs/weed/storage/types" ) @@ -35,7 +35,7 @@ func main() { } defer indexFile.Close() - storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error { + idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error { fmt.Printf("key:%v offset:%v size:%v\n", key, offset, size) return nil }) diff --git a/unmaintained/see_meta/see_meta.go b/unmaintained/see_meta/see_meta.go new file mode 100644 index 000000000..0d2ac8de1 --- /dev/null +++ 
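see_dat and remove_duplicate_fids implement the same three-method scanner contract that storage.ScanVolumeFile now drives: VisitSuperBlock, ReadNeedleBody, VisitNeedle. A minimal scanner under that contract (method set as shown in this patch), which also converts the needle's uint64 AppendAtNs into a time.Time; time.Unix(0, ns) yields the same instant as the seconds/remainder split used in see_dat:

    package main

    import (
        "fmt"
        "time"

        "github.com/chrislusf/seaweedfs/weed/glog"
        "github.com/chrislusf/seaweedfs/weed/storage"
        "github.com/chrislusf/seaweedfs/weed/storage/needle"
        "github.com/chrislusf/seaweedfs/weed/storage/super_block"
    )

    type appendTimeScanner struct {
        version needle.Version
        count   int
    }

    func (s *appendTimeScanner) VisitSuperBlock(superBlock super_block.SuperBlock) error {
        s.version = superBlock.Version
        return nil
    }

    // see_dat also returns true here: the body is read so fields like
    // AppendAtNs are populated before VisitNeedle runs.
    func (s *appendTimeScanner) ReadNeedleBody() bool { return true }

    func (s *appendTimeScanner) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error {
        s.count++
        appendedAt := time.Unix(0, int64(n.AppendAtNs)) // same instant as the split in see_dat.go
        fmt.Printf("needle %v size %d appended %v\n", n.Id, n.Size, appendedAt)
        return nil
    }

    func main() {
        scanner := &appendTimeScanner{}
        err := storage.ScanVolumeFile("/data", "", needle.VolumeId(1), storage.NeedleMapInMemory, scanner)
        if err != nil {
            glog.Fatalf("scan volume: %v", err)
        }
        fmt.Println("needles visited:", scanner.count)
    }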
b/unmaintained/see_meta/see_meta.go @@ -0,0 +1,68 @@ +package main + +import ( + "flag" + "fmt" + "io" + "log" + "os" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/proto" +) + +var ( + metaFile = flag.String("meta", "", "meta file generated via fs.meta.save") +) + +func main() { + flag.Parse() + + dst, err := os.OpenFile(*metaFile, os.O_RDONLY, 0644) + if err != nil { + log.Fatalf("failed to open %s: %v", *metaFile, err) + } + defer dst.Close() + + err = walkMetaFile(dst) + if err != nil { + log.Fatalf("failed to visit %s: %v", *metaFile, err) + } + +} + +func walkMetaFile(dst *os.File) error { + + sizeBuf := make([]byte, 4) + + for { + if n, err := dst.Read(sizeBuf); n != 4 { + if err == io.EOF { + return nil + } + return err + } + + size := util.BytesToUint32(sizeBuf) + + data := make([]byte, int(size)) + + if n, err := dst.Read(data); n != len(data) { + return err + } + + fullEntry := &filer_pb.FullEntry{} + if err := proto.Unmarshal(data, fullEntry); err != nil { + return err + } + + fmt.Fprintf(os.Stdout, "file %s %v\n", filer2.FullPath(fullEntry.Dir).Child(fullEntry.Entry.Name), fullEntry.Entry.Attributes.String()) + for i, chunk := range fullEntry.Entry.Chunks { + fmt.Fprintf(os.Stdout, " chunk %d %v\n", i+1, chunk.String()) + } + + } + +} diff --git a/unmaintained/stress_filer_upload/bench_filer_upload/bench_filer_upload.go b/unmaintained/stress_filer_upload/bench_filer_upload/bench_filer_upload.go new file mode 100644 index 000000000..b2e4b28c6 --- /dev/null +++ b/unmaintained/stress_filer_upload/bench_filer_upload/bench_filer_upload.go @@ -0,0 +1,136 @@ +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math/rand" + "mime/multipart" + "net/http" + "os" + "strings" + "sync" + "time" +) + +var ( + size = flag.Int("size", 1024, "file size") + concurrency = flag.Int("c", 4, "concurrent number of uploads") + times = flag.Int("n", 1024, "repeated number of times") + fileCount = flag.Int("fileCount", 1, "number of files to write") + destination = flag.String("to", "http://localhost:8888/", "destination directory on filer") + + statsChan = make(chan stat, 8) +) + +type stat struct { + size int64 +} + +func main() { + + flag.Parse() + + data := make([]byte, *size) + println("data len", len(data)) + + var wg sync.WaitGroup + for x := 0; x < *concurrency; x++ { + wg.Add(1) + + go func(x int) { + defer wg.Done() + + client := &http.Client{Transport: &http.Transport{ + MaxConnsPerHost: 1024, + MaxIdleConnsPerHost: 1024, + }} + r := rand.New(rand.NewSource(time.Now().UnixNano() + int64(x))) + + for t := 0; t < *times; t++ { + for f := 0; f < *fileCount; f++ { + fn := r.Intn(*fileCount) + if size, err := uploadFileToFiler(client, data, fmt.Sprintf("file%04d", fn), *destination); err == nil { + statsChan <- stat{ + size: size, + } + } else { + log.Fatalf("client %d upload %d times: %v", x, t, err) + } + } + } + }(x) + } + + go func() { + ticker := time.NewTicker(1000 * time.Millisecond) + + var lastTime time.Time + var counter, size int64 + for { + select { + case stat := <-statsChan: + size += stat.size + counter++ + case x := <-ticker.C: + if !lastTime.IsZero() { + elapsed := x.Sub(lastTime).Seconds() + fmt.Fprintf(os.Stdout, "%.2f files/s, %.2f MB/s\n", + float64(counter)/elapsed, + float64(size/1024/1024)/elapsed) + } + lastTime = x + size = 0 + counter = 0 + } + } + }() + + wg.Wait() + +} + +func uploadFileToFiler(client 
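see_meta walks a stream of records, each a 4-byte length followed by a marshaled filer_pb.FullEntry. Its dst.Read(sizeBuf) relies on the reads coming back full; a short-read-safe variant is sketched below using io.ReadFull. It assumes util.BytesToUint32 is big-endian, which encoding/binary states explicitly; the meta file name is hypothetical.

    package main

    import (
        "encoding/binary"
        "fmt"
        "io"
        "log"
        "os"

        "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
        "github.com/golang/protobuf/proto"
    )

    func walkEntries(r io.Reader, visit func(*filer_pb.FullEntry)) error {
        sizeBuf := make([]byte, 4)
        for {
            if _, err := io.ReadFull(r, sizeBuf); err != nil {
                if err == io.EOF {
                    return nil // clean end of stream
                }
                return err
            }
            data := make([]byte, binary.BigEndian.Uint32(sizeBuf))
            if _, err := io.ReadFull(r, data); err != nil {
                return err // truncated record
            }
            fullEntry := &filer_pb.FullEntry{}
            if err := proto.Unmarshal(data, fullEntry); err != nil {
                return err
            }
            visit(fullEntry)
        }
    }

    func main() {
        f, err := os.Open("meta.bin") // hypothetical output of fs.meta.save
        if err != nil {
            log.Fatal(err)
        }
        defer f.Close()
        if err := walkEntries(f, func(e *filer_pb.FullEntry) {
            fmt.Println(e.Dir, e.Entry.Name)
        }); err != nil {
            log.Fatal(err)
        }
    }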
*http.Client, data []byte, filename, destination string) (size int64, err error) { + + if !strings.HasSuffix(destination, "/") { + destination = destination + "/" + } + + body := &bytes.Buffer{} + writer := multipart.NewWriter(body) + part, err := writer.CreateFormFile("file", filename) + if err != nil { + return 0, fmt.Errorf("fail to create form %v: %v", filename, err) + } + + part.Write(data) + + err = writer.Close() + if err != nil { + return 0, fmt.Errorf("fail to write part %v: %v", filename, err) + } + + uri := destination + filename + + request, err := http.NewRequest("POST", uri, body) + request.Header.Set("Content-Type", writer.FormDataContentType()) + // request.Close = true // can not use this, which do not reuse http connection, impacting filer->volume also. + + resp, err := client.Do(request) + if err != nil { + return 0, fmt.Errorf("http POST %s: %v", uri, err) + } else { + body := &bytes.Buffer{} + _, err := body.ReadFrom(resp.Body) + if err != nil { + return 0, fmt.Errorf("read http POST %s response: %v", uri, err) + } + io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + } + + return int64(len(data)), nil +} diff --git a/unmaintained/stress_filer_upload/stress_filer_upload_actual/stress_filer_upload.go b/unmaintained/stress_filer_upload/stress_filer_upload_actual/stress_filer_upload.go new file mode 100644 index 000000000..8b986b546 --- /dev/null +++ b/unmaintained/stress_filer_upload/stress_filer_upload_actual/stress_filer_upload.go @@ -0,0 +1,150 @@ +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "math/rand" + "mime/multipart" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + "time" +) + +var ( + dir = flag.String("dir", ".", "upload files under this directory") + concurrency = flag.Int("c", 1, "concurrent number of uploads") + times = flag.Int("n", 1, "repeated number of times") + destination = flag.String("to", "http://localhost:8888/", "destination directory on filer") + + statsChan = make(chan stat, 8) +) + +type stat struct { + size int64 +} + +func main() { + + flag.Parse() + + var fileNames []string + + files, err := ioutil.ReadDir(*dir) + if err != nil { + log.Fatalf("fail to read dir %v: %v", *dir, err) + } + + for _, file := range files { + if file.IsDir() { + continue + } + fileNames = append(fileNames, filepath.Join(*dir, file.Name())) + } + + var wg sync.WaitGroup + for x := 0; x < *concurrency; x++ { + wg.Add(1) + + client := &http.Client{} + + go func() { + defer wg.Done() + rand.Shuffle(len(fileNames), func(i, j int) { + fileNames[i], fileNames[j] = fileNames[j], fileNames[i] + }) + for t := 0; t < *times; t++ { + for _, filename := range fileNames { + if size, err := uploadFileToFiler(client, filename, *destination); err == nil { + statsChan <- stat{ + size: size, + } + } + } + } + }() + } + + go func() { + ticker := time.NewTicker(500 * time.Millisecond) + + var lastTime time.Time + var counter, size int64 + for { + select { + case stat := <-statsChan: + size += stat.size + counter++ + case x := <-ticker.C: + if !lastTime.IsZero() { + elapsed := x.Sub(lastTime).Seconds() + fmt.Fprintf(os.Stdout, "%.2f files/s, %.2f MB/s\n", + float64(counter)/elapsed, + float64(size/1024/1024)/elapsed) + } + lastTime = x + size = 0 + counter = 0 + } + } + }() + + wg.Wait() + +} + +func uploadFileToFiler(client *http.Client, filename, destination string) (size int64, err error) { + file, err := os.Open(filename) + if err != nil { + panic(err) + } + defer file.Close() + + fi, err := file.Stat() + + if 
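Both upload benchmarks share this ticker-driven stats reporter: sizes arrive on a channel, and per-second rates are printed on each tick. One subtlety: float64(size/1024/1024) divides integers first, so sub-MiB traffic reports as 0.00 MB/s, while float64(size)/1024/1024 keeps the fraction. A self-contained sketch with that adjustment:

    package main

    import (
        "fmt"
        "time"
    )

    type stat struct{ size int64 }

    // reportLoop accumulates sizes from statsChan and prints rates per tick.
    func reportLoop(statsChan <-chan stat, interval time.Duration) {
        ticker := time.NewTicker(interval)
        defer ticker.Stop()
        var lastTime time.Time
        var counter, size int64
        for {
            select {
            case s := <-statsChan:
                size += s.size
                counter++
            case now := <-ticker.C:
                if !lastTime.IsZero() {
                    elapsed := now.Sub(lastTime).Seconds()
                    fmt.Printf("%.2f files/s, %.2f MB/s\n",
                        float64(counter)/elapsed,
                        float64(size)/1024/1024/elapsed) // float division keeps sub-MiB precision
                }
                lastTime = now
                size, counter = 0, 0
            }
        }
    }

    func main() {
        ch := make(chan stat, 8)
        go reportLoop(ch, time.Second)
        for i := 0; i < 100; i++ { // simulate 100 uploads of 512 KiB
            ch <- stat{size: 512 * 1024}
            time.Sleep(30 * time.Millisecond)
        }
    }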
!strings.HasSuffix(destination, "/") { + destination = destination + "/" + } + + body := &bytes.Buffer{} + writer := multipart.NewWriter(body) + part, err := writer.CreateFormFile("file", file.Name()) + if err != nil { + return 0, fmt.Errorf("fail to create form %v: %v", file.Name(), err) + } + _, err = io.Copy(part, file) + if err != nil { + return 0, fmt.Errorf("fail to write part %v: %v", file.Name(), err) + } + + err = writer.Close() + if err != nil { + return 0, fmt.Errorf("fail to write part %v: %v", file.Name(), err) + } + + uri := destination + file.Name() + + request, err := http.NewRequest("POST", uri, body) + request.Header.Set("Content-Type", writer.FormDataContentType()) + + resp, err := client.Do(request) + if err != nil { + return 0, fmt.Errorf("http POST %s: %v", uri, err) + } else { + body := &bytes.Buffer{} + _, err := body.ReadFrom(resp.Body) + if err != nil { + return 0, fmt.Errorf("read http POST %s response: %v", uri, err) + } + io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + } + + return fi.Size(), nil +} diff --git a/unmaintained/volume_tailer/volume_tailer.go b/unmaintained/volume_tailer/volume_tailer.go new file mode 100644 index 000000000..3c2d36d22 --- /dev/null +++ b/unmaintained/volume_tailer/volume_tailer.go @@ -0,0 +1,70 @@ +package main + +import ( + "flag" + "log" + "time" + + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + util2 "github.com/chrislusf/seaweedfs/weed/util" + "github.com/spf13/viper" + "golang.org/x/tools/godoc/util" +) + +var ( + master = flag.String("master", "localhost:9333", "master server host and port") + volumeId = flag.Int("volumeId", -1, "a volume id") + rewindDuration = flag.Duration("rewind", -1, "rewind back in time. -1 means from the first entry. 
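Stripped of flags and stats, the upload path in both stress tools is a hand-built multipart POST of a single "file" field, with the response body drained so the HTTP connection can be reused (the commented-out request.Close in the patch exists for exactly that reason). A minimal sketch, with hypothetical filer URL and file name:

    package main

    import (
        "bytes"
        "fmt"
        "io"
        "io/ioutil"
        "mime/multipart"
        "net/http"
        "strings"
    )

    func uploadBytes(client *http.Client, destination, name string, data []byte) error {
        if !strings.HasSuffix(destination, "/") {
            destination += "/"
        }
        body := &bytes.Buffer{}
        writer := multipart.NewWriter(body)
        part, err := writer.CreateFormFile("file", name)
        if err != nil {
            return err
        }
        if _, err = part.Write(data); err != nil {
            return err
        }
        if err = writer.Close(); err != nil { // finalizes the multipart boundary
            return err
        }
        resp, err := client.Post(destination+name, writer.FormDataContentType(), body)
        if err != nil {
            return err
        }
        defer resp.Body.Close()
        io.Copy(ioutil.Discard, resp.Body) // drain so the connection can be reused
        return nil
    }

    func main() {
        client := &http.Client{}
        if err := uploadBytes(client, "http://localhost:8888/benchdir", "hello.txt", []byte("hello")); err != nil {
            fmt.Println("upload:", err)
        }
    }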
0 means from now.") + timeoutSeconds = flag.Int("timeoutSeconds", 0, "disconnect if no activity after these seconds") + showTextFile = flag.Bool("showTextFile", false, "display textual file content") +) + +func main() { + flag.Parse() + + util2.LoadConfiguration("security", false) + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + + vid := needle.VolumeId(*volumeId) + + var sinceTimeNs int64 + if *rewindDuration == 0 { + sinceTimeNs = time.Now().UnixNano() + } else if *rewindDuration == -1 { + sinceTimeNs = 0 + } else if *rewindDuration > 0 { + sinceTimeNs = time.Now().Add(-*rewindDuration).UnixNano() + } + + err := operation.TailVolume(*master, grpcDialOption, vid, uint64(sinceTimeNs), *timeoutSeconds, func(n *needle.Needle) (err error) { + if n.Size == 0 { + println("-", n.String()) + return nil + } else { + println("+", n.String()) + } + + if *showTextFile { + + data := n.Data + if n.IsGzipped() { + if data, err = util2.UnGzipData(data); err != nil { + return err + } + } + if util.IsText(data) { + println(string(data)) + } + + println("-", n.String(), "compressed", n.IsGzipped(), "original size", len(data)) + } + return nil + }) + + if err != nil { + log.Printf("Error VolumeTailSender volume %d: %v", vid, err) + } + +} diff --git a/util/gostd b/util/gostd new file mode 100755 index 000000000..e9fc783d1 --- /dev/null +++ b/util/gostd @@ -0,0 +1,98 @@ +#!/usr/bin/env bash + +############################ GLOBAL VARIABLES +regex=' ' +branch="master" +max_length=150 + +REGEX_SUFFIX_GO=".+\.go$" + +############################ FUNCTIONS +msg() { + printf '%b' "$1" >&2 +} + +die() { + msg "\33[31m[✘]\33[0m ${1}${2}" + exit 1 +} + +succ() { + msg "\33[34m[√]\33[0m ${1}${2}" +} + +gostd() { + local branch=$1 + local reg4exclude=$2 + local max_length=$3 + + for file in `git diff $branch --name-only` + do + if ! [[ $file =~ $REGEX_SUFFIX_GO ]] || [[ $file =~ $reg4exclude ]]; then + continue + fi + + error=`go fmt $file 2>&1` + if ! [ $? -eq 0 ]; then + die "go fmt $file:" "$error" + fi + + succ "$file\n" + + grep -n -E --color=always ".{$max_length}" $file | awk '{ printf ("%4s %s\n", "", $0) }' + done +} + +get_options() { + while getopts "b:e:hl:" opts + do + case $opts in + b) + branch=$OPTARG + ;; + e) + regex=$OPTARG + ;; + h) + usage + exit 0 + ;; + l) + max_length=$OPTARG + ;; + \?) + usage + exit 1 + ;; + esac + done +} + +usage () { + cat << _EOC_ +Usage: + gostd [options] + +Options: + -b <branch/commit> Specify the git diff branch or commit. + (default: master) + -e <regex> Regex for excluding file or directory. + -h Print this usage. + -l <length> Show files that exceed the limit line length. 
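volume_tailer maps the -rewind flag onto a nanosecond cursor: 0 means tail from now, -1 means from the first entry (any other negative value falls through to the same zero cursor, since sinceTimeNs starts at 0), and a positive duration rewinds from now. The same mapping, isolated as a small function:

    package main

    import (
        "fmt"
        "time"
    )

    func sinceNs(rewind time.Duration) int64 {
        switch {
        case rewind == 0:
            return time.Now().UnixNano() // only entries written from now on
        case rewind < 0:
            return 0 // from the first entry
        default:
            return time.Now().Add(-rewind).UnixNano()
        }
    }

    func main() {
        fmt.Println(sinceNs(0), sinceNs(-1), sinceNs(10*time.Minute))
    }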
+ (default: 150) + +Examples: + gostd + gostd -b master -l 100 + gostd -b 59d532a -e weed/pb -l 100 +_EOC_ +} + +main() { + get_options "$@" + + gostd "$branch" "$regex" "$max_length" +} + +############################ MAIN() +main "$@" diff --git a/weed/command/backup.go b/weed/command/backup.go index 7cdec34ad..615be80cf 100644 --- a/weed/command/backup.go +++ b/weed/command/backup.go @@ -3,6 +3,11 @@ package command import ( "fmt" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/storage" ) @@ -12,10 +17,12 @@ var ( ) type BackupOptions struct { - master *string - collection *string - dir *string - volumeId *int + master *string + collection *string + dir *string + volumeId *int + ttl *string + replication *string } func init() { @@ -24,32 +31,45 @@ func init() { s.collection = cmdBackup.Flag.String("collection", "", "collection name") s.dir = cmdBackup.Flag.String("dir", ".", "directory to store volume data files") s.volumeId = cmdBackup.Flag.Int("volumeId", -1, "a volume id. The volume .dat and .idx files should already exist in the dir.") + s.ttl = cmdBackup.Flag.String("ttl", "", `backup volume's time to live, format: + 3m: 3 minutes + 4h: 4 hours + 5d: 5 days + 6w: 6 weeks + 7M: 7 months + 8y: 8 years + default is the same with origin`) + s.replication = cmdBackup.Flag.String("replication", "", "backup volume's replication, default is the same with origin") } var cmdBackup = &Command{ UsageLine: "backup -dir=. -volumeId=234 -server=localhost:9333", Short: "incrementally backup a volume to local folder", Long: `Incrementally backup volume data. - + It is expected that you use this inside a script, to loop through all possible volume ids that needs to be backup to local folder. - + The volume id does not need to exist locally or even remotely. This will help to backup future new volumes. - + Usually backing up is just copying the .dat (and .idx) files. - But it's tricky to incremententally copy the differences. - + But it's tricky to incrementally copy the differences. + The complexity comes when there are multiple addition, deletion and compaction. - This tool will handle them correctly and efficiently, avoiding unnecessary data transporation. + This tool will handle them correctly and efficiently, avoiding unnecessary data transportation. 
`, } func runBackup(cmd *Command, args []string) bool { + + util.LoadConfiguration("security", false) + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + if *s.volumeId == -1 { return false } - vid := storage.VolumeId(*s.volumeId) + vid := needle.VolumeId(*s.volumeId) // find volume location, replication, ttl info lookup, err := operation.Lookup(*s.master, vid.String()) @@ -59,29 +79,73 @@ func runBackup(cmd *Command, args []string) bool { } volumeServer := lookup.Locations[0].Url - stats, err := operation.GetVolumeSyncStatus(volumeServer, uint32(vid)) + stats, err := operation.GetVolumeSyncStatus(volumeServer, grpcDialOption, uint32(vid)) if err != nil { fmt.Printf("Error get volume %d status: %v\n", vid, err) return true } - ttl, err := storage.ReadTTL(stats.Ttl) - if err != nil { - fmt.Printf("Error get volume %d ttl %s: %v\n", vid, stats.Ttl, err) - return true + var ttl *needle.TTL + if *s.ttl != "" { + ttl, err = needle.ReadTTL(*s.ttl) + if err != nil { + fmt.Printf("Error generate volume %d ttl %s: %v\n", vid, *s.ttl, err) + return true + } + } else { + ttl, err = needle.ReadTTL(stats.Ttl) + if err != nil { + fmt.Printf("Error get volume %d ttl %s: %v\n", vid, stats.Ttl, err) + return true + } } - replication, err := storage.NewReplicaPlacementFromString(stats.Replication) - if err != nil { - fmt.Printf("Error get volume %d replication %s : %v\n", vid, stats.Replication, err) - return true + var replication *super_block.ReplicaPlacement + if *s.replication != "" { + replication, err = super_block.NewReplicaPlacementFromString(*s.replication) + if err != nil { + fmt.Printf("Error generate volume %d replication %s : %v\n", vid, *s.replication, err) + return true + } + } else { + replication, err = super_block.NewReplicaPlacementFromString(stats.Replication) + if err != nil { + fmt.Printf("Error get volume %d replication %s : %v\n", vid, stats.Replication, err) + return true + } } - - v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl, 0) + v, err := storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl, 0, 0) if err != nil { fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err) return true } - if err := v.Synchronize(volumeServer); err != nil { + if v.SuperBlock.CompactionRevision < uint16(stats.CompactRevision) { + if err = v.Compact2(30*1024*1024*1024, 0); err != nil { + fmt.Printf("Compact Volume before synchronizing %v\n", err) + return true + } + if err = v.CommitCompact(); err != nil { + fmt.Printf("Commit Compact before synchronizing %v\n", err) + return true + } + v.SuperBlock.CompactionRevision = uint16(stats.CompactRevision) + v.DataBackend.WriteAt(v.SuperBlock.Bytes(), 0) + } + + datSize, _, _ := v.FileStat() + + if datSize > stats.TailOffset { + // remove the old data + v.Destroy() + // recreate an empty volume + v, err = storage.NewVolume(*s.dir, *s.collection, vid, storage.NeedleMapInMemory, replication, ttl, 0, 0) + if err != nil { + fmt.Printf("Error creating or reading from volume %d: %v\n", vid, err) + return true + } + } + defer v.Close() + + if err := v.IncrementalBackup(volumeServer, grpcDialOption); err != nil { fmt.Printf("Error synchronizing volume %d: %v\n", vid, err) return true } diff --git a/weed/command/benchmark.go b/weed/command/benchmark.go index 8b65c8663..e85ab1b9b 100644 --- a/weed/command/benchmark.go +++ b/weed/command/benchmark.go @@ -15,8 +15,11 @@ import ( "sync" "time" + "google.golang.org/grpc" + 
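weed backup now lets -ttl and -replication override the values reported by the remote volume, falling back to the remote settings when the flags are empty. The fallback, factored into helpers (a sketch; everything beyond needle.ReadTTL and super_block.NewReplicaPlacementFromString is invented for illustration):

    package main

    import (
        "fmt"
        "log"

        "github.com/chrislusf/seaweedfs/weed/storage/needle"
        "github.com/chrislusf/seaweedfs/weed/storage/super_block"
    )

    func resolveTTL(flagTTL, remoteTTL string) (*needle.TTL, error) {
        if flagTTL != "" {
            return needle.ReadTTL(flagTTL) // explicit override, e.g. "5d"
        }
        return needle.ReadTTL(remoteTTL) // "default is the same with origin"
    }

    func resolveReplication(flagReplication, remoteReplication string) (*super_block.ReplicaPlacement, error) {
        if flagReplication != "" {
            return super_block.NewReplicaPlacementFromString(flagReplication)
        }
        return super_block.NewReplicaPlacementFromString(remoteReplication)
    }

    func main() {
        ttl, err := resolveTTL("5d", "3d") // flag wins over the remote value
        if err != nil {
            log.Fatal(err)
        }
        rp, err := resolveReplication("", "001") // empty flag falls back to remote
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println(ttl.Minutes(), rp)
    }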
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/wdclient" @@ -33,19 +36,22 @@ type BenchmarkOptions struct { read *bool sequentialRead *bool collection *string + replication *string cpuprofile *string maxCpu *int - secretKey *string + grpcDialOption grpc.DialOption + masterClient *wdclient.MasterClient + grpcRead *bool } var ( - b BenchmarkOptions - sharedBytes []byte - masterClient *wdclient.MasterClient + b BenchmarkOptions + sharedBytes []byte + isSecure bool ) func init() { - cmdBenchmark.Run = runbenchmark // break init cycle + cmdBenchmark.Run = runBenchmark // break init cycle cmdBenchmark.IsDebug = cmdBenchmark.Flag.Bool("debug", false, "verbose debug information") b.masters = cmdBenchmark.Flag.String("master", "localhost:9333", "SeaweedFS master location") b.concurrency = cmdBenchmark.Flag.Int("c", 16, "number of concurrent write or read processes") @@ -57,14 +63,15 @@ func init() { b.read = cmdBenchmark.Flag.Bool("read", true, "enable read") b.sequentialRead = cmdBenchmark.Flag.Bool("readSequentially", false, "randomly read by ids from \"-list\" specified file") b.collection = cmdBenchmark.Flag.String("collection", "benchmark", "write data to this collection") + b.replication = cmdBenchmark.Flag.String("replication", "000", "replication type") b.cpuprofile = cmdBenchmark.Flag.String("cpuprofile", "", "cpu profile output file") b.maxCpu = cmdBenchmark.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs") - b.secretKey = cmdBenchmark.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)") + b.grpcRead = cmdBenchmark.Flag.Bool("grpcRead", false, "use grpc API to read") sharedBytes = make([]byte, 1024) } var cmdBenchmark = &Command{ - UsageLine: "benchmark -server=localhost:9333 -c=10 -n=100000", + UsageLine: "benchmark -master=localhost:9333 -c=10 -n=100000", Short: "benchmark on writing millions of files and read out", Long: `benchmark on an empty SeaweedFS file system. 
@@ -101,7 +108,11 @@ var ( readStats *stats ) -func runbenchmark(cmd *Command, args []string) bool { +func runBenchmark(cmd *Command, args []string) bool { + + util.LoadConfiguration("security", false) + b.grpcDialOption = security.LoadClientTLS(util.GetViper(), "grpc.client") + fmt.Printf("This is SeaweedFS version %s %s %s\n", util.VERSION, runtime.GOOS, runtime.GOARCH) if *b.maxCpu < 1 { *b.maxCpu = runtime.NumCPU() @@ -116,22 +127,22 @@ func runbenchmark(cmd *Command, args []string) bool { defer pprof.StopCPUProfile() } - masterClient = wdclient.NewMasterClient(context.Background(), "benchmark", strings.Split(*b.masters, ",")) - go masterClient.KeepConnectedToMaster() - masterClient.WaitUntilConnected() + b.masterClient = wdclient.NewMasterClient(b.grpcDialOption, "client", 0, strings.Split(*b.masters, ",")) + go b.masterClient.KeepConnectedToMaster() + b.masterClient.WaitUntilConnected() if *b.write { - bench_write() + benchWrite() } if *b.read { - bench_read() + benchRead() } return true } -func bench_write() { +func benchWrite() { fileIdLineChan := make(chan string) finishChan := make(chan bool) writeStats = newStats(*b.concurrency) @@ -158,7 +169,7 @@ func bench_write() { writeStats.printStats() } -func bench_read() { +func benchRead() { fileIdLineChan := make(chan string) finishChan := make(chan bool) readStats = newStats(*b.concurrency) @@ -188,7 +199,6 @@ func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) { defer wait.Done() delayedDeleteChan := make(chan *delayedFile, 100) var waitForDeletions sync.WaitGroup - secret := security.Secret(*b.secretKey) for i := 0; i < 7; i++ { waitForDeletions.Add(1) @@ -198,8 +208,11 @@ func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) { if df.enterTime.After(time.Now()) { time.Sleep(df.enterTime.Sub(time.Now())) } - if e := util.Delete("http://"+df.fp.Server+"/"+df.fp.Fid, - security.GenJwt(secret, df.fp.Fid)); e == nil { + var jwtAuthorization security.EncodedJwt + if isSecure { + jwtAuthorization = operation.LookupJwt(b.masterClient.GetMaster(), df.fp.Fid) + } + if e := util.Delete(fmt.Sprintf("http://%s/%s", df.fp.Server, df.fp.Fid), string(jwtAuthorization)); e == nil { s.completed++ } else { s.failed++ @@ -213,14 +226,22 @@ func writeFiles(idChan chan int, fileIdLineChan chan string, s *stat) { for id := range idChan { start := time.Now() fileSize := int64(*b.fileSize + random.Intn(64)) - fp := &operation.FilePart{Reader: &FakeReader{id: uint64(id), size: fileSize}, FileSize: fileSize} + fp := &operation.FilePart{ + Reader: &FakeReader{id: uint64(id), size: fileSize}, + FileSize: fileSize, + MimeType: "image/bench", // prevent gzip benchmark content + } ar := &operation.VolumeAssignRequest{ - Count: 1, - Collection: *b.collection, + Count: 1, + Collection: *b.collection, + Replication: *b.replication, } - if assignResult, err := operation.Assign(masterClient.GetMaster(), ar); err == nil { + if assignResult, err := operation.Assign(b.masterClient.GetMaster(), b.grpcDialOption, ar); err == nil { fp.Server, fp.Fid, fp.Collection = assignResult.Url, assignResult.Fid, *b.collection - if _, err := fp.Upload(0, masterClient.GetMaster(), secret); err == nil { + if !isSecure && assignResult.Auth != "" { + isSecure = true + } + if _, err := fp.Upload(0, b.masterClient.GetMaster(), assignResult.Auth, b.grpcDialOption); err == nil { if random.Intn(100) < *b.deletePercentage { s.total++ delayedDeleteChan <- &delayedFile{time.Now().Add(time.Second), fp} @@ -260,23 +281,61 @@ func readFiles(fileIdLineChan chan 
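runBenchmark's bootstrap replaces the shared -secure.secret with TLS options loaded from security.toml plus a wdclient.MasterClient kept connected in the background; deletes then fetch a JWT from the master, but only once an assign result has carried an Auth token. Both pieces in isolation (calls as in this patch; the addresses and file id are hypothetical):

    package main

    import (
        "fmt"
        "log"
        "strings"

        "github.com/chrislusf/seaweedfs/weed/operation"
        "github.com/chrislusf/seaweedfs/weed/security"
        "github.com/chrislusf/seaweedfs/weed/util"
        "github.com/chrislusf/seaweedfs/weed/wdclient"
    )

    // deleteFile mirrors the benchmark's delete path: a JWT is looked up
    // from the master only when the cluster is secured.
    func deleteFile(mc *wdclient.MasterClient, server, fid string, secured bool) error {
        var jwt security.EncodedJwt
        if secured {
            jwt = operation.LookupJwt(mc.GetMaster(), fid)
        }
        return util.Delete(fmt.Sprintf("http://%s/%s", server, fid), string(jwt))
    }

    func main() {
        util.LoadConfiguration("security", false)
        grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")

        mc := wdclient.NewMasterClient(grpcDialOption, "client", 0, strings.Split("localhost:9333", ","))
        go mc.KeepConnectedToMaster()
        mc.WaitUntilConnected()

        url, err := mc.LookupFileId("3,01637037d6") // hypothetical file id
        if err != nil {
            log.Fatalf("lookup: %v", err)
        }
        fmt.Println("would read from", url)

        if err := deleteFile(mc, "localhost:8080", "3,01637037d6", false); err != nil {
            log.Println("delete:", err)
        }
    }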
string, s *stat) { fmt.Printf("reading file %s\n", fid) } start := time.Now() - url, err := masterClient.LookupFileId(fid) - if err != nil { - s.failed++ - println("!!!! ", fid, " location not found!!!!!") - continue + var bytesRead int + var err error + if *b.grpcRead { + volumeServer, err := b.masterClient.LookupVolumeServer(fid) + if err != nil { + s.failed++ + println("!!!! ", fid, " location not found!!!!!") + continue + } + bytesRead, err = grpcFileGet(volumeServer, fid, b.grpcDialOption) + } else { + url, err := b.masterClient.LookupFileId(fid) + if err != nil { + s.failed++ + println("!!!! ", fid, " location not found!!!!!") + continue + } + var bytes []byte + bytes, err = util.Get(url) + bytesRead = len(bytes) } - if bytesRead, err := util.Get(url); err == nil { + if err == nil { s.completed++ - s.transferred += int64(len(bytesRead)) + s.transferred += int64(bytesRead) readStats.addSample(time.Now().Sub(start)) } else { s.failed++ - fmt.Printf("Failed to read %s error:%v\n", url, err) + fmt.Printf("Failed to read %s error:%v\n", fid, err) } } } +func grpcFileGet(volumeServer, fid string, grpcDialOption grpc.DialOption) (bytesRead int, err error) { + err = operation.WithVolumeServerClient(volumeServer, grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + fileGetClient, err := client.FileGet(context.Background(), &volume_server_pb.FileGetRequest{FileId: fid}) + if err != nil { + return err + } + + for { + resp, respErr := fileGetClient.Recv() + if resp != nil { + bytesRead += len(resp.Data) + } + if respErr != nil { + if respErr == io.EOF { + return nil + } + return respErr + } + } + }) + return +} + func writeFileIds(fileName string, fileIdLineChan chan string, finishChan chan bool) { file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { diff --git a/weed/command/command.go b/weed/command/command.go index 91b9bf3fc..9dc51e922 100644 --- a/weed/command/command.go +++ b/weed/command/command.go @@ -13,7 +13,6 @@ var Commands = []*Command{ cmdCompact, cmdCopy, cmdFix, - cmdFilerExport, cmdFilerReplicate, cmdServer, cmdMaster, @@ -21,12 +20,14 @@ var Commands = []*Command{ cmdS3, cmdUpload, cmdDownload, + cmdMsgBroker, cmdScaffold, cmdShell, cmdVersion, cmdVolume, cmdExport, cmdMount, + cmdWebDav, } type Command struct { diff --git a/weed/command/compact.go b/weed/command/compact.go index 0dd4efe0e..4e28aa725 100644 --- a/weed/command/compact.go +++ b/weed/command/compact.go @@ -3,6 +3,7 @@ package command import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) func init() { @@ -16,6 +17,9 @@ var cmdCompact = &Command{ The compacted .dat file is stored as .cpd file. The compacted .idx file is stored as .cpx file. + For method=0, it compacts based on the .dat file, works if .idx file is corrupted. + For method=1, it compacts based on the .idx file, works if deletion happened but not written to .dat files. 
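The new -grpcRead path streams file content over the volume server gRPC API. The Recv loop is the standard server-streaming pattern, with one detail worth keeping: data may arrive in the same message as the final status, so bytes are counted before the error is inspected. A sketch built from the calls in this patch (server address and file id hypothetical):

    package main

    import (
        "context"
        "fmt"
        "io"

        "google.golang.org/grpc"

        "github.com/chrislusf/seaweedfs/weed/operation"
        "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
    )

    func readViaGrpc(volumeServer, fid string, dialOption grpc.DialOption) (bytesRead int, err error) {
        err = operation.WithVolumeServerClient(volumeServer, dialOption, func(client volume_server_pb.VolumeServerClient) error {
            stream, openErr := client.FileGet(context.Background(), &volume_server_pb.FileGetRequest{FileId: fid})
            if openErr != nil {
                return openErr
            }
            for {
                resp, respErr := stream.Recv()
                if resp != nil {
                    bytesRead += len(resp.Data) // count payload even on the final message
                }
                if respErr == io.EOF {
                    return nil
                }
                if respErr != nil {
                    return respErr
                }
            }
        })
        return
    }

    func main() {
        n, err := readViaGrpc("localhost:8080", "3,01637037d6", grpc.WithInsecure())
        fmt.Println(n, err)
    }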
+ `, } @@ -35,18 +39,18 @@ func runCompact(cmd *Command, args []string) bool { preallocate := *compactVolumePreallocate * (1 << 20) - vid := storage.VolumeId(*compactVolumeId) + vid := needle.VolumeId(*compactVolumeId) v, err := storage.NewVolume(*compactVolumePath, *compactVolumeCollection, vid, - storage.NeedleMapInMemory, nil, nil, preallocate) + storage.NeedleMapInMemory, nil, nil, preallocate, 0) if err != nil { glog.Fatalf("Load Volume [ERROR] %s\n", err) } if *compactMethod == 0 { - if err = v.Compact(preallocate); err != nil { + if err = v.Compact(preallocate, 0); err != nil { glog.Fatalf("Compact Volume [ERROR] %s\n", err) } } else { - if err = v.Compact2(); err != nil { + if err = v.Compact2(preallocate, 0); err != nil { glog.Fatalf("Compact Volume [ERROR] %s\n", err) } } diff --git a/weed/command/download.go b/weed/command/download.go index b3e33defd..be0eb47e5 100644 --- a/weed/command/download.go +++ b/weed/command/download.go @@ -71,6 +71,7 @@ func downloadToFile(server, fileId, saveDir string) error { } f, err := os.OpenFile(path.Join(saveDir, filename), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) if err != nil { + io.Copy(ioutil.Discard, rc) return err } defer f.Close() diff --git a/weed/command/export.go b/weed/command/export.go index 1202d687c..8c32b3f4d 100644 --- a/weed/command/export.go +++ b/weed/command/export.go @@ -4,6 +4,7 @@ import ( "archive/tar" "bytes" "fmt" + "io" "os" "path" "path/filepath" @@ -14,8 +15,10 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" "github.com/chrislusf/seaweedfs/weed/storage/types" - "io" ) const ( @@ -66,10 +69,10 @@ var ( localLocation, _ = time.LoadLocation("Local") ) -func printNeedle(vid storage.VolumeId, n *storage.Needle, version storage.Version, deleted bool) { - key := storage.NewFileIdFromNeedle(vid, n).String() +func printNeedle(vid needle.VolumeId, n *needle.Needle, version needle.Version, deleted bool) { + key := needle.NewFileIdFromNeedle(vid, n).String() size := n.DataSize - if version == storage.Version1 { + if version == needle.Version1 { size = n.Size } fmt.Printf("%s\t%s\t%d\t%t\t%s\t%s\t%s\t%t\n", @@ -84,6 +87,62 @@ func printNeedle(vid storage.VolumeId, n *storage.Needle, version storage.Versio ) } +type VolumeFileScanner4Export struct { + version needle.Version + counter int + needleMap *needle_map.MemDb + vid needle.VolumeId +} + +func (scanner *VolumeFileScanner4Export) VisitSuperBlock(superBlock super_block.SuperBlock) error { + scanner.version = superBlock.Version + return nil + +} +func (scanner *VolumeFileScanner4Export) ReadNeedleBody() bool { + return true +} + +func (scanner *VolumeFileScanner4Export) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { + needleMap := scanner.needleMap + vid := scanner.vid + + nv, ok := needleMap.Get(n.Id) + glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v", + n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped(), ok, nv) + if ok && nv.Size > 0 && nv.Size != types.TombstoneFileSize && nv.Offset.ToAcutalOffset() == offset { + if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { + glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", + n.LastModified, newerThanUnix) + return nil + } + scanner.counter++ + if 
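The compact command's two methods now map onto Compact and Compact2, both of which take a preallocate size and a second rate argument (0 here), and NewVolume gains a trailing parameter as well. A sketch of the selection logic, mirroring the calls in this hunk (paths and ids hypothetical):

    package main

    import (
        "log"

        "github.com/chrislusf/seaweedfs/weed/storage"
        "github.com/chrislusf/seaweedfs/weed/storage/needle"
    )

    // compact loads a volume and rebuilds it: method 0 scans the .dat
    // file (works if the .idx is corrupted), method 1 scans the .idx
    // file (catches deletions not yet reflected in the .dat).
    func compact(dir, collection string, id, method int, preallocate int64) {
        vid := needle.VolumeId(id)
        v, err := storage.NewVolume(dir, collection, vid, storage.NeedleMapInMemory, nil, nil, preallocate, 0)
        if err != nil {
            log.Fatalf("load volume: %v", err)
        }
        if method == 0 {
            err = v.Compact(preallocate, 0)
        } else {
            err = v.Compact2(preallocate, 0)
        }
        if err != nil {
            log.Fatalf("compact volume: %v", err)
        }
    }

    func main() {
        compact("/data", "", 1, 0, 0)
    }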
*limit > 0 && scanner.counter > *limit { + return io.EOF + } + if tarOutputFile != nil { + return writeFile(vid, n) + } else { + printNeedle(vid, n, scanner.version, false) + return nil + } + } + if !ok { + if *showDeleted && tarOutputFile == nil { + if n.DataSize > 0 { + printNeedle(vid, n, scanner.version, true) + } else { + n.Name = []byte("*tombstone") + printNeedle(vid, n, scanner.version, true) + } + } + glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size) + } else { + glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size) + } + return nil +} + func runExport(cmd *Command, args []string) bool { var err error @@ -133,67 +192,25 @@ func runExport(cmd *Command, args []string) bool { if *export.collection != "" { fileName = *export.collection + "_" + fileName } - vid := storage.VolumeId(*export.volumeId) - indexFile, err := os.OpenFile(path.Join(*export.dir, fileName+".idx"), os.O_RDONLY, 0644) - if err != nil { - glog.Fatalf("Create Volume Index [ERROR] %s\n", err) - } - defer indexFile.Close() + vid := needle.VolumeId(*export.volumeId) - needleMap, err := storage.LoadBtreeNeedleMap(indexFile) - if err != nil { - glog.Fatalf("cannot load needle map from %s: %s", indexFile.Name(), err) + needleMap := needle_map.NewMemDb() + defer needleMap.Close() + + if err := needleMap.LoadFromIdx(path.Join(*export.dir, fileName+".idx")); err != nil { + glog.Fatalf("cannot load needle map from %s.idx: %s", fileName, err) } - var version storage.Version + volumeFileScanner := &VolumeFileScanner4Export{ + needleMap: needleMap, + vid: vid, + } if tarOutputFile == nil { fmt.Printf("key\tname\tsize\tgzip\tmime\tmodified\tttl\tdeleted\n") } - var counter = 0 - - err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, - storage.NeedleMapInMemory, - func(superBlock storage.SuperBlock) error { - version = superBlock.Version() - return nil - }, true, func(n *storage.Needle, offset int64) error { - nv, ok := needleMap.Get(n.Id) - glog.V(3).Infof("key %d offset %d size %d disk_size %d gzip %v ok %v nv %+v", - n.Id, offset, n.Size, n.DiskSize(version), n.IsGzipped(), ok, nv) - if ok && nv.Size > 0 && int64(nv.Offset)*types.NeedlePaddingSize == offset { - if newerThanUnix >= 0 && n.HasLastModifiedDate() && n.LastModified < uint64(newerThanUnix) { - glog.V(3).Infof("Skipping this file, as it's old enough: LastModified %d vs %d", - n.LastModified, newerThanUnix) - return nil - } - counter++ - if *limit > 0 && counter > *limit { - return io.EOF - } - if tarOutputFile != nil { - return writeFile(vid, n) - } else { - printNeedle(vid, n, version, false) - return nil - } - } - if !ok { - if *showDeleted && tarOutputFile == nil { - if n.DataSize > 0 { - printNeedle(vid, n, version, true) - } else { - n.Name = []byte("*tombstone") - printNeedle(vid, n, version, true) - } - } - glog.V(2).Infof("This seems deleted %d size %d", n.Id, n.Size) - } else { - glog.V(2).Infof("Skipping later-updated Id %d size %d", n.Id, n.Size) - } - return nil - }) + err = storage.ScanVolumeFile(*export.dir, *export.collection, vid, storage.NeedleMapInMemory, volumeFileScanner) if err != nil && err != io.EOF { glog.Fatalf("Export Volume File [ERROR] %s\n", err) } @@ -208,8 +225,8 @@ type nameParams struct { Ext string } -func writeFile(vid storage.VolumeId, n *storage.Needle) (err error) { - key := storage.NewFileIdFromNeedle(vid, n).String() +func writeFile(vid needle.VolumeId, n *needle.Needle) (err error) { + key := needle.NewFileIdFromNeedle(vid, n).String() fileNameTemplateBuffer.Reset() if err = 
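The export rewrite replaces LoadBtreeNeedleMap(indexFile) with an in-memory needle db loaded straight from the .idx path. A sketch of that usage, assuming only the calls visible here (the NeedleId conversion from a literal is an assumption for illustration):

    package main

    import (
        "fmt"
        "log"

        "github.com/chrislusf/seaweedfs/weed/storage/needle_map"
        "github.com/chrislusf/seaweedfs/weed/storage/types"
    )

    func main() {
        nm := needle_map.NewMemDb()
        defer nm.Close()

        if err := nm.LoadFromIdx("/data/1.idx"); err != nil { // hypothetical index path
            log.Fatalf("load idx: %v", err)
        }

        // Look up one needle id; a tombstone size means it was deleted.
        if nv, ok := nm.Get(types.NeedleId(12345)); ok && nv.Size != types.TombstoneFileSize {
            // sic: ToAcutalOffset is the exported name in this patch
            fmt.Printf("offset=%d size=%d\n", nv.Offset.ToAcutalOffset(), nv.Size)
        }
    }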
fileNameTemplate.Execute(fileNameTemplateBuffer, nameParams{ diff --git a/weed/command/filer.go b/weed/command/filer.go index a65139b15..fb1ee2b0f 100644 --- a/weed/command/filer.go +++ b/weed/command/filer.go @@ -6,11 +6,14 @@ import ( "strings" "time" + "google.golang.org/grpc/reflection" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/server" "github.com/chrislusf/seaweedfs/weed/util" - "google.golang.org/grpc/reflection" ) var ( @@ -21,17 +24,19 @@ type FilerOptions struct { masters *string ip *string port *int - grpcPort *int publicPort *int collection *string defaultReplicaPlacement *string - redirectOnRead *bool disableDirListing *bool maxMB *int - secretKey *string dirListingLimit *int dataCenter *string enableNotification *bool + disableHttp *bool + cipher *bool + + // default leveldb directory, used in "weed server" mode + defaultLevelDbDirectory *string } func init() { @@ -40,15 +45,14 @@ func init() { f.collection = cmdFiler.Flag.String("collection", "", "all data will be stored in this collection") f.ip = cmdFiler.Flag.String("ip", "", "filer server http listen ip address") f.port = cmdFiler.Flag.Int("port", 8888, "filer server http listen port") - f.grpcPort = cmdFiler.Flag.Int("port.grpc", 0, "filer grpc server listen port, default to http port + 10000") - f.publicPort = cmdFiler.Flag.Int("port.public", 0, "port opened to public") + f.publicPort = cmdFiler.Flag.Int("port.readonly", 0, "readonly port opened to public") f.defaultReplicaPlacement = cmdFiler.Flag.String("defaultReplicaPlacement", "000", "default replication type if not specified") - f.redirectOnRead = cmdFiler.Flag.Bool("redirectOnRead", false, "whether proxy or redirect to volume server during file GET request") f.disableDirListing = cmdFiler.Flag.Bool("disableDirListing", false, "turn off directory listing") f.maxMB = cmdFiler.Flag.Int("maxMB", 32, "split files larger than the limit") - f.secretKey = cmdFiler.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)") - f.dirListingLimit = cmdFiler.Flag.Int("dirListLimit", 1000, "limit sub dir listing size") + f.dirListingLimit = cmdFiler.Flag.Int("dirListLimit", 100000, "limit sub dir listing size") f.dataCenter = cmdFiler.Flag.String("dataCenter", "", "prefer to write to volumes in this data center") + f.disableHttp = cmdFiler.Flag.Bool("disableHttp", false, "disable http request, only gRpc operations are allowed") + f.cipher = cmdFiler.Flag.Bool("encryptVolumeData", false, "encrypt data on volume servers") } var cmdFiler = &Command{ @@ -67,13 +71,15 @@ var cmdFiler = &Command{ The configuration file "filer.toml" is read from ".", "$HOME/.seaweedfs/", or "/etc/seaweedfs/", in that order. 
- The example filer.toml configuration file can be generated by "weed scaffold filer" + The example filer.toml configuration file can be generated by "weed scaffold -config=filer" `, } func runFiler(cmd *Command, args []string) bool { + util.LoadConfiguration("security", false) + f.startFiler() return true @@ -88,16 +94,23 @@ func (fo *FilerOptions) startFiler() { publicVolumeMux = http.NewServeMux() } + defaultLevelDbDirectory := "./filerldb2" + if fo.defaultLevelDbDirectory != nil { + defaultLevelDbDirectory = *fo.defaultLevelDbDirectory + "/filerldb2" + } + fs, nfs_err := weed_server.NewFilerServer(defaultMux, publicVolumeMux, &weed_server.FilerOption{ - Masters: strings.Split(*f.masters, ","), + Masters: strings.Split(*fo.masters, ","), Collection: *fo.collection, DefaultReplication: *fo.defaultReplicaPlacement, - RedirectOnRead: *fo.redirectOnRead, DisableDirListing: *fo.disableDirListing, MaxMB: *fo.maxMB, - SecretKey: *fo.secretKey, DirListingLimit: *fo.dirListingLimit, DataCenter: *fo.dataCenter, + DefaultLevelDbDir: defaultLevelDbDirectory, + DisableHttp: *fo.disableHttp, + Port: uint32(*fo.port), + Cipher: *fo.cipher, }) if nfs_err != nil { glog.Fatalf("Filer startup error: %v", nfs_err) @@ -117,9 +130,9 @@ func (fo *FilerOptions) startFiler() { }() } - glog.V(0).Infoln("Start Seaweed Filer", util.VERSION, "at port", strconv.Itoa(*fo.port)) + glog.V(0).Infof("Start Seaweed Filer %s at %s:%d", util.VERSION, *fo.ip, *fo.port) filerListener, e := util.NewListener( - ":"+strconv.Itoa(*fo.port), + *fo.ip+":"+strconv.Itoa(*fo.port), time.Duration(10)*time.Second, ) if e != nil { @@ -127,15 +140,12 @@ func (fo *FilerOptions) startFiler() { } // starting grpc server - grpcPort := *fo.grpcPort - if grpcPort == 0 { - grpcPort = *fo.port + 10000 - } + grpcPort := *fo.port + 10000 grpcL, err := util.NewListener(":"+strconv.Itoa(grpcPort), 0) if err != nil { glog.Fatalf("failed to listen on grpc port %d: %v", grpcPort, err) } - grpcS := util.NewGrpcServer() + grpcS := pb.NewGrpcServer(security.LoadServerTLS(util.GetViper(), "grpc.filer")) filer_pb.RegisterSeaweedFilerServer(grpcS, fs) reflection.Register(grpcS) go grpcS.Serve(grpcL) diff --git a/weed/command/filer_copy.go b/weed/command/filer_copy.go index b519afe86..0aee8cd80 100644 --- a/weed/command/filer_copy.go +++ b/weed/command/filer_copy.go @@ -1,53 +1,60 @@ package command import ( + "context" "fmt" + "io" "io/ioutil" + "net/http" "net/url" "os" "path/filepath" + "strconv" "strings" + "sync" + "time" + + "google.golang.org/grpc" - "context" "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/util" - "io" - "net/http" - "path" - "strconv" - "time" + "github.com/chrislusf/seaweedfs/weed/wdclient" ) var ( - copy CopyOptions + copy CopyOptions + waitGroup sync.WaitGroup ) type CopyOptions struct { - filerGrpcPort *int - master *string - include *string - replication *string - collection *string - ttl *string - maxMB *int - secretKey *string - - secret security.Secret + include *string + replication *string + collection *string + ttl *string + maxMB *int + masterClient *wdclient.MasterClient + concurrenctFiles *int + concurrenctChunks *int + grpcDialOption grpc.DialOption + masters []string + cipher bool + ttlSec int32 } func init() { cmdCopy.Run = runCopy // break init cycle cmdCopy.IsDebug = 
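With -port.grpc gone, the filer's gRPC listener is now always the HTTP port plus 10000 (and the embedded store defaults to a ./filerldb2 directory). filer.copy relies on the same convention when it derives the gRPC address from a filer URL; as a tiny helper:

    package main

    import "fmt"

    // filerGrpcAddress mirrors the fixed convention in this patch:
    // gRPC is served on the filer's HTTP port plus 10000.
    func filerGrpcAddress(host string, httpPort int) string {
        return fmt.Sprintf("%s:%d", host, httpPort+10000)
    }

    func main() {
        fmt.Println(filerGrpcAddress("localhost", 8888)) // localhost:18888
    }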
cmdCopy.Flag.Bool("debug", false, "verbose debug information") - copy.master = cmdCopy.Flag.String("master", "localhost:9333", "SeaweedFS master location") copy.include = cmdCopy.Flag.String("include", "", "pattens of files to copy, e.g., *.pdf, *.html, ab?d.txt, works together with -dir") copy.replication = cmdCopy.Flag.String("replication", "", "replication type") copy.collection = cmdCopy.Flag.String("collection", "", "optional collection name") copy.ttl = cmdCopy.Flag.String("ttl", "", "time to live, e.g.: 1m, 1h, 1d, 1M, 1y") - copy.maxMB = cmdCopy.Flag.Int("maxMB", 0, "split files larger than the limit") - copy.filerGrpcPort = cmdCopy.Flag.Int("filer.port.grpc", 0, "filer grpc server listen port, default to filer port + 10000") - copy.secretKey = cmdCopy.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)") + copy.maxMB = cmdCopy.Flag.Int("maxMB", 32, "split files larger than the limit") + copy.concurrenctFiles = cmdCopy.Flag.Int("c", 8, "concurrent file copy goroutines") + copy.concurrenctChunks = cmdCopy.Flag.Int("concurrentChunks", 8, "concurrent chunk copy goroutines for each file") } var cmdCopy = &Command{ @@ -61,17 +68,15 @@ var cmdCopy = &Command{ All files under the folder and subfolders will be copyed. Optional parameter "-include" allows you to specify the file name patterns. - If any file has a ".gz" extension, the content are considered gzipped already, and will be stored as is. - This can save volume server's gzipped processing and allow customizable gzip compression level. - The file name will strip out ".gz" and stored. For example, "jquery.js.gz" will be stored as "jquery.js". - If "maxMB" is set to a positive number, files larger than it would be split into chunks. `, } func runCopy(cmd *Command, args []string) bool { - copy.secret = security.Secret(*copy.secretKey) + + util.LoadConfiguration("security", false) + if len(args) <= 1 { return false } @@ -85,7 +90,8 @@ func runCopy(cmd *Command, args []string) bool { } urlPath := filerUrl.Path if !strings.HasSuffix(urlPath, "/") { - urlPath = urlPath + "/" + fmt.Printf("The last argument should be a folder and end with \"/\": %v\n", err) + return false } if filerUrl.Port() == "" { @@ -100,253 +106,412 @@ func runCopy(cmd *Command, args []string) bool { } filerGrpcPort := filerPort + 10000 - if *copy.filerGrpcPort != 0 { - filerGrpcPort = uint64(*copy.filerGrpcPort) + filerGrpcAddress := fmt.Sprintf("%s:%d", filerUrl.Hostname(), filerGrpcPort) + copy.grpcDialOption = security.LoadClientTLS(util.GetViper(), "grpc.client") + + masters, collection, replication, maxMB, cipher, err := readFilerConfiguration(copy.grpcDialOption, filerGrpcAddress) + if err != nil { + fmt.Printf("read from filer %s: %v\n", filerGrpcAddress, err) + return false + } + if *copy.collection == "" { + *copy.collection = collection + } + if *copy.replication == "" { + *copy.replication = replication } + if *copy.maxMB == 0 { + *copy.maxMB = int(maxMB) + } + copy.masters = masters + copy.cipher = cipher - filerGrpcAddress := fmt.Sprintf("%s:%d", filerUrl.Hostname(), filerGrpcPort) + ttl, err := needle.ReadTTL(*copy.ttl) + if err != nil { + fmt.Printf("parsing ttl %s: %v\n", *copy.ttl, err) + return false + } + copy.ttlSec = int32(ttl.Minutes()) * 60 - for _, fileOrDir := range fileOrDirs { - if !doEachCopy(fileOrDir, filerUrl.Host, filerGrpcAddress, urlPath) { - return false + if *cmdCopy.IsDebug { + util.SetupProfiling("filer.copy.cpu.pprof", "filer.copy.mem.pprof") + } + + fileCopyTaskChan := make(chan FileCopyTask, 
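filer.copy no longer takes a -master flag; it asks the filer itself for masters, default collection, replication, chunk size, and cipher setting over gRPC, via the pb.WithGrpcFilerClient helper that wraps dial and close around a callback. A sketch of that bootstrap call (dial option and address hypothetical):

    package main

    import (
        "context"
        "fmt"

        "google.golang.org/grpc"

        "github.com/chrislusf/seaweedfs/weed/pb"
        "github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
    )

    // fetchFilerConfig mirrors readFilerConfiguration in this patch.
    func fetchFilerConfig(dialOption grpc.DialOption, filerGrpcAddress string) (masters []string, collection, replication string, maxMB uint32, cipher bool, err error) {
        err = pb.WithGrpcFilerClient(filerGrpcAddress, dialOption, func(client filer_pb.SeaweedFilerClient) error {
            resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{})
            if err != nil {
                return err
            }
            masters, collection, replication, maxMB = resp.Masters, resp.Collection, resp.Replication, resp.MaxMb
            cipher = resp.Cipher
            return nil
        })
        return
    }

    func main() {
        masters, collection, replication, maxMB, cipher, err := fetchFilerConfig(grpc.WithInsecure(), "localhost:18888")
        fmt.Println(masters, collection, replication, maxMB, cipher, err)
    }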
*copy.concurrenctFiles) + + go func() { + defer close(fileCopyTaskChan) + for _, fileOrDir := range fileOrDirs { + if err := genFileCopyTask(fileOrDir, urlPath, fileCopyTaskChan); err != nil { + fmt.Fprintf(os.Stderr, "gen file list error: %v\n", err) + break + } } + }() + for i := 0; i < *copy.concurrenctFiles; i++ { + waitGroup.Add(1) + go func() { + defer waitGroup.Done() + worker := FileCopyWorker{ + options: ©, + filerHost: filerUrl.Host, + filerGrpcAddress: filerGrpcAddress, + } + if err := worker.copyFiles(fileCopyTaskChan); err != nil { + fmt.Fprintf(os.Stderr, "copy file error: %v\n", err) + return + } + }() } + waitGroup.Wait() + return true } -func doEachCopy(fileOrDir string, filerAddress, filerGrpcAddress string, path string) bool { - f, err := os.Open(fileOrDir) - if err != nil { - fmt.Printf("Failed to open file %s: %v\n", fileOrDir, err) - return false - } - defer f.Close() +func readFilerConfiguration(grpcDialOption grpc.DialOption, filerGrpcAddress string) (masters []string, collection, replication string, maxMB uint32, cipher bool, err error) { + err = pb.WithGrpcFilerClient(filerGrpcAddress, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer %s configuration: %v", filerGrpcAddress, err) + } + masters, collection, replication, maxMB = resp.Masters, resp.Collection, resp.Replication, resp.MaxMb + cipher = resp.Cipher + return nil + }) + return +} + +func genFileCopyTask(fileOrDir string, destPath string, fileCopyTaskChan chan FileCopyTask) error { - fi, err := f.Stat() + fi, err := os.Stat(fileOrDir) if err != nil { - fmt.Printf("Failed to get stat for file %s: %v\n", fileOrDir, err) - return false + fmt.Fprintf(os.Stderr, "Failed to get stat for file %s: %v\n", fileOrDir, err) + return nil } mode := fi.Mode() if mode.IsDir() { files, _ := ioutil.ReadDir(fileOrDir) for _, subFileOrDir := range files { - if !doEachCopy(fileOrDir+"/"+subFileOrDir.Name(), filerAddress, filerGrpcAddress, path+fi.Name()+"/") { - return false + if err = genFileCopyTask(fileOrDir+"/"+subFileOrDir.Name(), destPath+fi.Name()+"/", fileCopyTaskChan); err != nil { + return err } } - return true + return nil + } + + uid, gid := util.GetFileUidGid(fi) + + fileCopyTaskChan <- FileCopyTask{ + sourceLocation: fileOrDir, + destinationUrlPath: destPath, + fileSize: fi.Size(), + fileMode: fi.Mode(), + uid: uid, + gid: gid, + } + + return nil +} + +type FileCopyWorker struct { + options *CopyOptions + filerHost string + filerGrpcAddress string +} + +func (worker *FileCopyWorker) copyFiles(fileCopyTaskChan chan FileCopyTask) error { + for task := range fileCopyTaskChan { + if err := worker.doEachCopy(task); err != nil { + return err + } + } + return nil +} + +type FileCopyTask struct { + sourceLocation string + destinationUrlPath string + fileSize int64 + fileMode os.FileMode + uid uint32 + gid uint32 +} + +func (worker *FileCopyWorker) doEachCopy(task FileCopyTask) error { + + f, err := os.Open(task.sourceLocation) + if err != nil { + fmt.Printf("Failed to open file %s: %v\n", task.sourceLocation, err) + if _, ok := err.(*os.PathError); ok { + fmt.Printf("skipping %s\n", task.sourceLocation) + return nil + } + return err } + defer f.Close() // this is a regular file - if *copy.include != "" { - if ok, _ := filepath.Match(*copy.include, filepath.Base(fileOrDir)); !ok { - return true + if *worker.options.include != "" { + if ok, _ := 
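Structurally, filer.copy changes from a recursive sequential walk into a producer/consumer pipeline: one goroutine generates FileCopyTask values into a buffered channel while -c worker goroutines drain it. The skeleton of that pipeline, minus the upload logic:

    package main

    import (
        "fmt"
        "sync"
    )

    type FileCopyTask struct {
        sourceLocation string
    }

    func main() {
        sources := []string{"a.txt", "b.txt", "c.txt"} // hypothetical inputs
        concurrentFiles := 8

        taskChan := make(chan FileCopyTask, concurrentFiles)

        // Producer: walks the inputs and closes the channel when done.
        go func() {
            defer close(taskChan)
            for _, s := range sources {
                taskChan <- FileCopyTask{sourceLocation: s}
            }
        }()

        // Consumers: a fixed pool of workers ranging over the channel.
        var waitGroup sync.WaitGroup
        for i := 0; i < concurrentFiles; i++ {
            waitGroup.Add(1)
            go func() {
                defer waitGroup.Done()
                for task := range taskChan {
                    fmt.Println("copying", task.sourceLocation)
                }
            }()
        }
        waitGroup.Wait()
    }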
filepath.Match(*worker.options.include, filepath.Base(task.sourceLocation)); !ok { + return nil } } // find the chunk count - chunkSize := int64(*copy.maxMB * 1024 * 1024) + chunkSize := int64(*worker.options.maxMB * 1024 * 1024) chunkCount := 1 - if chunkSize > 0 && fi.Size() > chunkSize { - chunkCount = int(fi.Size()/chunkSize) + 1 + if chunkSize > 0 && task.fileSize > chunkSize { + chunkCount = int(task.fileSize/chunkSize) + 1 } if chunkCount == 1 { - return uploadFileAsOne(filerAddress, filerGrpcAddress, path, f, fi) + return worker.uploadFileAsOne(task, f) } - return uploadFileInChunks(filerAddress, filerGrpcAddress, path, f, fi, chunkCount, chunkSize) + return worker.uploadFileInChunks(task, f, chunkCount, chunkSize) } -func uploadFileAsOne(filerAddress, filerGrpcAddress string, urlFolder string, f *os.File, fi os.FileInfo) bool { +func (worker *FileCopyWorker) uploadFileAsOne(task FileCopyTask, f *os.File) error { // upload the file content fileName := filepath.Base(f.Name()) mimeType := detectMimeType(f) - isGzipped := isGzipped(fileName) + data, err := ioutil.ReadAll(f) + if err != nil { + return err + } var chunks []*filer_pb.FileChunk + var assignResult *filer_pb.AssignVolumeResponse + var assignError error - if fi.Size() > 0 { + if task.fileSize > 0 { // assign a volume - assignResult, err := operation.Assign(*copy.master, &operation.VolumeAssignRequest{ - Count: 1, - Replication: *copy.replication, - Collection: *copy.collection, - Ttl: *copy.ttl, + err := pb.WithGrpcFilerClient(worker.filerGrpcAddress, worker.options.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.AssignVolumeRequest{ + Count: 1, + Replication: *worker.options.replication, + Collection: *worker.options.collection, + TtlSec: worker.options.ttlSec, + ParentPath: task.destinationUrlPath, + } + + assignResult, assignError = client.AssignVolume(context.Background(), request) + if assignError != nil { + return fmt.Errorf("assign volume failure %v: %v", request, assignError) + } + if assignResult.Error != "" { + return fmt.Errorf("assign volume failure %v: %v", request, assignResult.Error) + } + return nil }) if err != nil { - fmt.Printf("Failed to assign from %s: %v\n", *copy.master, err) + fmt.Printf("Failed to assign from %v: %v\n", worker.options.masters, err) } - targetUrl := "http://" + assignResult.Url + "/" + assignResult.Fid + targetUrl := "http://" + assignResult.Url + "/" + assignResult.FileId - uploadResult, err := operation.Upload(targetUrl, fileName, f, isGzipped, mimeType, nil, "") + uploadResult, err := operation.UploadData(targetUrl, fileName, worker.options.cipher, data, false, mimeType, nil, security.EncodedJwt(assignResult.Auth)) if err != nil { - fmt.Printf("upload data %v to %s: %v\n", fileName, targetUrl, err) - return false + return fmt.Errorf("upload data %v to %s: %v\n", fileName, targetUrl, err) } if uploadResult.Error != "" { - fmt.Printf("upload %v to %s result: %v\n", fileName, targetUrl, uploadResult.Error) - return false + return fmt.Errorf("upload %v to %s result: %v\n", fileName, targetUrl, uploadResult.Error) } fmt.Printf("uploaded %s to %s\n", fileName, targetUrl) chunks = append(chunks, &filer_pb.FileChunk{ - FileId: assignResult.Fid, - Offset: 0, - Size: uint64(uploadResult.Size), - Mtime: time.Now().UnixNano(), - ETag: uploadResult.ETag, + FileId: assignResult.FileId, + Offset: 0, + Size: uint64(uploadResult.Size), + Mtime: time.Now().UnixNano(), + ETag: uploadResult.Md5, + CipherKey: uploadResult.CipherKey, + IsGzipped: 
uploadResult.Gzip > 0, }) - fmt.Printf("copied %s => http://%s%s%s\n", fileName, filerAddress, urlFolder, fileName) + fmt.Printf("copied %s => http://%s%s%s\n", fileName, worker.filerHost, task.destinationUrlPath, fileName) } - if err := withFilerClient(filerGrpcAddress, func(client filer_pb.SeaweedFilerClient) error { + if err := pb.WithGrpcFilerClient(worker.filerGrpcAddress, worker.options.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { request := &filer_pb.CreateEntryRequest{ - Directory: urlFolder, + Directory: task.destinationUrlPath, Entry: &filer_pb.Entry{ Name: fileName, Attributes: &filer_pb.FuseAttributes{ Crtime: time.Now().Unix(), Mtime: time.Now().Unix(), - Gid: uint32(os.Getgid()), - Uid: uint32(os.Getuid()), - FileSize: uint64(fi.Size()), - FileMode: uint32(fi.Mode()), + Gid: task.gid, + Uid: task.uid, + FileSize: uint64(task.fileSize), + FileMode: uint32(task.fileMode), Mime: mimeType, - Replication: *copy.replication, - Collection: *copy.collection, - TtlSec: int32(util.ParseInt(*copy.ttl, 0)), + Replication: *worker.options.replication, + Collection: *worker.options.collection, + TtlSec: worker.options.ttlSec, }, Chunks: chunks, }, } - if _, err := client.CreateEntry(context.Background(), request); err != nil { + if err := filer_pb.CreateEntry(client, request); err != nil { return fmt.Errorf("update fh: %v", err) } return nil }); err != nil { - fmt.Printf("upload data %v to http://%s%s%s: %v\n", fileName, filerAddress, urlFolder, fileName, err) - return false + return fmt.Errorf("upload data %v to http://%s%s%s: %v\n", fileName, worker.filerHost, task.destinationUrlPath, fileName, err) } - return true + return nil } -func uploadFileInChunks(filerAddress, filerGrpcAddress string, urlFolder string, f *os.File, fi os.FileInfo, chunkCount int, chunkSize int64) bool { +func (worker *FileCopyWorker) uploadFileInChunks(task FileCopyTask, f *os.File, chunkCount int, chunkSize int64) error { fileName := filepath.Base(f.Name()) mimeType := detectMimeType(f) - var chunks []*filer_pb.FileChunk + chunksChan := make(chan *filer_pb.FileChunk, chunkCount) + + concurrentChunks := make(chan struct{}, *worker.options.concurrenctChunks) + var wg sync.WaitGroup + var uploadError error + var collection, replication string + + fmt.Printf("uploading %s in %d chunks ...\n", fileName, chunkCount) + for i := int64(0); i < int64(chunkCount) && uploadError == nil; i++ { + wg.Add(1) + concurrentChunks <- struct{}{} + go func(i int64) { + defer func() { + wg.Done() + <-concurrentChunks + }() + // assign a volume + var assignResult *filer_pb.AssignVolumeResponse + var assignError error + err := pb.WithGrpcFilerClient(worker.filerGrpcAddress, worker.options.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + request := &filer_pb.AssignVolumeRequest{ + Count: 1, + Replication: *worker.options.replication, + Collection: *worker.options.collection, + TtlSec: worker.options.ttlSec, + ParentPath: task.destinationUrlPath, + } + + assignResult, assignError = client.AssignVolume(context.Background(), request) + if assignError != nil { + return fmt.Errorf("assign volume failure %v: %v", request, assignError) + } + if assignResult.Error != "" { + return fmt.Errorf("assign volume failure %v: %v", request, assignResult.Error) + } + return nil + }) + if err != nil { + fmt.Printf("Failed to assign from %v: %v\n", worker.options.masters, err) + } - for i := int64(0); i < int64(chunkCount);
i++ { + targetUrl := "http://" + assignResult.Url + "/" + assignResult.FileId + if collection == "" { + collection = assignResult.Collection + } + if replication == "" { + replication = assignResult.Replication + } - // assign a volume - assignResult, err := operation.Assign(*copy.master, &operation.VolumeAssignRequest{ - Count: 1, - Replication: *copy.replication, - Collection: *copy.collection, - Ttl: *copy.ttl, - }) - if err != nil { - fmt.Printf("Failed to assign from %s: %v\n", *copy.master, err) - } + uploadResult, err := operation.Upload(targetUrl, fileName+"-"+strconv.FormatInt(i+1, 10), worker.options.cipher, io.NewSectionReader(f, i*chunkSize, chunkSize), false, "", nil, security.EncodedJwt(assignResult.Auth)) + if err != nil { + uploadError = fmt.Errorf("upload data %v to %s: %v\n", fileName, targetUrl, err) + return + } + if uploadResult.Error != "" { + uploadError = fmt.Errorf("upload %v to %s result: %v\n", fileName, targetUrl, uploadResult.Error) + return + } + chunksChan <- &filer_pb.FileChunk{ + FileId: assignResult.FileId, + Offset: i * chunkSize, + Size: uint64(uploadResult.Size), + Mtime: time.Now().UnixNano(), + ETag: uploadResult.ETag, + CipherKey: uploadResult.CipherKey, + IsGzipped: uploadResult.Gzip > 0, + } + fmt.Printf("uploaded %s-%d to %s [%d,%d)\n", fileName, i+1, targetUrl, i*chunkSize, i*chunkSize+int64(uploadResult.Size)) + }(i) + } + wg.Wait() + close(chunksChan) - targetUrl := "http://" + assignResult.Url + "/" + assignResult.Fid + var chunks []*filer_pb.FileChunk + for chunk := range chunksChan { + chunks = append(chunks, chunk) + } - uploadResult, err := operation.Upload(targetUrl, - fileName+"-"+strconv.FormatInt(i+1, 10), - io.LimitReader(f, chunkSize), - false, "application/octet-stream", nil, "") - if err != nil { - fmt.Printf("upload data %v to %s: %v\n", fileName, targetUrl, err) - return false - } - if uploadResult.Error != "" { - fmt.Printf("upload %v to %s result: %v\n", fileName, targetUrl, uploadResult.Error) - return false + if uploadError != nil { + var fileIds []string + for _, chunk := range chunks { + fileIds = append(fileIds, chunk.FileId) } - chunks = append(chunks, &filer_pb.FileChunk{ - FileId: assignResult.Fid, - Offset: i * chunkSize, - Size: uint64(uploadResult.Size), - Mtime: time.Now().UnixNano(), - ETag: uploadResult.ETag, - }) - fmt.Printf("uploaded %s-%d to %s [%d,%d)\n", fileName, i+1, targetUrl, i*chunkSize, i*chunkSize+int64(uploadResult.Size)) + operation.DeleteFiles(copy.masters[0], worker.options.grpcDialOption, fileIds) + return uploadError } - if err := withFilerClient(filerGrpcAddress, func(client filer_pb.SeaweedFilerClient) error { + if err := pb.WithGrpcFilerClient(worker.filerGrpcAddress, worker.options.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { request := &filer_pb.CreateEntryRequest{ - Directory: urlFolder, + Directory: task.destinationUrlPath, Entry: &filer_pb.Entry{ Name: fileName, Attributes: &filer_pb.FuseAttributes{ Crtime: time.Now().Unix(), Mtime: time.Now().Unix(), - Gid: uint32(os.Getgid()), - Uid: uint32(os.Getuid()), - FileSize: uint64(fi.Size()), - FileMode: uint32(fi.Mode()), + Gid: task.gid, + Uid: task.uid, + FileSize: uint64(task.fileSize), + FileMode: uint32(task.fileMode), Mime: mimeType, - Replication: *copy.replication, - Collection: *copy.collection, - TtlSec: int32(util.ParseInt(*copy.ttl, 0)), + Replication: replication, + Collection: collection, + TtlSec: worker.options.ttlSec, }, Chunks: chunks, }, } - if _, err := client.CreateEntry(context.Background(), request); 
err != nil { + if err := filer_pb.CreateEntry(client, request); err != nil { return fmt.Errorf("update fh: %v", err) } return nil }); err != nil { - fmt.Printf("upload data %v to http://%s%s%s: %v\n", fileName, filerAddress, urlFolder, fileName, err) - return false + return fmt.Errorf("upload data %v to http://%s%s%s: %v\n", fileName, worker.filerHost, task.destinationUrlPath, fileName, err) } - fmt.Printf("copied %s => http://%s%s%s\n", fileName, filerAddress, urlFolder, fileName) + fmt.Printf("copied %s => http://%s%s%s\n", fileName, worker.filerHost, task.destinationUrlPath, fileName) - return true -} - -func isGzipped(filename string) bool { - return strings.ToLower(path.Ext(filename)) == ".gz" + return nil } func detectMimeType(f *os.File) string { head := make([]byte, 512) - f.Seek(0, 0) + f.Seek(0, io.SeekStart) n, err := f.Read(head) if err == io.EOF { return "" } if err != nil { fmt.Printf("read head of %v: %v\n", f.Name(), err) - return "application/octet-stream" + return "" } - f.Seek(0, 0) + f.Seek(0, io.SeekStart) mimeType := http.DetectContentType(head[:n]) - return mimeType -} - -func withFilerClient(filerAddress string, fn func(filer_pb.SeaweedFilerClient) error) error { - - grpcConnection, err := util.GrpcDial(filerAddress) - if err != nil { - return fmt.Errorf("fail to dial %s: %v", filerAddress, err) + if mimeType == "application/octet-stream" { + return "" } - defer grpcConnection.Close() - - client := filer_pb.NewSeaweedFilerClient(grpcConnection) - - return fn(client) + return mimeType } diff --git a/weed/command/filer_export.go b/weed/command/filer_export.go deleted file mode 100644 index 9bd0f3014..000000000 --- a/weed/command/filer_export.go +++ /dev/null @@ -1,133 +0,0 @@ -package command - -import ( - "github.com/chrislusf/seaweedfs/weed/filer2" - "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/server" - "github.com/spf13/viper" -) - -func init() { - cmdFilerExport.Run = runFilerExport // break init cycle -} - -var cmdFilerExport = &Command{ - UsageLine: "filer.export -sourceStore=mysql -targetStroe=cassandra", - Short: "export meta data in filer store", - Long: `Iterate the file tree and export all metadata out - - Both source and target store: - * should be a store name already specified in filer.toml - * do not need to be enabled state - - If target store is empty, only the directory tree will be listed. - - `, -} - -var ( - // filerExportOutputFile = cmdFilerExport.Flag.String("output", "", "the output file. 
If empty, only list out the directory tree") - filerExportSourceStore = cmdFilerExport.Flag.String("sourceStore", "", "the source store name in filer.toml") - filerExportTargetStore = cmdFilerExport.Flag.String("targetStore", "", "the target store name in filer.toml") - dirListLimit = cmdFilerExport.Flag.Int("dirListLimit", 100000, "limit directory list size") -) - -type statistics struct { - directoryCount int - fileCount int -} - -func runFilerExport(cmd *Command, args []string) bool { - - weed_server.LoadConfiguration("filer", true) - config := viper.GetViper() - - var sourceStore, targetStore filer2.FilerStore - - for _, store := range filer2.Stores { - if store.GetName() == *filerExportSourceStore { - viperSub := config.Sub(store.GetName()) - if err := store.Initialize(viperSub); err != nil { - glog.Fatalf("Failed to initialize store for %s: %+v", - store.GetName(), err) - } else { - sourceStore = store - } - break - } - } - - for _, store := range filer2.Stores { - if store.GetName() == *filerExportTargetStore { - viperSub := config.Sub(store.GetName()) - if err := store.Initialize(viperSub); err != nil { - glog.Fatalf("Failed to initialize store for %s: %+v", - store.GetName(), err) - } else { - targetStore = store - } - break - } - } - - if sourceStore == nil { - glog.Errorf("Failed to find source store %s", *filerExportSourceStore) - println("existing data sources are:") - for _, store := range filer2.Stores { - println(" " + store.GetName()) - } - return false - } - - stat := statistics{} - - var fn func(level int, entry *filer2.Entry) error - - if targetStore == nil { - fn = printout - } else { - fn = func(level int, entry *filer2.Entry) error { - return targetStore.InsertEntry(entry) - } - } - - doTraverse(&stat, sourceStore, filer2.FullPath("/"), 0, fn) - - glog.Infof("processed %d directories, %d files", stat.directoryCount, stat.fileCount) - - return true -} - -func doTraverse(stat *statistics, filerStore filer2.FilerStore, parentPath filer2.FullPath, level int, fn func(level int, entry *filer2.Entry) error) { - - limit := *dirListLimit - lastEntryName := "" - for { - entries, err := filerStore.ListDirectoryEntries(parentPath, lastEntryName, false, limit) - if err != nil { - break - } - for _, entry := range entries { - if fnErr := fn(level, entry); fnErr != nil { - glog.Errorf("failed to process entry: %s", entry.FullPath) - } - if entry.IsDirectory() { - stat.directoryCount++ - doTraverse(stat, filerStore, entry.FullPath, level+1, fn) - } else { - stat.fileCount++ - } - } - if len(entries) < limit { - break - } - } -} - -func printout(level int, entry *filer2.Entry) error { - for i := 0; i < level; i++ { - print(" ") - } - println(entry.FullPath.Name()) - return nil -} diff --git a/weed/command/filer_replication.go b/weed/command/filer_replication.go index 05076143a..737f0d24a 100644 --- a/weed/command/filer_replication.go +++ b/weed/command/filer_replication.go @@ -1,6 +1,7 @@ package command import ( + "context" "strings" "github.com/chrislusf/seaweedfs/weed/glog" @@ -11,7 +12,8 @@ import ( _ "github.com/chrislusf/seaweedfs/weed/replication/sink/filersink" _ "github.com/chrislusf/seaweedfs/weed/replication/sink/gcssink" _ "github.com/chrislusf/seaweedfs/weed/replication/sink/s3sink" - "github.com/chrislusf/seaweedfs/weed/server" + "github.com/chrislusf/seaweedfs/weed/replication/sub" + "github.com/chrislusf/seaweedfs/weed/util" "github.com/spf13/viper" ) @@ -27,22 +29,25 @@ var cmdFilerReplicate = &Command{ filer.replicate listens on filer notifications. 
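In code, that listening loop (implemented later in this diff, in runFilerReplicate) condenses to the sketch below; notificationInput and replicator are the variables declared there, and error handling is trimmed:

	for {
		key, m, err := notificationInput.ReceiveMessage()
		if err != nil {
			glog.Errorf("receive %s: %+v", key, err)
			continue
		}
		if key == "" {
			continue // a long poll that timed out with no messages
		}
		// m.OldEntry / m.NewEntry decide whether this is a delete, create, or modify
		if err = replicator.Replicate(context.Background(), key, m); err != nil {
			glog.Errorf("replicate %s: %+v", key, err)
		}
	}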
If any file is updated, it will fetch the updated content, and write to the other destination. - Run "weed scaffold -config replication" to generate a replication.toml file and customize the parameters. + Run "weed scaffold -config=replication" to generate a replication.toml file and customize the parameters. `, } func runFilerReplicate(cmd *Command, args []string) bool { - weed_server.LoadConfiguration("replication", true) - config := viper.GetViper() + util.LoadConfiguration("security", false) + util.LoadConfiguration("replication", true) + util.LoadConfiguration("notification", true) + config := util.GetViper() - var notificationInput replication.NotificationInput + var notificationInput sub.NotificationInput - for _, input := range replication.NotificationInputs { + validateOneEnabledInput(config) + + for _, input := range sub.NotificationInputs { if config.GetBool("notification." + input.GetName() + ".enabled") { - viperSub := config.Sub("notification." + input.GetName()) - if err := input.Initialize(viperSub); err != nil { + if err := input.Initialize(config, "notification."+input.GetName()+"."); err != nil { glog.Fatalf("Failed to initialize notification input for %s: %+v", input.GetName(), err) } @@ -53,16 +58,16 @@ func runFilerReplicate(cmd *Command, args []string) bool { } if notificationInput == nil { - println("Please follow 'weed scaffold -config=repliaction' to see example notification configurations.") + println("No notification is defined in notification.toml file.") + println("Please follow 'weed scaffold -config=notification' to see example notification configurations.") return true } // avoid recursive replication if config.GetBool("notification.source.filer.enabled") && config.GetBool("notification.sink.filer.enabled") { - sourceConfig, sinkConfig := config.Sub("source.filer"), config.Sub("sink.filer") - if sourceConfig.GetString("grpcAddress") == sinkConfig.GetString("grpcAddress") { - fromDir := sourceConfig.GetString("directory") - toDir := sinkConfig.GetString("directory") + if config.GetString("source.filer.grpcAddress") == config.GetString("sink.filer.grpcAddress") { + fromDir := config.GetString("source.filer.directory") + toDir := config.GetString("sink.filer.directory") if strings.HasPrefix(toDir, fromDir) { glog.Fatalf("recursive replication! source directory %s includes the sink directory %s", fromDir, toDir) } @@ -72,8 +77,7 @@ func runFilerReplicate(cmd *Command, args []string) bool { var dataSink sink.ReplicationSink for _, sk := range sink.Sinks { if config.GetBool("sink." + sk.GetName() + ".enabled") { - viperSub := config.Sub("sink." 
+ sk.GetName()) - if err := sk.Initialize(viperSub); err != nil { + if err := sk.Initialize(config, "sink."+sk.GetName()+"."); err != nil { glog.Fatalf("Failed to initialize sink for %s: %+v", sk.GetName(), err) } @@ -84,14 +88,14 @@ func runFilerReplicate(cmd *Command, args []string) bool { } if dataSink == nil { - println("no data sink configured:") + println("no data sink configured in replication.toml:") for _, sk := range sink.Sinks { println(" " + sk.GetName()) } return true } - replicator := replication.NewReplicator(config.Sub("source.filer"), dataSink) + replicator := replication.NewReplicator(config, "source.filer.", dataSink) for { key, m, err := notificationInput.ReceiveMessage() @@ -99,6 +103,10 @@ func runFilerReplicate(cmd *Command, args []string) bool { glog.Errorf("receive %s: %+v", key, err) continue } + if key == "" { + // long poll received no messages + continue + } if m.OldEntry != nil && m.NewEntry == nil { glog.V(1).Infof("delete: %s", key) } else if m.OldEntry == nil && m.NewEntry != nil { @@ -106,10 +114,25 @@ func runFilerReplicate(cmd *Command, args []string) bool { } else { glog.V(1).Infof("modify: %s", key) } - if err = replicator.Replicate(key, m); err != nil { + if err = replicator.Replicate(context.Background(), key, m); err != nil { glog.Errorf("replicate %s: %+v", key, err) + } else { + glog.V(1).Infof("replicated %s", key) } } return true } + +func validateOneEnabledInput(config *viper.Viper) { + enabledInput := "" + for _, input := range sub.NotificationInputs { + if config.GetBool("notification." + input.GetName() + ".enabled") { + if enabledInput == "" { + enabledInput = input.GetName() + } else { + glog.Fatalf("Notification input is enabled for both %s and %s", enabledInput, input.GetName()) + } + } + } +} diff --git a/weed/command/fix.go b/weed/command/fix.go index 3643c9d58..90d1c4893 100644 --- a/weed/command/fix.go +++ b/weed/command/fix.go @@ -7,6 +7,9 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" "github.com/chrislusf/seaweedfs/weed/storage/types" ) @@ -28,6 +31,32 @@ var ( fixVolumeId = cmdFix.Flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. 
The volume index file should not exist.") ) +type VolumeFileScanner4Fix struct { + version needle.Version + nm *needle_map.MemDb +} + +func (scanner *VolumeFileScanner4Fix) VisitSuperBlock(superBlock super_block.SuperBlock) error { + scanner.version = superBlock.Version + return nil + +} +func (scanner *VolumeFileScanner4Fix) ReadNeedleBody() bool { + return false +} + +func (scanner *VolumeFileScanner4Fix) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { + glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(scanner.version), n.IsGzipped()) + if n.Size > 0 && n.Size != types.TombstoneFileSize { + pe := scanner.nm.Set(n.Id, types.ToOffset(offset), n.Size) + glog.V(2).Infof("saved %d with error %v", n.Size, pe) + } else { + glog.V(2).Infof("skipping deleted file ...") + return scanner.nm.Delete(n.Id) + } + return nil +} + func runFix(cmd *Command, args []string) bool { if *fixVolumeId == -1 { @@ -39,35 +68,22 @@ func runFix(cmd *Command, args []string) bool { baseFileName = *fixVolumeCollection + "_" + baseFileName } indexFileName := path.Join(*fixVolumePath, baseFileName+".idx") - indexFile, err := os.OpenFile(indexFileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) - if err != nil { - glog.Fatalf("Create Volume Index [ERROR] %s\n", err) - } - defer indexFile.Close() - nm := storage.NewBtreeNeedleMap(indexFile) + nm := needle_map.NewMemDb() defer nm.Close() - var version storage.Version - vid := storage.VolumeId(*fixVolumeId) - err = storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid, - storage.NeedleMapInMemory, - func(superBlock storage.SuperBlock) error { - version = superBlock.Version() - return nil - }, false, func(n *storage.Needle, offset int64) error { - glog.V(2).Infof("key %d offset %d size %d disk_size %d gzip %v", n.Id, offset, n.Size, n.DiskSize(version), n.IsGzipped()) - if n.Size > 0 { - pe := nm.Put(n.Id, types.Offset(offset/types.NeedlePaddingSize), n.Size) - glog.V(2).Infof("saved %d with error %v", n.Size, pe) - } else { - glog.V(2).Infof("skipping deleted file ...") - return nm.Delete(n.Id, types.Offset(offset/types.NeedlePaddingSize)) - } - return nil - }) - if err != nil { - glog.Fatalf("Export Volume File [ERROR] %s\n", err) + vid := needle.VolumeId(*fixVolumeId) + scanner := &VolumeFileScanner4Fix{ + nm: nm, + } + + if err := storage.ScanVolumeFile(*fixVolumePath, *fixVolumeCollection, vid, storage.NeedleMapInMemory, scanner); err != nil { + glog.Fatalf("scan .dat File: %v", err) + os.Remove(indexFileName) + } + + if err := nm.SaveToIdx(indexFileName); err != nil { + glog.Fatalf("save to .idx File: %v", err) os.Remove(indexFileName) } diff --git a/weed/command/master.go b/weed/command/master.go index 29c8a6833..1be60426f 100644 --- a/weed/command/master.go +++ b/weed/command/master.go @@ -6,112 +6,144 @@ import ( "runtime" "strconv" "strings" - "time" + + "github.com/chrislusf/raft/protobuf" + "github.com/gorilla/mux" + "google.golang.org/grpc/reflection" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/server" + "github.com/chrislusf/seaweedfs/weed/storage/backend" "github.com/chrislusf/seaweedfs/weed/util" - "github.com/gorilla/mux" - "github.com/soheilhy/cmux" - "google.golang.org/grpc/reflection" ) +var ( + m MasterOptions +) + +type MasterOptions struct { + port *int + ip *string + ipBind *string 
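+ // one field per "weed master" command line flag; init() below binds each of them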
+ metaFolder *string + peers *string + volumeSizeLimitMB *uint + volumePreallocate *bool + pulseSeconds *int + defaultReplication *string + garbageThreshold *float64 + whiteList *string + disableHttp *bool + metricsAddress *string + metricsIntervalSec *int +} + func init() { cmdMaster.Run = runMaster // break init cycle + m.port = cmdMaster.Flag.Int("port", 9333, "http listen port") + m.ip = cmdMaster.Flag.String("ip", "localhost", "master <ip>|<server> address") + m.ipBind = cmdMaster.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to") + m.metaFolder = cmdMaster.Flag.String("mdir", os.TempDir(), "data directory to store meta data") + m.peers = cmdMaster.Flag.String("peers", "", "all master nodes in comma separated ip:port list, example: 127.0.0.1:9093,127.0.0.1:9094") + m.volumeSizeLimitMB = cmdMaster.Flag.Uint("volumeSizeLimitMB", 30*1000, "Master stops directing writes to oversized volumes.") + m.volumePreallocate = cmdMaster.Flag.Bool("volumePreallocate", false, "Preallocate disk space for volumes.") + m.pulseSeconds = cmdMaster.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats") + m.defaultReplication = cmdMaster.Flag.String("defaultReplication", "000", "Default replication type if not specified.") + m.garbageThreshold = cmdMaster.Flag.Float64("garbageThreshold", 0.3, "threshold to vacuum and reclaim spaces") + m.whiteList = cmdMaster.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. No limit if empty.") + m.disableHttp = cmdMaster.Flag.Bool("disableHttp", false, "disable http requests, only gRPC operations are allowed.") + m.metricsAddress = cmdMaster.Flag.String("metrics.address", "", "Prometheus gateway address") + m.metricsIntervalSec = cmdMaster.Flag.Int("metrics.intervalSeconds", 15, "Prometheus push interval in seconds") } var cmdMaster = &Command{ UsageLine: "master -port=9333", Short: "start a master server", - Long: `start a master server to provide volume=>location mapping service - and sequence number of file ids + Long: `start a master server to provide volume=>location mapping service and sequence number of file ids + + The configuration file "security.toml" is read from ".", "$HOME/.seaweedfs/", or "/etc/seaweedfs/", in that order. + + The example security.toml configuration file can be generated by "weed scaffold -config=security" `, } var ( - mport = cmdMaster.Flag.Int("port", 9333, "http listen port") - masterIp = cmdMaster.Flag.String("ip", "localhost", "master <ip>|<server> address") - masterBindIp = cmdMaster.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to") - metaFolder = cmdMaster.Flag.String("mdir", os.TempDir(), "data directory to store meta data") - masterPeers = cmdMaster.Flag.String("peers", "", "all master nodes in comma separated ip:port list, example: 127.0.0.1:9093,127.0.0.1:9094") - volumeSizeLimitMB = cmdMaster.Flag.Uint("volumeSizeLimitMB", 30*1000, "Master stops directing writes to oversized volumes.") - volumePreallocate = cmdMaster.Flag.Bool("volumePreallocate", false, "Preallocate disk space for volumes.") - mpulse = cmdMaster.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats") - defaultReplicaPlacement = cmdMaster.Flag.String("defaultReplication", "000", "Default replication type if not specified.") - // mTimeout = cmdMaster.Flag.Int("idleTimeout", 30, "connection idle seconds") - mMaxCpu = cmdMaster.Flag.Int("maxCpu", 0, "maximum number of CPUs. 
0 means all available CPUs") - garbageThreshold = cmdMaster.Flag.Float64("garbageThreshold", 0.3, "threshold to vacuum and reclaim spaces") - masterWhiteListOption = cmdMaster.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. No limit if empty.") - masterSecureKey = cmdMaster.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)") - masterCpuProfile = cmdMaster.Flag.String("cpuprofile", "", "cpu profile output file") - masterMemProfile = cmdMaster.Flag.String("memprofile", "", "memory profile output file") - - masterWhiteList []string + masterCpuProfile = cmdMaster.Flag.String("cpuprofile", "", "cpu profile output file") + masterMemProfile = cmdMaster.Flag.String("memprofile", "", "memory profile output file") ) func runMaster(cmd *Command, args []string) bool { - if *mMaxCpu < 1 { - *mMaxCpu = runtime.NumCPU() - } - runtime.GOMAXPROCS(*mMaxCpu) + + util.LoadConfiguration("security", false) + util.LoadConfiguration("master", false) + + runtime.GOMAXPROCS(runtime.NumCPU()) util.SetupProfiling(*masterCpuProfile, *masterMemProfile) - if err := util.TestFolderWritable(*metaFolder); err != nil { - glog.Fatalf("Check Meta Folder (-mdir) Writable %s : %s", *metaFolder, err) + if err := util.TestFolderWritable(*m.metaFolder); err != nil { + glog.Fatalf("Check Meta Folder (-mdir) Writable %s : %s", *m.metaFolder, err) } - if *masterWhiteListOption != "" { - masterWhiteList = strings.Split(*masterWhiteListOption, ",") + + var masterWhiteList []string + if *m.whiteList != "" { + masterWhiteList = strings.Split(*m.whiteList, ",") } - if *volumeSizeLimitMB > 30*1000 { + if *m.volumeSizeLimitMB > util.VolumeSizeLimitGB*1000 { glog.Fatalf("volumeSizeLimitMB should be smaller than 30000") } - r := mux.NewRouter() - ms := weed_server.NewMasterServer(r, *mport, *metaFolder, - *volumeSizeLimitMB, *volumePreallocate, - *mpulse, *defaultReplicaPlacement, *garbageThreshold, - masterWhiteList, *masterSecureKey, - ) + startMaster(m, masterWhiteList) + + return true +} + +func startMaster(masterOption MasterOptions, masterWhiteList []string) { - listeningAddress := *masterBindIp + ":" + strconv.Itoa(*mport) + backend.LoadConfiguration(util.GetViper()) - glog.V(0).Infoln("Start Seaweed Master", util.VERSION, "at", listeningAddress) + myMasterAddress, peers := checkPeers(*masterOption.ip, *masterOption.port, *masterOption.peers) - listener, e := util.NewListener(listeningAddress, 0) + r := mux.NewRouter() + ms := weed_server.NewMasterServer(r, masterOption.toMasterOption(masterWhiteList), peers) + listeningAddress := *masterOption.ipBind + ":" + strconv.Itoa(*masterOption.port) + glog.V(0).Infof("Start Seaweed Master %s at %s", util.VERSION, listeningAddress) + masterListener, e := util.NewListener(listeningAddress, 0) if e != nil { glog.Fatalf("Master startup error: %v", e) } - - go func() { - time.Sleep(100 * time.Millisecond) - myMasterAddress, peers := checkPeers(*masterIp, *mport, *masterPeers) - raftServer := weed_server.NewRaftServer(r, peers, myMasterAddress, *metaFolder, ms.Topo, *mpulse) - ms.SetRaftServer(raftServer) - }() - - // start grpc and http server - m := cmux.New(listener) - - grpcL := m.Match(cmux.HTTP2HeaderField("content-type", "application/grpc")) - httpL := m.Match(cmux.Any()) - + // start raftServer + raftServer := weed_server.NewRaftServer(security.LoadClientTLS(util.GetViper(), "grpc.master"), + peers, myMasterAddress, *masterOption.metaFolder, ms.Topo, *masterOption.pulseSeconds) + if raftServer == nil { + glog.Fatalf("please verify %s 
is writable, see https://github.com/chrislusf/seaweedfs/issues/717", *masterOption.metaFolder) + } + ms.SetRaftServer(raftServer) + r.HandleFunc("/cluster/status", raftServer.StatusHandler).Methods("GET") + // starting grpc server + grpcPort := *masterOption.port + 10000 + grpcL, err := util.NewListener(*masterOption.ipBind+":"+strconv.Itoa(grpcPort), 0) + if err != nil { + glog.Fatalf("master failed to listen on grpc port %d: %v", grpcPort, err) + } // Create your protocol servers. - grpcS := util.NewGrpcServer() + grpcS := pb.NewGrpcServer(security.LoadServerTLS(util.GetViper(), "grpc.master")) master_pb.RegisterSeaweedServer(grpcS, ms) + protobuf.RegisterRaftServer(grpcS, raftServer) reflection.Register(grpcS) - - httpS := &http.Server{Handler: r} - + glog.V(0).Infof("Start Seaweed Master %s grpc server at %s:%d", util.VERSION, *masterOption.ipBind, grpcPort) go grpcS.Serve(grpcL) - go httpS.Serve(httpL) - if err := m.Serve(); err != nil { - glog.Fatalf("master server failed to serve: %v", err) - } + go ms.MasterClient.KeepConnectedToMaster() - return true + // start http server + httpS := &http.Server{Handler: r} + go httpS.Serve(masterListener) + + select {} } func checkPeers(masterIp string, masterPort int, peers string) (masterAddress string, cleanedPeers []string) { @@ -128,12 +160,27 @@ func checkPeers(masterIp string, masterPort int, peers string) (masterAddress st } } - peerCount := len(cleanedPeers) if !hasSelf { - peerCount += 1 + cleanedPeers = append(cleanedPeers, masterAddress) } - if peerCount%2 == 0 { + if len(cleanedPeers)%2 == 0 { glog.Fatalf("Only odd number of masters are supported!") } return } + +func (m *MasterOptions) toMasterOption(whiteList []string) *weed_server.MasterOption { + return &weed_server.MasterOption{ + Port: *m.port, + MetaFolder: *m.metaFolder, + VolumeSizeLimitMB: *m.volumeSizeLimitMB, + VolumePreallocate: *m.volumePreallocate, + PulseSeconds: *m.pulseSeconds, + DefaultReplicaPlacement: *m.defaultReplication, + GarbageThreshold: *m.garbageThreshold, + WhiteList: whiteList, + DisableHttp: *m.disableHttp, + MetricsAddress: *m.metricsAddress, + MetricsIntervalSec: *m.metricsIntervalSec, + } +} diff --git a/weed/command/mount.go b/weed/command/mount.go index 952dcdfcb..f1448c6cc 100644 --- a/weed/command/mount.go +++ b/weed/command/mount.go @@ -1,40 +1,42 @@ package command -import ( - "fmt" - "strconv" - "strings" -) - type MountOptions struct { - filer *string - filerGrpcPort *int - filerMountRootPath *string - dir *string - dirListingLimit *int - collection *string - replication *string - ttlSec *int - chunkSizeLimitMB *int - dataCenter *string + filer *string + filerMountRootPath *string + dir *string + dirListCacheLimit *int64 + collection *string + replication *string + ttlSec *int + chunkSizeLimitMB *int + dataCenter *string + allowOthers *bool + umaskString *string + outsideContainerClusterMode *bool } var ( - mountOptions MountOptions + mountOptions MountOptions + mountCpuProfile *string + mountMemProfile *string ) func init() { cmdMount.Run = runMount // break init cycle mountOptions.filer = cmdMount.Flag.String("filer", "localhost:8888", "weed filer location") - mountOptions.filerGrpcPort = cmdMount.Flag.Int("filer.grpc.port", 0, "filer grpc server listen port, default to http port + 10000") mountOptions.filerMountRootPath = cmdMount.Flag.String("filer.path", "/", "mount this remote path from filer server") mountOptions.dir = cmdMount.Flag.String("dir", ".", "mount weed filer to this directory") - mountOptions.dirListingLimit = 
cmdMount.Flag.Int("dirListLimit", 100000, "limit directory listing size") + mountOptions.dirListCacheLimit = cmdMount.Flag.Int64("dirListCacheLimit", 1000000, "limit cache size to speed up directory long format listing") mountOptions.collection = cmdMount.Flag.String("collection", "", "collection to create the files") mountOptions.replication = cmdMount.Flag.String("replication", "", "replication(e.g. 000, 001) to create to files. If empty, let filer decide.") mountOptions.ttlSec = cmdMount.Flag.Int("ttl", 0, "file ttl in seconds") - mountOptions.chunkSizeLimitMB = cmdMount.Flag.Int("chunkSizeLimitMB", 16, "local write buffer size, also chunk large files") + mountOptions.chunkSizeLimitMB = cmdMount.Flag.Int("chunkSizeLimitMB", 4, "local write buffer size, also chunk large files") mountOptions.dataCenter = cmdMount.Flag.String("dataCenter", "", "prefer to write to the data center") + mountOptions.allowOthers = cmdMount.Flag.Bool("allowOthers", true, "allows other users to access the file system") + mountOptions.umaskString = cmdMount.Flag.String("umask", "022", "octal umask, e.g., 022, 0111") + mountCpuProfile = cmdMount.Flag.String("cpuprofile", "", "cpu profile output file") + mountMemProfile = cmdMount.Flag.String("memprofile", "", "memory profile output file") + mountOptions.outsideContainerClusterMode = cmdMount.Flag.Bool("outsideContainerClusterMode", false, "allows other users to access the file system") } var cmdMount = &Command{ @@ -47,29 +49,16 @@ var cmdMount = &Command{ 2) have a "weed filer" running These 2 requirements can be achieved with one command "weed server -filer=true" - This uses bazil.org/fuse, which enables writing FUSE file systems on + This uses github.com/seaweedfs/fuse, which enables writing FUSE file systems on Linux, and OS X. On OS X, it requires OSXFUSE (http://osxfuse.github.com/). - `, -} - -func parseFilerGrpcAddress(filer string, optionalGrpcPort int) (filerGrpcAddress string, err error) { - hostnameAndPort := strings.Split(filer, ":") - if len(hostnameAndPort) != 2 { - return "", fmt.Errorf("The filer should have hostname:port format: %v", hostnameAndPort) - } - - filerPort, parseErr := strconv.ParseUint(hostnameAndPort[1], 10, 64) - if parseErr != nil { - return "", fmt.Errorf("The filer filer port parse error: %v", parseErr) - } + If the SeaweedFS system runs in a container cluster, e.g. managed by kubernetes or docker compose, + the volume servers are not accessible by their own ip addresses. + In "outsideContainerClusterMode", the mount will use the filer ip address instead, assuming: + * All volume server containers are accessible through the same hostname or IP address as the filer. + * All volume server container ports are open external to the cluster. 
- filerGrpcPort := int(filerPort) + 10000 - if optionalGrpcPort != 0 { - filerGrpcPort = optionalGrpcPort - } - - return fmt.Sprintf("%s:%d", hostnameAndPort[0], filerGrpcPort), nil + `, } diff --git a/weed/command/mount_darwin.go b/weed/command/mount_darwin.go new file mode 100644 index 000000000..f0a5581e7 --- /dev/null +++ b/weed/command/mount_darwin.go @@ -0,0 +1,13 @@ +package command + +import ( + "github.com/seaweedfs/fuse" +) + +func osSpecificMountOptions() []fuse.MountOption { + return []fuse.MountOption{} +} + +func checkMountPointAvailable(dir string) bool { + return true +} diff --git a/weed/command/mount_freebsd.go b/weed/command/mount_freebsd.go new file mode 100644 index 000000000..f0a5581e7 --- /dev/null +++ b/weed/command/mount_freebsd.go @@ -0,0 +1,13 @@ +package command + +import ( + "github.com/seaweedfs/fuse" +) + +func osSpecificMountOptions() []fuse.MountOption { + return []fuse.MountOption{} +} + +func checkMountPointAvailable(dir string) bool { + return true +} diff --git a/weed/command/mount_linux.go b/weed/command/mount_linux.go new file mode 100644 index 000000000..80a5f9da4 --- /dev/null +++ b/weed/command/mount_linux.go @@ -0,0 +1,157 @@ +package command + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" + + "github.com/seaweedfs/fuse" +) + +const ( + /* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) + + (1) mount ID: unique identifier of the mount (may be reused after umount) + (2) parent ID: ID of parent (or of self for the top of the mount tree) + (3) major:minor: value of st_dev for files on filesystem + (4) root: root of the mount within the filesystem + (5) mount point: mount point relative to the process's root + (6) mount options: per mount options + (7) optional fields: zero or more fields of the form "tag[:value]" + (8) separator: marks the end of the optional fields + (9) filesystem type: name of filesystem of the form "type[.subtype]" + (10) mount source: filesystem specific information or "none" + (11) super options: per super block options*/ + mountinfoFormat = "%d %d %d:%d %s %s %s %s" +) + +// Info reveals information about a particular mounted filesystem. This +// struct is populated from the content in the /proc/<pid>/mountinfo file. +type Info struct { + // ID is a unique identifier of the mount (may be reused after umount). + ID int + + // Parent indicates the ID of the mount parent (or of self for the top of the + // mount tree). + Parent int + + // Major indicates one half of the device ID which identifies the device class. + Major int + + // Minor indicates one half of the device ID which identifies a specific + // instance of device. + Minor int + + // Root of the mount within the filesystem. + Root string + + // Mountpoint indicates the mount point relative to the process's root. + Mountpoint string + + // Opts represents mount-specific options. + Opts string + + // Optional represents optional fields. + Optional string + + // Fstype indicates the type of filesystem, such as EXT3. + Fstype string + + // Source indicates filesystem specific information or "none". + Source string + + // VfsOpts represents per super block options. + VfsOpts string +} + +// Mounted determines if a specified mountpoint has been mounted. +// On Linux it looks at /proc/self/mountinfo and on Solaris at mnttab. 
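As a quick sanity check of the mountinfoFormat string above, the sample line from the comment parses like this (a fragment reusing the Info struct defined above):

	line := "36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue"
	var p Info
	var optionalFields string
	fmt.Sscanf(line, "%d %d %d:%d %s %s %s %s",
		&p.ID, &p.Parent, &p.Major, &p.Minor,
		&p.Root, &p.Mountpoint, &p.Opts, &optionalFields)
	// p.Mountpoint == "/mnt2", p.Opts == "rw,noatime", optionalFields == "master:1".
	// Everything after " - " (fstype, source, vfs options) has a variable field
	// count, which is why parseInfoFile below splits it off with strings.Index.

The mounted helper below then simply scans the parsed entries for a matching mount point.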
+func mounted(mountPoint string) (bool, error) { + entries, err := parseMountTable() + if err != nil { + return false, err + } + + // Search the table for the mountPoint + for _, e := range entries { + if e.Mountpoint == mountPoint { + return true, nil + } + } + return false, nil +} + +// Parse /proc/self/mountinfo because comparing Dev and ino does not work from +// bind mounts +func parseMountTable() ([]*Info, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer f.Close() + + return parseInfoFile(f) +} + +func parseInfoFile(r io.Reader) ([]*Info, error) { + var ( + s = bufio.NewScanner(r) + out []*Info + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + var ( + p = &Info{} + text = s.Text() + optionalFields string + ) + + if _, err := fmt.Sscanf(text, mountinfoFormat, + &p.ID, &p.Parent, &p.Major, &p.Minor, + &p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil { + return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err) + } + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + if len(postSeparatorFields) < 3 { + return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + if optionalFields != "-" { + p.Optional = optionalFields + } + + p.Fstype = postSeparatorFields[0] + p.Source = postSeparatorFields[1] + p.VfsOpts = strings.Join(postSeparatorFields[2:], " ") + out = append(out, p) + } + return out, nil +} + +func osSpecificMountOptions() []fuse.MountOption { + return []fuse.MountOption{ + fuse.AllowNonEmptyMount(), + } +} + +func checkMountPointAvailable(dir string) bool { + mountPoint := dir + if mountPoint != "/" && strings.HasSuffix(mountPoint, "/") { + mountPoint = mountPoint[0 : len(mountPoint)-1] + } + + if mounted, err := mounted(mountPoint); err != nil || mounted { + return false + } + + return true +} diff --git a/weed/command/mount_notsupported.go b/weed/command/mount_notsupported.go index 3bf22ddc4..f3c0de3d6 100644 --- a/weed/command/mount_notsupported.go +++ b/weed/command/mount_notsupported.go @@ -1,5 +1,6 @@ // +build !linux // +build !darwin +// +build !freebsd package command diff --git a/weed/command/mount_std.go b/weed/command/mount_std.go index 92dcc9008..9177091a5 100644 --- a/weed/command/mount_std.go +++ b/weed/command/mount_std.go @@ -1,86 +1,202 @@ -// +build linux darwin +// +build linux darwin freebsd package command import ( + "context" "fmt" + "os" + "os/user" + "path" "runtime" + "strconv" + "strings" + "time" + + "github.com/jacobsa/daemonize" - "bazil.org/fuse" - "bazil.org/fuse/fs" "github.com/chrislusf/seaweedfs/weed/filesys" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/util" - "strings" + "github.com/seaweedfs/fuse" + "github.com/seaweedfs/fuse/fs" ) func runMount(cmd *Command, args []string) bool { + + util.SetupProfiling(*mountCpuProfile, *mountMemProfile) + + umask, umaskErr := strconv.ParseUint(*mountOptions.umaskString, 8, 64) + if umaskErr != nil { + fmt.Printf("can not parse umask %s", *mountOptions.umaskString) + return false + } + + return RunMount( + *mountOptions.filer, + *mountOptions.filerMountRootPath, + *mountOptions.dir, + *mountOptions.collection, + *mountOptions.replication, + *mountOptions.dataCenter, + *mountOptions.chunkSizeLimitMB, + 
*mountOptions.allowOthers, + *mountOptions.ttlSec, + *mountOptions.dirListCacheLimit, + os.FileMode(umask), + *mountOptions.outsideContainerClusterMode, + ) +} + +func RunMount(filer, filerMountRootPath, dir, collection, replication, dataCenter string, chunkSizeLimitMB int, + allowOthers bool, ttlSec int, dirListCacheLimit int64, umask os.FileMode, outsideContainerClusterMode bool) bool { + + util.LoadConfiguration("security", false) + fmt.Printf("This is SeaweedFS version %s %s %s\n", util.VERSION, runtime.GOOS, runtime.GOARCH) - if *mountOptions.dir == "" { + if dir == "" { fmt.Printf("Please specify the mount directory via \"-dir\"") return false } - if *mountOptions.chunkSizeLimitMB <= 0 { + if chunkSizeLimitMB <= 0 { fmt.Printf("Please specify a reasonable buffer size.") return false } - fuse.Unmount(*mountOptions.dir) + fuse.Unmount(dir) - c, err := fuse.Mount( - *mountOptions.dir, - fuse.VolumeName("SeaweedFS"), - fuse.FSName("SeaweedFS"), - fuse.NoAppleDouble(), + uid, gid := uint32(0), uint32(0) + + // detect mount folder mode + mountMode := os.ModeDir | 0755 + fileInfo, err := os.Stat(dir) + if err == nil { + mountMode = os.ModeDir | fileInfo.Mode() + uid, gid = util.GetFileUidGid(fileInfo) + fmt.Printf("mount point owner uid=%d gid=%d mode=%s\n", uid, gid, fileInfo.Mode()) + } + + if uid == 0 { + if u, err := user.Current(); err == nil { + if parsedId, pe := strconv.ParseUint(u.Uid, 10, 32); pe == nil { + uid = uint32(parsedId) + } + if parsedId, pe := strconv.ParseUint(u.Gid, 10, 32); pe == nil { + gid = uint32(parsedId) + } + fmt.Printf("current uid=%d gid=%d\n", uid, gid) + } + } + + // Ensure target mount point availability + if isValid := checkMountPointAvailable(dir); !isValid { + glog.Fatalf("Expected mount to still be active, target mount point: %s, please check!", dir) + return false + } + + mountName := path.Base(dir) + + options := []fuse.MountOption{ + fuse.VolumeName(mountName), + fuse.FSName(filer + ":" + filerMountRootPath), + fuse.Subtype("seaweedfs"), + // fuse.NoAppleDouble(), // include .DS_Store, otherwise can not delete non-empty folders fuse.NoAppleXattr(), + fuse.NoBrowse(), + fuse.AutoXattr(), fuse.ExclCreate(), fuse.DaemonTimeout("3600"), - fuse.AllowOther(), fuse.AllowSUID(), fuse.DefaultPermissions(), - // fuse.MaxReadahead(1024*128), // TODO: not tested yet, possibly improving read performance + fuse.MaxReadahead(1024 * 128), fuse.AsyncRead(), fuse.WritebackCache(), - ) + fuse.AllowNonEmptyMount(), + } + + options = append(options, osSpecificMountOptions()...) + + if allowOthers { + options = append(options, fuse.AllowOther()) + } + + c, err := fuse.Mount(dir, options...) 
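+ // note: fuse.Mount returns before the kernel handshake completes; handshake
+ // failures surface later through <-c.Ready and c.MountError at the end of RunMount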
if err != nil { - glog.Fatal(err) - return false + glog.V(0).Infof("mount: %v", err) + daemonize.SignalOutcome(err) + return true } util.OnInterrupt(func() { - fuse.Unmount(*mountOptions.dir) + fuse.Unmount(dir) c.Close() }) - filerGrpcAddress, err := parseFilerGrpcAddress(*mountOptions.filer, *mountOptions.filerGrpcPort) + // parse filer grpc address + filerGrpcAddress, err := pb.ParseFilerGrpcAddress(filer) + if err != nil { + glog.V(0).Infof("ParseFilerGrpcAddress: %v", err) + daemonize.SignalOutcome(err) + return true + } + + // try to connect to filer, filerBucketsPath may be useful later + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + var cipher bool + err = pb.WithGrpcFilerClient(filerGrpcAddress, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer %s configuration: %v", filerGrpcAddress, err) + } + cipher = resp.Cipher + return nil + }) if err != nil { glog.Fatal(err) return false } - mountRoot := *mountOptions.filerMountRootPath + // find mount point + mountRoot := filerMountRootPath if mountRoot != "/" && strings.HasSuffix(mountRoot, "/") { mountRoot = mountRoot[0 : len(mountRoot)-1] } + daemonize.SignalOutcome(nil) + err = fs.Serve(c, filesys.NewSeaweedFileSystem(&filesys.Option{ - FilerGrpcAddress: filerGrpcAddress, - FilerMountRootPath: mountRoot, - Collection: *mountOptions.collection, - Replication: *mountOptions.replication, - TtlSec: int32(*mountOptions.ttlSec), - ChunkSizeLimit: int64(*mountOptions.chunkSizeLimitMB) * 1024 * 1024, - DataCenter: *mountOptions.dataCenter, - DirListingLimit: *mountOptions.dirListingLimit, + FilerGrpcAddress: filerGrpcAddress, + GrpcDialOption: grpcDialOption, + FilerMountRootPath: mountRoot, + Collection: collection, + Replication: replication, + TtlSec: int32(ttlSec), + ChunkSizeLimit: int64(chunkSizeLimitMB) * 1024 * 1024, + DataCenter: dataCenter, + DirListCacheLimit: dirListCacheLimit, + EntryCacheTtl: 3 * time.Second, + MountUid: uid, + MountGid: gid, + MountMode: mountMode, + MountCtime: fileInfo.ModTime(), + MountMtime: time.Now(), + Umask: umask, + OutsideContainerClusterMode: outsideContainerClusterMode, + Cipher: cipher, })) if err != nil { - fuse.Unmount(*mountOptions.dir) + fuse.Unmount(dir) } // check if the mount process has an error to report <-c.Ready if err := c.MountError; err != nil { - glog.Fatal(err) + glog.V(0).Infof("mount process: %v", err) + daemonize.SignalOutcome(err) + return true } return true diff --git a/weed/command/msg_broker.go b/weed/command/msg_broker.go new file mode 100644 index 000000000..3e13b4730 --- /dev/null +++ b/weed/command/msg_broker.go @@ -0,0 +1,107 @@ +package command + +import ( + "context" + "fmt" + "strconv" + "time" + + "google.golang.org/grpc/reflection" + + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/pb/queue_pb" + "github.com/chrislusf/seaweedfs/weed/security" + weed_server "github.com/chrislusf/seaweedfs/weed/server" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + messageBrokerStandaloneOptions QueueOptions +) + +type QueueOptions struct { + filer *string + port *int + defaultTtl *string +} + +func init() { + cmdMsgBroker.Run = runMsgBroker // break init cycle + messageBrokerStandaloneOptions.filer = cmdMsgBroker.Flag.String("filer", 
"localhost:8888", "filer server address") + messageBrokerStandaloneOptions.port = cmdMsgBroker.Flag.Int("port", 17777, "queue server gRPC listen port") + messageBrokerStandaloneOptions.defaultTtl = cmdMsgBroker.Flag.String("ttl", "1h", "time to live, e.g.: 1m, 1h, 1d, 1M, 1y") +} + +var cmdMsgBroker = &Command{ + UsageLine: "msg.broker [-port=17777] [-filer=<ip:port>]", + Short: "<WIP> start a message queue broker", + Long: `start a message queue broker + + The broker can accept gRPC calls to write or read messages. The messages are stored via filer. + The brokers are stateless. To scale up, just add more brokers. + +`, +} + +func runMsgBroker(cmd *Command, args []string) bool { + + util.LoadConfiguration("security", false) + + return messageBrokerStandaloneOptions.startQueueServer() + +} + +func (msgBrokerOpt *QueueOptions) startQueueServer() bool { + + filerGrpcAddress, err := pb.ParseFilerGrpcAddress(*msgBrokerOpt.filer) + if err != nil { + glog.Fatal(err) + return false + } + + filerQueuesPath := "/queues" + + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + + for { + err = pb.WithGrpcFilerClient(filerGrpcAddress, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer %s configuration: %v", filerGrpcAddress, err) + } + filerQueuesPath = resp.DirQueues + glog.V(0).Infof("Queue read filer queues dir: %s", filerQueuesPath) + return nil + }) + if err != nil { + glog.V(0).Infof("wait to connect to filer %s grpc address %s", *msgBrokerOpt.filer, filerGrpcAddress) + time.Sleep(time.Second) + } else { + glog.V(0).Infof("connected to filer %s grpc address %s", *msgBrokerOpt.filer, filerGrpcAddress) + break + } + } + + qs, err := weed_server.NewMessageBroker(&weed_server.MessageBrokerOption{ + Filers: []string{*msgBrokerOpt.filer}, + DefaultReplication: "", + MaxMB: 0, + Port: *msgBrokerOpt.port, + }) + + // start grpc listener + grpcL, err := util.NewListener(":"+strconv.Itoa(*msgBrokerOpt.port), 0) + if err != nil { + glog.Fatalf("failed to listen on grpc port %d: %v", *msgBrokerOpt.port, err) + } + grpcS := pb.NewGrpcServer(security.LoadServerTLS(util.GetViper(), "grpc.msg_broker")) + queue_pb.RegisterSeaweedQueueServer(grpcS, qs) + reflection.Register(grpcS) + grpcS.Serve(grpcL) + + return true + +} diff --git a/weed/command/s3.go b/weed/command/s3.go index 16a9490ff..cd4018fbc 100644 --- a/weed/command/s3.go +++ b/weed/command/s3.go @@ -1,64 +1,161 @@ package command import ( + "context" + "fmt" "net/http" "time" - "fmt" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/security" + + "github.com/gorilla/mux" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/s3api" "github.com/chrislusf/seaweedfs/weed/util" - "github.com/gorilla/mux" ) var ( - s3options S3Options + s3StandaloneOptions S3Options ) type S3Options struct { - filer *string - filerGrpcPort *int - filerBucketsPath *string - port *int - domainName *string - tlsPrivateKey *string - tlsCertificate *string + filer *string + port *int + config *string + domainName *string + tlsPrivateKey *string + tlsCertificate *string } func init() { cmdS3.Run = runS3 // break init cycle - s3options.filer = cmdS3.Flag.String("filer", "localhost:8888", "filer server address") - s3options.filerGrpcPort = cmdS3.Flag.Int("filer.grpcPort", 0, "filer 
server grpc port, default to filer http port plus 10000") - s3options.filerBucketsPath = cmdS3.Flag.String("filer.dir.buckets", "/buckets", "folder on filer to store all buckets") - s3options.port = cmdS3.Flag.Int("port", 8333, "s3options server http listen port") - s3options.domainName = cmdS3.Flag.String("domainName", "", "suffix of the host name, {bucket}.{domainName}") - s3options.tlsPrivateKey = cmdS3.Flag.String("key.file", "", "path to the TLS private key file") - s3options.tlsCertificate = cmdS3.Flag.String("cert.file", "", "path to the TLS certificate file") + s3StandaloneOptions.filer = cmdS3.Flag.String("filer", "localhost:8888", "filer server address") + s3StandaloneOptions.port = cmdS3.Flag.Int("port", 8333, "s3 server http listen port") + s3StandaloneOptions.domainName = cmdS3.Flag.String("domainName", "", "suffix of the host name, {bucket}.{domainName}") + s3StandaloneOptions.config = cmdS3.Flag.String("config", "", "path to the config file") + s3StandaloneOptions.tlsPrivateKey = cmdS3.Flag.String("key.file", "", "path to the TLS private key file") + s3StandaloneOptions.tlsCertificate = cmdS3.Flag.String("cert.file", "", "path to the TLS certificate file") } var cmdS3 = &Command{ - UsageLine: "s3 -port=8333 -filer=<ip:port>", + UsageLine: "s3 [-port=8333] [-filer=<ip:port>] [-config=</path/to/config.json>]", Short: "start a s3 API compatible server that is backed by a filer", Long: `start a s3 API compatible server that is backed by a filer. + By default, you can use any access key and secret key to access the S3 APIs. + To enable credential based access, create a config.json file similar to this: + +{ + "identities": [ + { + "name": "some_name", + "credentials": [ + { + "accessKey": "some_access_key1", + "secretKey": "some_secret_key1" + } + ], + "actions": [ + "Admin", + "Read", + "Write" + ] + }, + { + "name": "some_read_only_user", + "credentials": [ + { + "accessKey": "some_access_key2", + "secretKey": "some_secret_key2" + } + ], + "actions": [ + "Read" + ] + }, + { + "name": "some_normal_user", + "credentials": [ + { + "accessKey": "some_access_key3", + "secretKey": "some_secret_key3" + } + ], + "actions": [ + "Read", + "Write" + ] + }, + { + "name": "user_limited_to_bucket1", + "credentials": [ + { + "accessKey": "some_access_key4", + "secretKey": "some_secret_key4" + } + ], + "actions": [ + "Read:bucket1", + "Write:bucket1" + ] + } + ] +} + `, } func runS3(cmd *Command, args []string) bool { - filerGrpcAddress, err := parseFilerGrpcAddress(*s3options.filer, *s3options.filerGrpcPort) + util.LoadConfiguration("security", false) + + return s3StandaloneOptions.startS3Server() + +} + +func (s3opt *S3Options) startS3Server() bool { + + filerGrpcAddress, err := pb.ParseFilerGrpcAddress(*s3opt.filer) if err != nil { glog.Fatal(err) return false } + filerBucketsPath := "/buckets" + + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + + for { + err = pb.WithGrpcFilerClient(filerGrpcAddress, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer %s configuration: %v", filerGrpcAddress, err) + } + filerBucketsPath = resp.DirBuckets + glog.V(0).Infof("S3 read filer buckets dir: %s", filerBucketsPath) + return nil + }) + if err != nil { + glog.V(0).Infof("wait to connect to filer %s grpc address %s", *s3opt.filer, filerGrpcAddress) + time.Sleep(time.Second) + } else { + 
glog.V(0).Infof("connected to filer %s grpc address %s", *s3opt.filer, filerGrpcAddress) + break + } + } + router := mux.NewRouter().SkipClean(true) _, s3ApiServer_err := s3api.NewS3ApiServer(router, &s3api.S3ApiServerOption{ - Filer: *s3options.filer, + Filer: *s3opt.filer, FilerGrpcAddress: filerGrpcAddress, - DomainName: *s3options.domainName, - BucketsPath: *s3options.filerBucketsPath, + Config: *s3opt.config, + DomainName: *s3opt.domainName, + BucketsPath: filerBucketsPath, + GrpcDialOption: grpcDialOption, }) if s3ApiServer_err != nil { glog.Fatalf("S3 API Server startup error: %v", s3ApiServer_err) @@ -66,22 +163,22 @@ func runS3(cmd *Command, args []string) bool { httpS := &http.Server{Handler: router} - listenAddress := fmt.Sprintf(":%d", *s3options.port) + listenAddress := fmt.Sprintf(":%d", *s3opt.port) s3ApiListener, err := util.NewListener(listenAddress, time.Duration(10)*time.Second) if err != nil { glog.Fatalf("S3 API Server listener on %s error: %v", listenAddress, err) } - if *s3options.tlsPrivateKey != "" { - if err = httpS.ServeTLS(s3ApiListener, *s3options.tlsCertificate, *s3options.tlsPrivateKey); err != nil { + if *s3opt.tlsPrivateKey != "" { + glog.V(0).Infof("Start Seaweed S3 API Server %s at https port %d", util.VERSION, *s3opt.port) + if err = httpS.ServeTLS(s3ApiListener, *s3opt.tlsCertificate, *s3opt.tlsPrivateKey); err != nil { glog.Fatalf("S3 API Server Fail to serve: %v", err) } - glog.V(0).Infof("Start Seaweed S3 API Server %s at https port %d", util.VERSION, *s3options.port) } else { + glog.V(0).Infof("Start Seaweed S3 API Server %s at http port %d", util.VERSION, *s3opt.port) if err = httpS.Serve(s3ApiListener); err != nil { glog.Fatalf("S3 API Server Fail to serve: %v", err) } - glog.V(0).Infof("Start Seaweed S3 API Server %s at http port %d", util.VERSION, *s3options.port) } return true diff --git a/weed/command/scaffold.go b/weed/command/scaffold.go index 95ddbd57c..cb20adc72 100644 --- a/weed/command/scaffold.go +++ b/weed/command/scaffold.go @@ -10,16 +10,24 @@ func init() { } var cmdScaffold = &Command{ - UsageLine: "scaffold [filer]", + UsageLine: "scaffold -config=[filer|notification|replication|security|master]", Short: "generate basic configuration files", Long: `Generate filer.toml with all possible configurations for you to customize. + The options can also be overwritten by environment variables. + For example, the filer.toml mysql password can be overwritten by environment variable + export WEED_MYSQL_PASSWORD=some_password + Environment variable rules: + * Prefix fix with "WEED_" + * Upppercase the reset of variable name. + * Replace '.' 
with '_' + `, } var ( outputPath = cmdScaffold.Flag.String("output", "", "if not empty, save the configuration file to this directory") - config = cmdScaffold.Flag.String("config", "filer", "[filer|replication] the configuration file to generate") + config = cmdScaffold.Flag.String("config", "filer", "[filer|notification|replication|security|master] the configuration file to generate") ) func runScaffold(cmd *Command, args []string) bool { @@ -28,8 +36,14 @@ func runScaffold(cmd *Command, args []string) bool { switch *config { case "filer": content = FILER_TOML_EXAMPLE + case "notification": + content = NOTIFICATION_TOML_EXAMPLE case "replication": content = REPLICATION_TOML_EXAMPLE + case "security": + content = SECURITY_TOML_EXAMPLE + case "master": + content = MASTER_TOML_EXAMPLE } if content == "" { println("need a valid -config option") @@ -37,7 +51,7 @@ func runScaffold(cmd *Command, args []string) bool { } if *outputPath != "" { - ioutil.WriteFile(filepath.Join(*outputPath, *config+".toml"), []byte(content), 0x755) + ioutil.WriteFile(filepath.Join(*outputPath, *config+".toml"), []byte(content), 0644) } else { println(content) } @@ -53,27 +67,37 @@ const ( # $HOME/.seaweedfs/filer.toml # /etc/seaweedfs/filer.toml -[memory] -# local in memory, mostly for testing purpose -enabled = false +#################################################### +# Customizable filer server options +#################################################### +[filer.options] +# with http DELETE, by default the filer would check whether a folder is empty. +# recursive_delete will delete all sub folders and files, similar to "rm -Rf" +recursive_delete = false +# each directory under this folder will automatically become a separate bucket +buckets_folder = "/buckets" +# directories under this folder will store message queue data +queues_folder = "/queues" -[leveldb] +#################################################### +# The following are filer store options +#################################################### + +[leveldb2] # local on disk, mostly for simple single-machine setup, fairly scalable +# faster than previous leveldb, recommended. enabled = true dir = "." 
# directory to store level db files -#################################################### -# multiple filers on shared storage, fairly scalable -#################################################### - -[mysql] +[mysql] # or tidb # CREATE TABLE IF NOT EXISTS filemeta ( -# dirhash BIGINT COMMENT 'first 64 bits of MD5 hash value of directory field', -# name VARCHAR(1000) COMMENT 'directory or file name', -# directory VARCHAR(4096) COMMENT 'full path to parent directory', -# meta BLOB, +# dirhash BIGINT COMMENT 'first 64 bits of MD5 hash value of directory field', +# name VARCHAR(1000) COMMENT 'directory or file name', +# directory TEXT COMMENT 'full path to parent directory', +# meta LONGBLOB, # PRIMARY KEY (dirhash, name) # ) DEFAULT CHARSET=utf8; + enabled = false hostname = "localhost" port = 3306 @@ -82,12 +106,13 @@ password = "" database = "" # create or use an existing database connection_max_idle = 2 connection_max_open = 100 +interpolateParams = false -[postgres] +[postgres] # or cockroachdb # CREATE TABLE IF NOT EXISTS filemeta ( # dirhash BIGINT, -# name VARCHAR(1000), -# directory VARCHAR(4096), +# name VARCHAR(65535), +# directory VARCHAR(65535), # meta bytea, # PRIMARY KEY (dirhash, name) # ); @@ -118,7 +143,7 @@ hosts=[ enabled = false address = "localhost:6379" password = "" -db = 0 +database = 0 [redis_cluster] enabled = false @@ -130,23 +155,75 @@ addresses = [ "localhost:30005", "localhost:30006", ] +password = "" +# allows reads from slave servers or the master, but all writes still go to the master +readOnly = true +# automatically use the closest Redis server for reads +routeByLatency = true + +[etcd] +enabled = false +servers = "localhost:2379" +timeout = "3s" +` + + NOTIFICATION_TOML_EXAMPLE = ` +# A sample TOML config file for SeaweedFS filer notification +# Used by both "weed filer" or "weed server -filer" and "weed filer.replicate" +# Put this file in one of these locations, with descending priority +# ./notification.toml +# $HOME/.seaweedfs/notification.toml +# /etc/seaweedfs/notification.toml #################################################### # notification -# sends filer updates for each file to an external message queue +# send and receive filer updates for each file to an external message queue #################################################### [notification.log] +# this is only for debugging purposes and does not work with "weed filer.replicate" enabled = false + [notification.kafka] enabled = false hosts = [ "localhost:9092" ] topic = "seaweedfs_filer" +offsetFile = "./last.offset" +offsetSaveIntervalSeconds = 10 + + +[notification.aws_sqs] +# experimental, let me know if it works enabled = false +aws_access_key_id = "" # if empty, loads from the shared credentials file (~/.aws/credentials). +aws_secret_access_key = "" # if empty, loads from the shared credentials file (~/.aws/credentials). +region = "us-east-2" +sqs_queue_name = "my_filer_queue" # an existing queue name + +[notification.google_pub_sub] +# read credentials doc at https://cloud.google.com/docs/authentication/getting-started +enabled = false +google_application_credentials = "/path/to/x.json" # path to json credential file +project_id = "" # an existing project id +topic = "seaweedfs_filer_topic" # a topic, auto-created if it does not exist + +[notification.gocdk_pub_sub] +# The Go Cloud Development Kit (https://gocloud.dev). +# PubSub API (https://godoc.org/gocloud.dev/pubsub). +# Supports AWS SNS/SQS, Azure Service Bus, Google PubSub, NATS and RabbitMQ. 
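+# The RabbitMQ URLs below are just one of those options; the other Go CDK drivers +# follow the same pattern with their own URL schemes. Hedged examples (illustrative +# names, not part of this change): +#   topic_url = "gcppubsub://projects/myproject/topics/seaweedfs_filer" +#   topic_url = "mem://seaweedfs_filer"   # in-memory driver, handy for tests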
+enabled = false +# This URL will Dial the RabbitMQ server at the URL in the environment +# variable RABBIT_SERVER_URL and open the exchange "myexchange". +# The exchange must have already been created by some other means, like +# the RabbitMQ management plugin. +topic_url = "rabbit://myexchange" +sub_url = "rabbit://myqueue" ` + REPLICATION_TOML_EXAMPLE = ` # A sample TOML config file for replicating SeaweedFS filer # Used with "weed filer.replicate" @@ -158,34 +235,31 @@ topic = "seaweedfs_filer" [source.filer] enabled = true grpcAddress = "localhost:18888" -directory = "/buckets" # all files under this directory tree are replicated - -[notification.kafka] -enabled = false -hosts = [ - "localhost:9092" -] -topic = "seaweedfs_filer1_to_filer2" -offsetFile = "./last.offset" -offsetSaveIntervalSeconds = 10 +# all files under this directory tree are replicated. +# this is not a directory on your hard drive, but on your filer. +# i.e., all files with this "prefix" are sent to the notification message queue. +directory = "/buckets" [sink.filer] enabled = false grpcAddress = "localhost:18888" -directory = "/backup" # all replicated files are under this directory tree +# all replicated files are under this directory tree +# this is not a directory on your hard drive, but on your filer. +# i.e., all received files will be "prefixed" to this directory. +directory = "/backup" replication = "" collection = "" ttlSec = 0 [sink.s3] # read credentials doc at https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/sessions.html -# default loads credentials from the shared credentials file (~/.aws/credentials). +# default loads credentials from the shared credentials file (~/.aws/credentials). enabled = false aws_access_key_id = "" # if empty, loads from the shared credentials file (~/.aws/credentials). aws_secret_access_key = "" # if empty, loads from the shared credentials file (~/.aws/credentials). region = "us-east-2" bucket = "your_bucket_name" # an existing bucket -directory = "" # destination directory (do not prefix or suffix with "/") +directory = "/" # destination directory [sink.google_cloud_storage] # read credentials doc at https://cloud.google.com/docs/authentication/getting-started @@ -200,15 +274,124 @@ enabled = false account_name = "" account_key = "" container = "mycontainer" # an existing container -directory = "" # destination directory (do not prefix or suffix with "/") +directory = "/" # destination directory [sink.backblaze] -# experimental, let me know if it works enabled = false -account_id = "" -account_key = "" +b2_account_id = "" +b2_master_application_key = "" bucket = "mybucket" # an existing bucket -directory = "" # destination directory (do not prefix or suffix with "/") +directory = "/" # destination directory + +` + + SECURITY_TOML_EXAMPLE = ` +# Put this file in one of these locations, with descending priority +# ./security.toml +# $HOME/.seaweedfs/security.toml +# /etc/seaweedfs/security.toml +# this file is read by master, volume server, and filer + +# the jwt signing key is read by master and volume server. +# by default, a jwt expires after 10 seconds. +[jwt.signing] +key = "" +expires_after_seconds = 10 # seconds + +# jwt for read is only supported with master+volume setup. Filer does not support this mode. +[jwt.signing.read] +key = "" +expires_after_seconds = 10 # seconds + +# all grpc tls authentications are mutual +# the values for the following ca, cert, and key are paths to the PEM files. +# the host name is not checked, so the PEM files can be shared. 
+[grpc] +ca = "" + +[grpc.volume] +cert = "" +key = "" + +[grpc.master] +cert = "" +key = "" + +[grpc.filer] +cert = "" +key = "" + +[grpc.msg_broker] +cert = "" +key = "" + +# use this for any place needs a grpc client +# i.e., "weed backup|benchmark|filer.copy|filer.replicate|mount|s3|upload" +[grpc.client] +cert = "" +key = "" + + +# volume server https options +# Note: work in progress! +# this does not work with other clients, e.g., "weed filer|mount" etc, yet. +[https.client] +enabled = true +[https.volume] +cert = "" +key = "" + + +` + + MASTER_TOML_EXAMPLE = ` +# Put this file to one of the location, with descending priority +# ./master.toml +# $HOME/.seaweedfs/master.toml +# /etc/seaweedfs/master.toml +# this file is read by master + +[master.maintenance] +# periodically run these scripts are the same as running them from 'weed shell' +scripts = """ + ec.encode -fullPercent=95 -quietFor=1h + ec.rebuild -force + ec.balance -force + volume.balance -force +""" +sleep_minutes = 17 # sleep minutes between each script execution + +[master.filer] +default_filer_url = "http://localhost:8888/" + +[master.sequencer] +type = "memory" # Choose [memory|etcd] type for storing the file id sequence +# when sequencer.type = etcd, set listen client urls of etcd cluster that store file id sequence +# example : http://127.0.0.1:2379,http://127.0.0.1:2389 +sequencer_etcd_urls = "http://127.0.0.1:2379" + + +# configurations for tiered cloud storage +# old volumes are transparently moved to cloud for cost efficiency +[storage.backend] + [storage.backend.s3.default] + enabled = false + aws_access_key_id = "" # if empty, loads from the shared credentials file (~/.aws/credentials). + aws_secret_access_key = "" # if empty, loads from the shared credentials file (~/.aws/credentials). + region = "us-east-2" + bucket = "your_bucket_name" # an existing bucket + +# create this number of logical volumes if no more writable volumes +# count_x means how many copies of data. +# e.g.: +# 000 has only one copy, count_1 +# 010 and 001 has two copies, count_2 +# 011 has only 3 copies, count_3 +[master.volume_growth] +count_1 = 7 # create 1 x 7 = 7 actual volumes +count_2 = 6 # create 2 x 6 = 12 actual volumes +count_3 = 3 # create 3 x 3 = 9 actual volumes +count_other = 1 # create n x 1 = n actual volumes ` ) diff --git a/weed/command/scaffold_test.go b/weed/command/scaffold_test.go new file mode 100644 index 000000000..423dacc32 --- /dev/null +++ b/weed/command/scaffold_test.go @@ -0,0 +1,44 @@ +package command + +import ( + "bytes" + "fmt" + "testing" + + "github.com/spf13/viper" +) + +func TestReadingTomlConfiguration(t *testing.T) { + + viper.SetConfigType("toml") + + // any approach to require this configuration into your program. + var tomlExample = []byte(` +[database] +server = "192.168.1.1" +ports = [ 8001, 8001, 8002 ] +connection_max = 5000 +enabled = true + +[servers] + + # You can indent as you please. Tabs or spaces. TOML don't care. 
+ [servers.alpha] + ip = "10.0.0.1" + dc = "eqdc10" + + [servers.beta] + ip = "10.0.0.2" + dc = "eqdc10" + +`) + + if err := viper.ReadConfig(bytes.NewBuffer(tomlExample)); err != nil { + t.Fatalf("failed to read TOML config: %v", err) + } + + fmt.Printf("database is %v\n", viper.Get("database")) + fmt.Printf("servers is %v\n", viper.GetStringMap("servers")) + + alpha := viper.Sub("servers.alpha") + + fmt.Printf("alpha ip is %v\n", alpha.GetString("ip")) +} diff --git a/weed/command/server.go b/weed/command/server.go index 21f39924c..560b90037 100644 --- a/weed/command/server.go +++ b/weed/command/server.go @@ -1,22 +1,15 @@ package command import ( - "net/http" + "fmt" "os" "runtime" "runtime/pprof" - "strconv" "strings" - "sync" "time" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/pb/master_pb" - "github.com/chrislusf/seaweedfs/weed/server" "github.com/chrislusf/seaweedfs/weed/util" - "github.com/gorilla/mux" - "github.com/soheilhy/cmux" - "google.golang.org/grpc/reflection" ) type ServerOptions struct { @@ -26,7 +19,9 @@ type ServerOptions struct { var ( serverOptions ServerOptions + masterOptions MasterOptions filerOptions FilerOptions + s3Options S3Options ) func init() { @@ -35,68 +30,81 @@ func init() { var cmdServer = &Command{ UsageLine: "server -port=8080 -dir=/tmp -volume.max=5 -ip=server_name", - Short: "start a server, including volume server, and automatically elect a master server", + Short: "start a master server, a volume server, and optionally a filer and an S3 gateway", Long: `start both a volume server to provide storage spaces and a master server to provide volume=>location mapping service and sequence number of file ids This is provided as a convenient way to start both volume server and master server. - The servers are exactly the same as starting them separately. + The servers act exactly the same as when they are started separately. + So other volume servers can connect to this master server also. - So other volume servers can use this embedded master server also. - - Optionally, one filer server can be started. Logically, filer servers should not be in a cluster. - They run with meta data on disk, not shared. So each filer server is different. + Optionally, a filer server can be started. + Also optionally, an S3 gateway can be started. `, } var ( - serverIp = cmdServer.Flag.String("ip", "localhost", "ip or server name") - serverBindIp = cmdServer.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to") - serverMaxCpu = cmdServer.Flag.Int("maxCpu", 0, "maximum number of CPUs. 0 means all available CPUs") - serverTimeout = cmdServer.Flag.Int("idleTimeout", 30, "connection idle seconds") - serverDataCenter = cmdServer.Flag.String("dataCenter", "", "current volume server's data center name") - serverRack = cmdServer.Flag.String("rack", "", "current volume server's rack name") - serverWhiteListOption = cmdServer.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. 
No limit if empty.") - serverPeers = cmdServer.Flag.String("master.peers", "", "all master nodes in comma separated ip:masterPort list") - serverSecureKey = cmdServer.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)") - serverGarbageThreshold = cmdServer.Flag.Float64("garbageThreshold", 0.3, "threshold to vacuum and reclaim spaces") - masterPort = cmdServer.Flag.Int("master.port", 9333, "master server http listen port") - masterMetaFolder = cmdServer.Flag.String("master.dir", "", "data directory to store meta data, default to same as -dir specified") - masterVolumeSizeLimitMB = cmdServer.Flag.Uint("master.volumeSizeLimitMB", 30*1000, "Master stops directing writes to oversized volumes.") - masterVolumePreallocate = cmdServer.Flag.Bool("master.volumePreallocate", false, "Preallocate disk space for volumes.") - masterDefaultReplicaPlacement = cmdServer.Flag.String("master.defaultReplicaPlacement", "000", "Default replication type if not specified.") - volumeDataFolders = cmdServer.Flag.String("dir", os.TempDir(), "directories to store data files. dir[,dir]...") - volumeMaxDataVolumeCounts = cmdServer.Flag.String("volume.max", "7", "maximum numbers of volumes, count[,count]...") - pulseSeconds = cmdServer.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats") - isStartingFiler = cmdServer.Flag.Bool("filer", false, "whether to start filer") + serverIp = cmdServer.Flag.String("ip", "localhost", "ip or server name") + serverBindIp = cmdServer.Flag.String("ip.bind", "0.0.0.0", "ip address to bind to") + serverTimeout = cmdServer.Flag.Int("idleTimeout", 30, "connection idle seconds") + serverDataCenter = cmdServer.Flag.String("dataCenter", "", "current volume server's data center name") + serverRack = cmdServer.Flag.String("rack", "", "current volume server's rack name") + serverWhiteListOption = cmdServer.Flag.String("whiteList", "", "comma separated Ip addresses having write permission. No limit if empty.") + serverDisableHttp = cmdServer.Flag.Bool("disableHttp", false, "disable http requests, only gRPC operations are allowed.") + volumeDataFolders = cmdServer.Flag.String("dir", os.TempDir(), "directories to store data files. 
dir[,dir]...") + volumeMaxDataVolumeCounts = cmdServer.Flag.String("volume.max", "7", "maximum numbers of volumes, count[,count]...") + pulseSeconds = cmdServer.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats") + isStartingFiler = cmdServer.Flag.Bool("filer", false, "whether to start filer") + isStartingS3 = cmdServer.Flag.Bool("s3", false, "whether to start S3 gateway") serverWhiteList []string ) func init() { serverOptions.cpuprofile = cmdServer.Flag.String("cpuprofile", "", "cpu profile output file") + + masterOptions.port = cmdServer.Flag.Int("master.port", 9333, "master server http listen port") + masterOptions.metaFolder = cmdServer.Flag.String("master.dir", "", "data directory to store meta data, default to same as -dir specified") + masterOptions.peers = cmdServer.Flag.String("master.peers", "", "all master nodes in comma separated ip:masterPort list") + masterOptions.volumeSizeLimitMB = cmdServer.Flag.Uint("master.volumeSizeLimitMB", 30*1000, "Master stops directing writes to oversized volumes.") + masterOptions.volumePreallocate = cmdServer.Flag.Bool("master.volumePreallocate", false, "Preallocate disk space for volumes.") + masterOptions.defaultReplication = cmdServer.Flag.String("master.defaultReplication", "000", "Default replication type if not specified.") + masterOptions.garbageThreshold = cmdServer.Flag.Float64("garbageThreshold", 0.3, "threshold to vacuum and reclaim spaces") + masterOptions.metricsAddress = cmdServer.Flag.String("metrics.address", "", "Prometheus gateway address") + masterOptions.metricsIntervalSec = cmdServer.Flag.Int("metrics.intervalSeconds", 15, "Prometheus push interval in seconds") + filerOptions.collection = cmdServer.Flag.String("filer.collection", "", "all data will be stored in this collection") filerOptions.port = cmdServer.Flag.Int("filer.port", 8888, "filer server http listen port") - filerOptions.grpcPort = cmdServer.Flag.Int("filer.port.grpc", 0, "filer grpc server listen port, default to http port + 10000") filerOptions.publicPort = cmdServer.Flag.Int("filer.port.public", 0, "filer server public http listen port") filerOptions.defaultReplicaPlacement = cmdServer.Flag.String("filer.defaultReplicaPlacement", "", "Default replication type if not specified during runtime.") - filerOptions.redirectOnRead = cmdServer.Flag.Bool("filer.redirectOnRead", false, "whether proxy or redirect to volume server during file GET request") filerOptions.disableDirListing = cmdServer.Flag.Bool("filer.disableDirListing", false, "turn off directory listing") filerOptions.maxMB = cmdServer.Flag.Int("filer.maxMB", 32, "split files larger than the limit") filerOptions.dirListingLimit = cmdServer.Flag.Int("filer.dirListLimit", 1000, "limit sub dir listing size") + filerOptions.cipher = cmdServer.Flag.Bool("filer.encryptVolumeData", false, "encrypt data on volume servers") serverOptions.v.port = cmdServer.Flag.Int("volume.port", 8080, "volume server http listen port") serverOptions.v.publicPort = cmdServer.Flag.Int("volume.port.public", 0, "volume server public port") - serverOptions.v.indexType = cmdServer.Flag.String("volume.index", "memory", "Choose [memory|leveldb|boltdb|btree] mode for memory~performance balance.") - serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", true, "Adjust jpg orientation when uploading.") + serverOptions.v.indexType = cmdServer.Flag.String("volume.index", "memory", "Choose [memory|leveldb|leveldbMedium|leveldbLarge] mode for memory~performance balance.") + 
serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", false, "Adjust jpg orientation when uploading.") serverOptions.v.readRedirect = cmdServer.Flag.Bool("volume.read.redirect", true, "Redirect moved or non-local volumes.") + serverOptions.v.compactionMBPerSecond = cmdServer.Flag.Int("volume.compactionMBps", 0, "limit compaction speed in mega bytes per second") + serverOptions.v.fileSizeLimitMB = cmdServer.Flag.Int("volume.fileSizeLimitMB", 256, "limit file size to avoid out of memory") serverOptions.v.publicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address") + s3Options.port = cmdServer.Flag.Int("s3.port", 8333, "s3 server http listen port") + s3Options.domainName = cmdServer.Flag.String("s3.domainName", "", "suffix of the host name, {bucket}.{domainName}") + s3Options.tlsPrivateKey = cmdServer.Flag.String("s3.key.file", "", "path to the TLS private key file") + s3Options.tlsCertificate = cmdServer.Flag.String("s3.cert.file", "", "path to the TLS certificate file") + s3Options.config = cmdServer.Flag.String("s3.config", "", "path to the config file") + } func runServer(cmd *Command, args []string) bool { - filerOptions.secretKey = serverSecureKey + + util.LoadConfiguration("security", false) + util.LoadConfiguration("master", false) + if *serverOptions.cpuprofile != "" { f, err := os.Create(*serverOptions.cpuprofile) if err != nil { @@ -106,44 +114,56 @@ func runServer(cmd *Command, args []string) bool { defer pprof.StopCPUProfile() } - if *filerOptions.redirectOnRead { + if *isStartingS3 { *isStartingFiler = true } - master := *serverIp + ":" + strconv.Itoa(*masterPort) - filerOptions.ip = serverIp + _, peerList := checkPeers(*serverIp, *masterOptions.port, *masterOptions.peers) + peers := strings.Join(peerList, ",") + masterOptions.peers = &peers + + masterOptions.ip = serverIp + masterOptions.ipBind = serverBindIp + filerOptions.masters = &peers + filerOptions.ip = serverBindIp serverOptions.v.ip = serverIp serverOptions.v.bindIp = serverBindIp - serverOptions.v.masters = &master + serverOptions.v.masters = &peers serverOptions.v.idleConnectionTimeout = serverTimeout - serverOptions.v.maxCpu = serverMaxCpu serverOptions.v.dataCenter = serverDataCenter serverOptions.v.rack = serverRack + serverOptions.v.pulseSeconds = pulseSeconds + masterOptions.pulseSeconds = pulseSeconds + + masterOptions.whiteList = serverWhiteListOption filerOptions.dataCenter = serverDataCenter + filerOptions.disableHttp = serverDisableHttp + masterOptions.disableHttp = serverDisableHttp + + filerAddress := fmt.Sprintf("%s:%d", *serverIp, *filerOptions.port) + s3Options.filer = &filerAddress if *filerOptions.defaultReplicaPlacement == "" { - *filerOptions.defaultReplicaPlacement = *masterDefaultReplicaPlacement + *filerOptions.defaultReplicaPlacement = *masterOptions.defaultReplication } - if *serverMaxCpu < 1 { - *serverMaxCpu = runtime.NumCPU() - } - runtime.GOMAXPROCS(*serverMaxCpu) + runtime.GOMAXPROCS(runtime.NumCPU()) folders := strings.Split(*volumeDataFolders, ",") - if *masterVolumeSizeLimitMB > 30*1000 { + if *masterOptions.volumeSizeLimitMB > util.VolumeSizeLimitGB*1000 { glog.Fatalf("masterVolumeSizeLimitMB should be less than 30000") } - if *masterMetaFolder == "" { - *masterMetaFolder = folders[0] + if *masterOptions.metaFolder == "" { + *masterOptions.metaFolder = folders[0] } - if err := util.TestFolderWritable(*masterMetaFolder); err != nil { - glog.Fatalf("Check Meta Folder (-mdir=\"%s\") Writable: %s", *masterMetaFolder, err) + if 
err := util.TestFolderWritable(*masterOptions.metaFolder); err != nil { + glog.Fatalf("Check Meta Folder (-mdir=\"%s\") Writable: %s", *masterOptions.metaFolder, err) } + filerOptions.defaultLevelDbDirectory = masterOptions.metaFolder if *serverWhiteListOption != "" { serverWhiteList = strings.Split(*serverWhiteListOption, ",") @@ -158,63 +178,21 @@ func runServer(cmd *Command, args []string) bool { }() } - var raftWaitForMaster sync.WaitGroup - var volumeWait sync.WaitGroup - - raftWaitForMaster.Add(1) - volumeWait.Add(1) - - go func() { - r := mux.NewRouter() - ms := weed_server.NewMasterServer(r, *masterPort, *masterMetaFolder, - *masterVolumeSizeLimitMB, *masterVolumePreallocate, - *pulseSeconds, *masterDefaultReplicaPlacement, *serverGarbageThreshold, - serverWhiteList, *serverSecureKey, - ) - - glog.V(0).Infoln("Start Seaweed Master", util.VERSION, "at", *serverIp+":"+strconv.Itoa(*masterPort)) - masterListener, e := util.NewListener(*serverBindIp+":"+strconv.Itoa(*masterPort), 0) - if e != nil { - glog.Fatalf("Master startup error: %v", e) - } - + if *isStartingS3 { go func() { - raftWaitForMaster.Wait() - time.Sleep(100 * time.Millisecond) - myAddress, peers := checkPeers(*serverIp, *masterPort, *serverPeers) - raftServer := weed_server.NewRaftServer(r, peers, myAddress, *masterMetaFolder, ms.Topo, *pulseSeconds) - ms.SetRaftServer(raftServer) - volumeWait.Done() - }() - - raftWaitForMaster.Done() - - // start grpc and http server - m := cmux.New(masterListener) - - grpcL := m.Match(cmux.HTTP2HeaderField("content-type", "application/grpc")) - httpL := m.Match(cmux.Any()) - - // Create your protocol servers. - grpcS := util.NewGrpcServer() - master_pb.RegisterSeaweedServer(grpcS, ms) - reflection.Register(grpcS) + time.Sleep(2 * time.Second) - httpS := &http.Server{Handler: r} + s3Options.startS3Server() - go grpcS.Serve(grpcL) - go httpS.Serve(httpL) - - if err := m.Serve(); err != nil { - glog.Fatalf("master server failed to serve: %v", err) - } - - }() + }() + } - volumeWait.Wait() - time.Sleep(100 * time.Millisecond) + // start volume server + { + go serverOptions.v.startVolumeServer(*volumeDataFolders, *volumeMaxDataVolumeCounts, *serverWhiteListOption) + } - serverOptions.v.startVolumeServer(*volumeDataFolders, *volumeMaxDataVolumeCounts, *serverWhiteListOption) + startMaster(masterOptions, serverWhiteList) return true } diff --git a/weed/command/shell.go b/weed/command/shell.go index 19c5049c5..dcf70608f 100644 --- a/weed/command/shell.go +++ b/weed/command/shell.go @@ -1,61 +1,46 @@ package command import ( - "bufio" "fmt" - "os" - "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/shell" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + shellOptions shell.ShellOptions + shellInitialFilerUrl *string ) func init() { cmdShell.Run = runShell // break init cycle + shellOptions.Masters = cmdShell.Flag.String("master", "localhost:9333", "comma-separated master servers") + shellInitialFilerUrl = cmdShell.Flag.String("filer.url", "http://localhost:8888/", "initial filer url") } var cmdShell = &Command{ UsageLine: "shell", - Short: "run interactive commands, now just echo", - Long: `run interactive commands. + Short: "run interactive administrative commands", + Long: `run interactive administrative commands. 
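+ + Example commands (the same maintenance commands that appear in the master.toml scaffold above): + + > volume.balance -force + > ec.encode -fullPercent=95 -quietFor=1h + > ec.rebuild -force +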
`, } -var () - func runShell(command *Command, args []string) bool { - r := bufio.NewReader(os.Stdin) - o := bufio.NewWriter(os.Stdout) - e := bufio.NewWriter(os.Stderr) - prompt := func() { - var err error - if _, err = o.WriteString("> "); err != nil { - glog.V(0).Infoln("error writing to stdout:", err) - } - if err = o.Flush(); err != nil { - glog.V(0).Infoln("error flushing stdout:", err) - } - } - readLine := func() string { - ret, err := r.ReadString('\n') - if err != nil { - fmt.Fprint(e, err) - os.Exit(1) - } - return ret - } - execCmd := func(cmd string) int { - if cmd != "" { - if _, err := o.WriteString(cmd); err != nil { - glog.V(0).Infoln("error writing to stdout:", err) - } - } - return 0 - } - cmd := "" - for { - prompt() - cmd = readLine() - execCmd(cmd) + util.LoadConfiguration("security", false) + shellOptions.GrpcDialOption = security.LoadClientTLS(util.GetViper(), "grpc.client") + + var filerPwdErr error + shellOptions.FilerHost, shellOptions.FilerPort, shellOptions.Directory, filerPwdErr = util.ParseFilerUrl(*shellInitialFilerUrl) + if filerPwdErr != nil { + fmt.Printf("failed to parse url filer.url=%s : %v\n", *shellInitialFilerUrl, filerPwdErr) + return false } + + shell.RunShell(shellOptions) + + return true + } diff --git a/weed/command/upload.go b/weed/command/upload.go index c29d2b145..d71046131 100644 --- a/weed/command/upload.go +++ b/weed/command/upload.go @@ -8,6 +8,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/util" ) var ( @@ -23,7 +24,6 @@ type UploadOptions struct { dataCenter *string ttl *string maxMB *int - secretKey *string } func init() { @@ -36,8 +36,7 @@ func init() { upload.collection = cmdUpload.Flag.String("collection", "", "optional collection name") upload.dataCenter = cmdUpload.Flag.String("dataCenter", "", "optional data center name") upload.ttl = cmdUpload.Flag.String("ttl", "", "time to live, e.g.: 1m, 1h, 1d, 1M, 1y") - upload.maxMB = cmdUpload.Flag.Int("maxMB", 0, "split files larger than the limit") - upload.secretKey = cmdUpload.Flag.String("secure.secret", "", "secret to encrypt Json Web Token(JWT)") + upload.maxMB = cmdUpload.Flag.Int("maxMB", 32, "split files larger than the limit") } var cmdUpload = &Command{ @@ -53,18 +52,17 @@ var cmdUpload = &Command{ All files under the folder and subfolders will be uploaded, each with its own file key. Optional parameter "-include" allows you to specify the file name patterns. - If any file has a ".gz" extension, the content are considered gzipped already, and will be stored as is. - This can save volume server's gzipped processing and allow customizable gzip compression level. - The file name will strip out ".gz" and stored. For example, "jquery.js.gz" will be stored as "jquery.js". - - If "maxMB" is set to a positive number, files larger than it would be split into chunks and uploaded separatedly. + If "maxMB" is set to a positive number, files larger than it would be split into chunks and uploaded separately. The list of file ids of those chunks would be stored in an additional chunk, and this additional chunk's file id would be returned. 
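+ + A usage sketch (file and folder names are illustrative only): + + weed upload -maxMB=32 one_file.pdf another_file.txt + weed upload -dir=/some/folder -include=*.pdf +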
`, } func runUpload(cmd *Command, args []string) bool { - secret := security.Secret(*upload.secretKey) + + util.LoadConfiguration("security", false) + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + if len(args) == 0 { if *upload.dir == "" { return false @@ -81,9 +79,9 @@ func runUpload(cmd *Command, args []string) bool { if e != nil { return e } - results, e := operation.SubmitFiles(*upload.master, parts, + results, e := operation.SubmitFiles(*upload.master, grpcDialOption, parts, *upload.replication, *upload.collection, *upload.dataCenter, - *upload.ttl, *upload.maxMB, secret) + *upload.ttl, *upload.maxMB) bytes, _ := json.Marshal(results) fmt.Println(string(bytes)) if e != nil { @@ -100,9 +98,9 @@ func runUpload(cmd *Command, args []string) bool { if e != nil { fmt.Println(e.Error()) } - results, _ := operation.SubmitFiles(*upload.master, parts, + results, _ := operation.SubmitFiles(*upload.master, grpcDialOption, parts, *upload.replication, *upload.collection, *upload.dataCenter, - *upload.ttl, *upload.maxMB, secret) + *upload.ttl, *upload.maxMB) bytes, _ := json.Marshal(results) fmt.Println(string(bytes)) } diff --git a/weed/command/volume.go b/weed/command/volume.go index d848629ae..4773d8a55 100644 --- a/weed/command/volume.go +++ b/weed/command/volume.go @@ -1,6 +1,7 @@ package command import ( + "fmt" "net/http" "os" "runtime" @@ -9,12 +10,20 @@ import ( "strings" "time" + "github.com/spf13/viper" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/util/httpdown" + + "google.golang.org/grpc/reflection" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" "github.com/chrislusf/seaweedfs/weed/server" "github.com/chrislusf/seaweedfs/weed/storage" "github.com/chrislusf/seaweedfs/weed/util" - "google.golang.org/grpc/reflection" ) var ( @@ -32,7 +41,6 @@ type VolumeServerOptions struct { masters *string pulseSeconds *int idleConnectionTimeout *int - maxCpu *int dataCenter *string rack *string whiteList []string @@ -41,6 +49,8 @@ type VolumeServerOptions struct { readRedirect *bool cpuProfile *string memProfile *string + compactionMBPerSecond *int + fileSizeLimitMB *int } func init() { @@ -53,14 +63,15 @@ func init() { v.masters = cmdVolume.Flag.String("mserver", "localhost:9333", "comma-separated master servers") v.pulseSeconds = cmdVolume.Flag.Int("pulseSeconds", 5, "number of seconds between heartbeats, must be smaller than or equal to the master's setting") v.idleConnectionTimeout = cmdVolume.Flag.Int("idleTimeout", 30, "connection idle seconds") - v.maxCpu = cmdVolume.Flag.Int("maxCpu", 0, "maximum number of CPUs. 
0 means all available CPUs") v.dataCenter = cmdVolume.Flag.String("dataCenter", "", "current volume server's data center name") v.rack = cmdVolume.Flag.String("rack", "", "current volume server's rack name") - v.indexType = cmdVolume.Flag.String("index", "memory", "Choose [memory|leveldb|boltdb|btree] mode for memory~performance balance.") - v.fixJpgOrientation = cmdVolume.Flag.Bool("images.fix.orientation", true, "Adjust jpg orientation when uploading.") + v.indexType = cmdVolume.Flag.String("index", "memory", "Choose [memory|leveldb|leveldbMedium|leveldbLarge] mode for memory~performance balance.") + v.fixJpgOrientation = cmdVolume.Flag.Bool("images.fix.orientation", false, "Adjust jpg orientation when uploading.") v.readRedirect = cmdVolume.Flag.Bool("read.redirect", true, "Redirect moved or non-local volumes.") v.cpuProfile = cmdVolume.Flag.String("cpuprofile", "", "cpu profile output file") v.memProfile = cmdVolume.Flag.String("memprofile", "", "memory profile output file") + v.compactionMBPerSecond = cmdVolume.Flag.Int("compactionMBps", 0, "limit background compaction or copying speed in mega bytes per second") + v.fileSizeLimitMB = cmdVolume.Flag.Int("fileSizeLimitMB", 256, "limit file size to avoid out of memory") } var cmdVolume = &Command{ @@ -78,10 +89,10 @@ var ( ) func runVolume(cmd *Command, args []string) bool { - if *v.maxCpu < 1 { - *v.maxCpu = runtime.NumCPU() - } - runtime.GOMAXPROCS(*v.maxCpu) + + util.LoadConfiguration("security", false) + + runtime.GOMAXPROCS(runtime.NumCPU()) util.SetupProfiling(*v.cpuProfile, *v.memProfile) v.startVolumeServer(*volumeFolders, *maxVolumeCounts, *volumeWhiteListOption) @@ -91,7 +102,7 @@ func runVolume(cmd *Command, args []string) bool { func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, volumeWhiteListOption string) { - //Set multiple folders and each folder's max volume count limit' + // Set multiple folders and each folder's max volume count limit' v.folders = strings.Split(volumeFolders, ",") maxCountStrings := strings.Split(maxVolumeCounts, ",") for _, maxString := range maxCountStrings { @@ -110,7 +121,7 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v } } - //security related white list configuration + // security related white list configuration if volumeWhiteListOption != "" { v.whiteList = strings.Split(volumeWhiteListOption, ",") } @@ -125,11 +136,10 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v if *v.publicUrl == "" { *v.publicUrl = *v.ip + ":" + strconv.Itoa(*v.publicPort) } - isSeperatedPublicPort := *v.publicPort != *v.port volumeMux := http.NewServeMux() publicVolumeMux := volumeMux - if isSeperatedPublicPort { + if v.isSeparatedPublicPort() { publicVolumeMux = http.NewServeMux() } @@ -137,10 +147,10 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v switch *v.indexType { case "leveldb": volumeNeedleMapKind = storage.NeedleMapLevelDb - case "boltdb": - volumeNeedleMapKind = storage.NeedleMapBoltDb - case "btree": - volumeNeedleMapKind = storage.NeedleMapBtree + case "leveldbMedium": + volumeNeedleMapKind = storage.NeedleMapLevelDbMedium + case "leveldbLarge": + volumeNeedleMapKind = storage.NeedleMapLevelDbLarge } masters := *v.masters @@ -152,46 +162,135 @@ func (v VolumeServerOptions) startVolumeServer(volumeFolders, maxVolumeCounts, v strings.Split(masters, ","), *v.pulseSeconds, *v.dataCenter, *v.rack, v.whiteList, *v.fixJpgOrientation, *v.readRedirect, + *v.compactionMBPerSecond, + 
*v.fileSizeLimitMB, ) - listeningAddress := *v.bindIp + ":" + strconv.Itoa(*v.port) - glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "at", listeningAddress) - listener, e := util.NewListener(listeningAddress, time.Duration(*v.idleConnectionTimeout)*time.Second) - if e != nil { - glog.Fatalf("Volume server listener error:%v", e) - } - if isSeperatedPublicPort { - publicListeningAddress := *v.bindIp + ":" + strconv.Itoa(*v.publicPort) - glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "public at", publicListeningAddress) - publicListener, e := util.NewListener(publicListeningAddress, time.Duration(*v.idleConnectionTimeout)*time.Second) - if e != nil { - glog.Fatalf("Volume server listener error:%v", e) + // starting grpc server + grpcS := v.startGrpcService(volumeServer) + + // starting public http server + var publicHttpDown httpdown.Server + if v.isSeparatedPublicPort() { + publicHttpDown = v.startPublicHttpService(publicVolumeMux) + if nil == publicHttpDown { + glog.Fatalf("start public http service failed") } - go func() { - if e := http.Serve(publicListener, publicVolumeMux); e != nil { - glog.Fatalf("Volume server fail to serve public: %v", e) - } - }() } + // starting the cluster http server + clusterHttpServer := v.startClusterHttpService(volumeMux) + + stopChan := make(chan struct{}) util.OnInterrupt(func() { + fmt.Println("volume server has been killed") + var startTime time.Time + + // first, stop the public http service to prevent it from receiving new user requests + if nil != publicHttpDown { + startTime = time.Now() + if err := publicHttpDown.Stop(); err != nil { + glog.Warningf("stop the public http server failed, %v", err) + } + delta := time.Now().Sub(startTime).Nanoseconds() / 1e6 + glog.V(0).Infof("stop public http server, elapsed %dms", delta) + } + + startTime = time.Now() + if err := clusterHttpServer.Stop(); err != nil { + glog.Warningf("stop the cluster http server failed, %v", err) + } + delta := time.Now().Sub(startTime).Nanoseconds() / 1e6 + glog.V(0).Infof("graceful stop cluster http server, elapsed [%d]ms", delta) + + startTime = time.Now() + grpcS.GracefulStop() + delta = time.Now().Sub(startTime).Nanoseconds() / 1e6 + glog.V(0).Infof("graceful stop gRPC, elapsed [%d]ms", delta) + + startTime = time.Now() volumeServer.Shutdown() + delta = time.Now().Sub(startTime).Nanoseconds() / 1e6 + glog.V(0).Infof("stop volume server, elapsed [%d]ms", delta) + pprof.StopCPUProfile() + + close(stopChan) // notify exit }) - // starting grpc server + <-stopChan + glog.Warningf("the volume server exited.") +} + +// check whether a separate public port is configured +func (v VolumeServerOptions) isSeparatedPublicPort() bool { + return *v.publicPort != *v.port +} + +func (v VolumeServerOptions) startGrpcService(vs volume_server_pb.VolumeServerServer) *grpc.Server { grpcPort := *v.port + 10000 grpcL, err := util.NewListener(*v.bindIp+":"+strconv.Itoa(grpcPort), 0) if err != nil { glog.Fatalf("failed to listen on grpc port %d: %v", grpcPort, err) } - grpcS := util.NewGrpcServer() - volume_server_pb.RegisterVolumeServerServer(grpcS, volumeServer) + grpcS := pb.NewGrpcServer(security.LoadServerTLS(util.GetViper(), "grpc.volume")) + volume_server_pb.RegisterVolumeServerServer(grpcS, vs) reflection.Register(grpcS) - go grpcS.Serve(grpcL) + go func() { + if err := grpcS.Serve(grpcL); err != nil { + glog.Fatalf("start gRPC service failed, %s", err) + } + }() + return grpcS +} - if e := http.Serve(listener, volumeMux); e != nil { - glog.Fatalf("Volume server 
fail to serve: %v", e) +func (v VolumeServerOptions) startPublicHttpService(handler http.Handler) httpdown.Server { + publicListeningAddress := *v.bindIp + ":" + strconv.Itoa(*v.publicPort) + glog.V(0).Infoln("Start Seaweed volume server", util.VERSION, "public at", publicListeningAddress) + publicListener, e := util.NewListener(publicListeningAddress, time.Duration(*v.idleConnectionTimeout)*time.Second) + if e != nil { + glog.Fatalf("Volume server listener error:%v", e) } + pubHttp := httpdown.HTTP{StopTimeout: 5 * time.Minute, KillTimeout: 5 * time.Minute} + publicHttpDown := pubHttp.Serve(&http.Server{Handler: handler}, publicListener) + go func() { + if err := publicHttpDown.Wait(); err != nil { + glog.Errorf("public http down wait failed, %v", err) + } + }() + + return publicHttpDown +} + +func (v VolumeServerOptions) startClusterHttpService(handler http.Handler) httpdown.Server { + var ( + certFile, keyFile string + ) + if viper.GetString("https.volume.key") != "" { + certFile = viper.GetString("https.volume.cert") + keyFile = viper.GetString("https.volume.key") + } + + listeningAddress := *v.bindIp + ":" + strconv.Itoa(*v.port) + glog.V(0).Infof("Start Seaweed volume server %s at %s", util.VERSION, listeningAddress) + listener, e := util.NewListener(listeningAddress, time.Duration(*v.idleConnectionTimeout)*time.Second) + if e != nil { + glog.Fatalf("Volume server listener error:%v", e) + } + + httpDown := httpdown.HTTP{ + KillTimeout: 5 * time.Minute, + StopTimeout: 5 * time.Minute, + CertFile: certFile, + KeyFile: keyFile} + clusterHttpServer := httpDown.Serve(&http.Server{Handler: handler}, listener) + go func() { + if e := clusterHttpServer.Wait(); e != nil { + glog.Fatalf("Volume server fail to serve: %v", e) + } + }() + return clusterHttpServer } diff --git a/weed/command/webdav.go b/weed/command/webdav.go new file mode 100644 index 000000000..4f5d5f5ce --- /dev/null +++ b/weed/command/webdav.go @@ -0,0 +1,135 @@ +package command + +import ( + "context" + "fmt" + "net/http" + "os/user" + "strconv" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/server" + "github.com/chrislusf/seaweedfs/weed/util" +) + +var ( + webDavStandaloneOptions WebDavOption +) + +type WebDavOption struct { + filer *string + port *int + collection *string + tlsPrivateKey *string + tlsCertificate *string +} + +func init() { + cmdWebDav.Run = runWebDav // break init cycle + webDavStandaloneOptions.filer = cmdWebDav.Flag.String("filer", "localhost:8888", "filer server address") + webDavStandaloneOptions.port = cmdWebDav.Flag.Int("port", 7333, "webdav server http listen port") + webDavStandaloneOptions.collection = cmdWebDav.Flag.String("collection", "", "collection to create the files") + webDavStandaloneOptions.tlsPrivateKey = cmdWebDav.Flag.String("key.file", "", "path to the TLS private key file") + webDavStandaloneOptions.tlsCertificate = cmdWebDav.Flag.String("cert.file", "", "path to the TLS certificate file") +} + +var cmdWebDav = &Command{ + UsageLine: "webdav -port=7333 -filer=<ip:port>", + Short: "start a webdav server that is backed by a filer", + Long: `start a webdav server that is backed by a filer. 
+ +`, +} + +func runWebDav(cmd *Command, args []string) bool { + + util.LoadConfiguration("security", false) + + glog.V(0).Infof("Starting Seaweed WebDav Server %s at port %d", util.VERSION, *webDavStandaloneOptions.port) + + return webDavStandaloneOptions.startWebDav() + +} + +func (wo *WebDavOption) startWebDav() bool { + + // detect current user + uid, gid := uint32(0), uint32(0) + if u, err := user.Current(); err == nil { + if parsedId, pe := strconv.ParseUint(u.Uid, 10, 32); pe == nil { + uid = uint32(parsedId) + } + if parsedId, pe := strconv.ParseUint(u.Gid, 10, 32); pe == nil { + gid = uint32(parsedId) + } + } + + // parse filer grpc address + filerGrpcAddress, err := pb.ParseFilerGrpcAddress(*wo.filer) + if err != nil { + glog.Fatal(err) + return false + } + + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client") + + var cipher bool + // connect to filer + for { + err = pb.WithGrpcFilerClient(filerGrpcAddress, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer %s configuration: %v", filerGrpcAddress, err) + } + cipher = resp.Cipher + return nil + }) + if err != nil { + glog.V(0).Infof("wait to connect to filer %s grpc address %s", *wo.filer, filerGrpcAddress) + time.Sleep(time.Second) + } else { + glog.V(0).Infof("connected to filer %s grpc address %s", *wo.filer, filerGrpcAddress) + break + } + } + + ws, webdavServer_err := weed_server.NewWebDavServer(&weed_server.WebDavOption{ + Filer: *wo.filer, + FilerGrpcAddress: filerGrpcAddress, + GrpcDialOption: grpcDialOption, + Collection: *wo.collection, + Uid: uid, + Gid: gid, + Cipher: cipher, + }) + if webdavServer_err != nil { + glog.Fatalf("WebDav Server startup error: %v", webdavServer_err) + } + + httpS := &http.Server{Handler: ws.Handler} + + listenAddress := fmt.Sprintf(":%d", *wo.port) + webDavListener, err := util.NewListener(listenAddress, time.Duration(10)*time.Second) + if err != nil { + glog.Fatalf("WebDav Server listener on %s error: %v", listenAddress, err) + } + + if *wo.tlsPrivateKey != "" { + glog.V(0).Infof("Start Seaweed WebDav Server %s at https port %d", util.VERSION, *wo.port) + if err = httpS.ServeTLS(webDavListener, *wo.tlsCertificate, *wo.tlsPrivateKey); err != nil { + glog.Fatalf("WebDav Server Fail to serve: %v", err) + } + } else { + glog.V(0).Infof("Start Seaweed WebDav Server %s at http port %d", util.VERSION, *wo.port) + if err = httpS.Serve(webDavListener); err != nil { + glog.Fatalf("WebDav Server Fail to serve: %v", err) + } + } + + return true + +} diff --git a/weed/filer2/abstract_sql/abstract_sql_store.go b/weed/filer2/abstract_sql/abstract_sql_store.go index 5f2990475..864c858d3 100644 --- a/weed/filer2/abstract_sql/abstract_sql_store.go +++ b/weed/filer2/abstract_sql/abstract_sql_store.go @@ -1,24 +1,65 @@ package abstract_sql import ( + "context" "database/sql" "fmt" "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" ) type AbstractSqlStore struct { - DB *sql.DB - SqlInsert string - SqlUpdate string - SqlFind string - SqlDelete string - SqlListExclusive string - SqlListInclusive string + DB *sql.DB + SqlInsert string + SqlUpdate string + SqlFind string + SqlDelete string + SqlDeleteFolderChildren string + SqlListExclusive string + SqlListInclusive string } -func (store 
*AbstractSqlStore) InsertEntry(entry *filer2.Entry) (err error) { +type TxOrDB interface { + ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) + QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row + QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) +} + +func (store *AbstractSqlStore) BeginTransaction(ctx context.Context) (context.Context, error) { + tx, err := store.DB.BeginTx(ctx, &sql.TxOptions{ + Isolation: sql.LevelReadCommitted, + ReadOnly: false, + }) + if err != nil { + return ctx, err + } + + return context.WithValue(ctx, "tx", tx), nil +} +func (store *AbstractSqlStore) CommitTransaction(ctx context.Context) error { + if tx, ok := ctx.Value("tx").(*sql.Tx); ok { + return tx.Commit() + } + return nil +} +func (store *AbstractSqlStore) RollbackTransaction(ctx context.Context) error { + if tx, ok := ctx.Value("tx").(*sql.Tx); ok { + return tx.Rollback() + } + return nil +} + +func (store *AbstractSqlStore) getTxOrDB(ctx context.Context) TxOrDB { + if tx, ok := ctx.Value("tx").(*sql.Tx); ok { + return tx + } + return store.DB +} + +func (store *AbstractSqlStore) InsertEntry(ctx context.Context, entry *filer2.Entry) (err error) { dir, name := entry.FullPath.DirAndName() meta, err := entry.EncodeAttributesAndChunks() @@ -26,7 +67,7 @@ func (store *AbstractSqlStore) InsertEntry(entry *filer2.Entry) (err error) { return fmt.Errorf("encode %s: %s", entry.FullPath, err) } - res, err := store.DB.Exec(store.SqlInsert, hashToLong(dir), name, dir, meta) + res, err := store.getTxOrDB(ctx).ExecContext(ctx, store.SqlInsert, util.HashStringToLong(dir), name, dir, meta) if err != nil { return fmt.Errorf("insert %s: %s", entry.FullPath, err) } @@ -38,7 +79,7 @@ func (store *AbstractSqlStore) InsertEntry(entry *filer2.Entry) (err error) { return nil } -func (store *AbstractSqlStore) UpdateEntry(entry *filer2.Entry) (err error) { +func (store *AbstractSqlStore) UpdateEntry(ctx context.Context, entry *filer2.Entry) (err error) { dir, name := entry.FullPath.DirAndName() meta, err := entry.EncodeAttributesAndChunks() @@ -46,7 +87,7 @@ func (store *AbstractSqlStore) UpdateEntry(entry *filer2.Entry) (err error) { return fmt.Errorf("encode %s: %s", entry.FullPath, err) } - res, err := store.DB.Exec(store.SqlUpdate, meta, hashToLong(dir), name, dir) + res, err := store.getTxOrDB(ctx).ExecContext(ctx, store.SqlUpdate, meta, util.HashStringToLong(dir), name, dir) if err != nil { return fmt.Errorf("update %s: %s", entry.FullPath, err) } @@ -58,13 +99,13 @@ func (store *AbstractSqlStore) UpdateEntry(entry *filer2.Entry) (err error) { return nil } -func (store *AbstractSqlStore) FindEntry(fullpath filer2.FullPath) (*filer2.Entry, error) { +func (store *AbstractSqlStore) FindEntry(ctx context.Context, fullpath filer2.FullPath) (*filer2.Entry, error) { dir, name := fullpath.DirAndName() - row := store.DB.QueryRow(store.SqlFind, hashToLong(dir), name, dir) + row := store.getTxOrDB(ctx).QueryRowContext(ctx, store.SqlFind, util.HashStringToLong(dir), name, dir) var data []byte if err := row.Scan(&data); err != nil { - return nil, filer2.ErrNotFound + return nil, filer_pb.ErrNotFound } entry := &filer2.Entry{ @@ -77,11 +118,11 @@ func (store *AbstractSqlStore) FindEntry(fullpath filer2.FullPath) (*filer2.Entr return entry, nil } -func (store *AbstractSqlStore) DeleteEntry(fullpath filer2.FullPath) error { +func (store *AbstractSqlStore) DeleteEntry(ctx context.Context, fullpath filer2.FullPath) error { dir, name := 
fullpath.DirAndName() - res, err := store.DB.Exec(store.SqlDelete, hashToLong(dir), name, dir) + res, err := store.getTxOrDB(ctx).ExecContext(ctx, store.SqlDelete, util.HashStringToLong(dir), name, dir) if err != nil { return fmt.Errorf("delete %s: %s", fullpath, err) } @@ -94,14 +135,29 @@ func (store *AbstractSqlStore) DeleteEntry(fullpath filer2.FullPath) error { return nil } -func (store *AbstractSqlStore) ListDirectoryEntries(fullpath filer2.FullPath, startFileName string, inclusive bool, limit int) (entries []*filer2.Entry, err error) { +func (store *AbstractSqlStore) DeleteFolderChildren(ctx context.Context, fullpath filer2.FullPath) error { + + res, err := store.getTxOrDB(ctx).ExecContext(ctx, store.SqlDeleteFolderChildren, util.HashStringToLong(string(fullpath)), fullpath) + if err != nil { + return fmt.Errorf("deleteFolderChildren %s: %s", fullpath, err) + } + + _, err = res.RowsAffected() + if err != nil { + return fmt.Errorf("deleteFolderChildren %s but no rows affected: %s", fullpath, err) + } + + return nil +} + +func (store *AbstractSqlStore) ListDirectoryEntries(ctx context.Context, fullpath filer2.FullPath, startFileName string, inclusive bool, limit int) (entries []*filer2.Entry, err error) { sqlText := store.SqlListExclusive if inclusive { sqlText = store.SqlListInclusive } - rows, err := store.DB.Query(sqlText, hashToLong(string(fullpath)), startFileName, string(fullpath), limit) + rows, err := store.getTxOrDB(ctx).QueryContext(ctx, sqlText, util.HashStringToLong(string(fullpath)), startFileName, string(fullpath), limit) if err != nil { return nil, fmt.Errorf("list %s : %v", fullpath, err) } diff --git a/weed/filer2/abstract_sql/hashing.go b/weed/filer2/abstract_sql/hashing.go deleted file mode 100644 index 5c982c537..000000000 --- a/weed/filer2/abstract_sql/hashing.go +++ /dev/null @@ -1,32 +0,0 @@ -package abstract_sql - -import ( - "crypto/md5" - "io" -) - -// returns a 64 bit big int -func hashToLong(dir string) (v int64) { - h := md5.New() - io.WriteString(h, dir) - - b := h.Sum(nil) - - v += int64(b[0]) - v <<= 8 - v += int64(b[1]) - v <<= 8 - v += int64(b[2]) - v <<= 8 - v += int64(b[3]) - v <<= 8 - v += int64(b[4]) - v <<= 8 - v += int64(b[5]) - v <<= 8 - v += int64(b[6]) - v <<= 8 - v += int64(b[7]) - - return -} diff --git a/weed/filer2/cassandra/cassandra_store.go b/weed/filer2/cassandra/cassandra_store.go index e8f51dd04..6f25fffec 100644 --- a/weed/filer2/cassandra/cassandra_store.go +++ b/weed/filer2/cassandra/cassandra_store.go @@ -1,11 +1,15 @@ package cassandra import ( + "context" "fmt" + + "github.com/gocql/gocql" + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/util" - "github.com/gocql/gocql" ) func init() { @@ -21,10 +25,10 @@ func (store *CassandraStore) GetName() string { return "cassandra" } -func (store *CassandraStore) Initialize(configuration util.Configuration) (err error) { +func (store *CassandraStore) Initialize(configuration util.Configuration, prefix string) (err error) { return store.initialize( - configuration.GetString("keyspace"), - configuration.GetStringSlice("hosts"), + configuration.GetString(prefix+"keyspace"), + configuration.GetStringSlice(prefix+"hosts"), ) } @@ -39,7 +43,17 @@ func (store *CassandraStore) initialize(keyspace string, hosts []string) (err er return } -func (store *CassandraStore) InsertEntry(entry *filer2.Entry) (err error) { +func (store *CassandraStore) BeginTransaction(ctx 
context.Context) (context.Context, error) { + return ctx, nil +} +func (store *CassandraStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *CassandraStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *CassandraStore) InsertEntry(ctx context.Context, entry *filer2.Entry) (err error) { dir, name := entry.FullPath.DirAndName() meta, err := entry.EncodeAttributesAndChunks() @@ -48,20 +62,20 @@ func (store *CassandraStore) InsertEntry(entry *filer2.Entry) (err error) { } if err := store.session.Query( - "INSERT INTO filemeta (directory,name,meta) VALUES(?,?,?)", - dir, name, meta).Exec(); err != nil { + "INSERT INTO filemeta (directory,name,meta) VALUES(?,?,?) USING TTL ? ", + dir, name, meta, entry.TtlSec).Exec(); err != nil { return fmt.Errorf("insert %s: %s", entry.FullPath, err) } return nil } -func (store *CassandraStore) UpdateEntry(entry *filer2.Entry) (err error) { +func (store *CassandraStore) UpdateEntry(ctx context.Context, entry *filer2.Entry) (err error) { - return store.InsertEntry(entry) + return store.InsertEntry(ctx, entry) } -func (store *CassandraStore) FindEntry(fullpath filer2.FullPath) (entry *filer2.Entry, err error) { +func (store *CassandraStore) FindEntry(ctx context.Context, fullpath filer2.FullPath) (entry *filer2.Entry, err error) { dir, name := fullpath.DirAndName() var data []byte @@ -69,12 +83,12 @@ func (store *CassandraStore) FindEntry(fullpath filer2.FullPath) (entry *filer2. "SELECT meta FROM filemeta WHERE directory=? AND name=?", dir, name).Consistency(gocql.One).Scan(&data); err != nil { if err != gocql.ErrNotFound { - return nil, filer2.ErrNotFound + return nil, filer_pb.ErrNotFound } } if len(data) == 0 { - return nil, fmt.Errorf("not found: %s", fullpath) + return nil, filer_pb.ErrNotFound } entry = &filer2.Entry{ @@ -88,7 +102,7 @@ func (store *CassandraStore) FindEntry(fullpath filer2.FullPath) (entry *filer2. return entry, nil } -func (store *CassandraStore) DeleteEntry(fullpath filer2.FullPath) error { +func (store *CassandraStore) DeleteEntry(ctx context.Context, fullpath filer2.FullPath) error { dir, name := fullpath.DirAndName() @@ -101,7 +115,18 @@ func (store *CassandraStore) DeleteEntry(fullpath filer2.FullPath) error { return nil } -func (store *CassandraStore) ListDirectoryEntries(fullpath filer2.FullPath, startFileName string, inclusive bool, +func (store *CassandraStore) DeleteFolderChildren(ctx context.Context, fullpath filer2.FullPath) error { + + if err := store.session.Query( + "DELETE FROM filemeta WHERE directory=?", + fullpath).Exec(); err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *CassandraStore) ListDirectoryEntries(ctx context.Context, fullpath filer2.FullPath, startFileName string, inclusive bool, limit int) (entries []*filer2.Entry, err error) { cqlStr := "SELECT NAME, meta FROM filemeta WHERE directory=? AND name>? ORDER BY NAME ASC LIMIT ?" 
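
The AbstractSqlStore changes above thread an optional *sql.Tx through context.Context: BeginTransaction opens a transaction and stashes it under the "tx" context key, getTxOrDB prefers that transaction over the pooled *sql.DB, and stores without native transactions (Cassandra here, etcd below) simply return no-op Begin/Commit/Rollback. A minimal sketch of the calling pattern — renameEntry and the one-column filemeta schema are hypothetical, assumed only for illustration, not SeaweedFS code:

package sketch

import (
	"context"
	"database/sql"
)

// execContexter is the subset of the TxOrDB interface used here;
// both *sql.DB and *sql.Tx satisfy it.
type execContexter interface {
	ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error)
}

// getTxOrDB mirrors AbstractSqlStore.getTxOrDB: run on the transaction
// carried by the context when present, otherwise on the pool.
func getTxOrDB(ctx context.Context, db *sql.DB) execContexter {
	if tx, ok := ctx.Value("tx").(*sql.Tx); ok {
		return tx
	}
	return db
}

// renameEntry (hypothetical) needs a delete and an insert to land atomically;
// the two helpers below never know whether they run inside the transaction.
func renameEntry(ctx context.Context, db *sql.DB, oldName, newName string) error {
	tx, err := db.BeginTx(ctx, &sql.TxOptions{Isolation: sql.LevelReadCommitted})
	if err != nil {
		return err
	}
	ctx = context.WithValue(ctx, "tx", tx) // same plain string key the store uses

	if err := deleteEntry(ctx, db, oldName); err != nil {
		tx.Rollback() // rollback error deliberately ignored in this sketch
		return err
	}
	if err := insertEntry(ctx, db, newName); err != nil {
		tx.Rollback()
		return err
	}
	return tx.Commit()
}

func deleteEntry(ctx context.Context, db *sql.DB, name string) error {
	_, err := getTxOrDB(ctx, db).ExecContext(ctx, "DELETE FROM filemeta WHERE name=?", name)
	return err
}

func insertEntry(ctx context.Context, db *sql.DB, name string) error {
	_, err := getTxOrDB(ctx, db).ExecContext(ctx, "INSERT INTO filemeta (name) VALUES (?)", name)
	return err
}

A caller that never opens a transaction just passes a plain context, and the same helpers run directly against the connection pool.
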
diff --git a/weed/filer2/configuration.go b/weed/filer2/configuration.go index 14a9edd2c..a174117ea 100644 --- a/weed/filer2/configuration.go +++ b/weed/filer2/configuration.go @@ -13,10 +13,11 @@ var ( func (f *Filer) LoadConfiguration(config *viper.Viper) { + validateOneEnabledStore(config) + for _, store := range Stores { if config.GetBool(store.GetName() + ".enabled") { - viperSub := config.Sub(store.GetName()) - if err := store.Initialize(viperSub); err != nil { + if err := store.Initialize(config, store.GetName()+"."); err != nil { glog.Fatalf("Failed to initialize store for %s: %+v", store.GetName(), err) } @@ -34,3 +35,16 @@ func (f *Filer) LoadConfiguration(config *viper.Viper) { os.Exit(-1) } + +func validateOneEnabledStore(config *viper.Viper) { + enabledStore := "" + for _, store := range Stores { + if config.GetBool(store.GetName() + ".enabled") { + if enabledStore == "" { + enabledStore = store.GetName() + } else { + glog.Fatalf("Filer store is enabled for both %s and %s", enabledStore, store.GetName()) + } + } + } +} diff --git a/weed/filer2/entry.go b/weed/filer2/entry.go index 82bc00b27..c901927bb 100644 --- a/weed/filer2/entry.go +++ b/weed/filer2/entry.go @@ -8,15 +8,18 @@ import ( ) type Attr struct { - Mtime time.Time // time of last modification - Crtime time.Time // time of creation (OS X only) - Mode os.FileMode // file mode - Uid uint32 // owner uid - Gid uint32 // group gid - Mime string // mime type - Replication string // replication - Collection string // collection name - TtlSec int32 // ttl in seconds + Mtime time.Time // time of last modification + Crtime time.Time // time of creation (OS X only) + Mode os.FileMode // file mode + Uid uint32 // owner uid + Gid uint32 // group gid + Mime string // mime type + Replication string // replication + Collection string // collection name + TtlSec int32 // ttl in seconds + UserName string + GroupNames []string + SymlinkTarget string } func (attr Attr) IsDirectory() bool { @@ -27,6 +30,7 @@ type Entry struct { FullPath Attr + Extended map[string][]byte // the following is for files Chunks []*filer_pb.FileChunk `json:"chunks,omitempty"` @@ -43,3 +47,27 @@ func (entry *Entry) Timestamp() time.Time { return entry.Mtime } } + +func (entry *Entry) ToProtoEntry() *filer_pb.Entry { + if entry == nil { + return nil + } + return &filer_pb.Entry{ + Name: entry.FullPath.Name(), + IsDirectory: entry.IsDirectory(), + Attributes: EntryAttributeToPb(entry), + Chunks: entry.Chunks, + Extended: entry.Extended, + } +} + +func (entry *Entry) ToProtoFullEntry() *filer_pb.FullEntry { + if entry == nil { + return nil + } + dir, _ := entry.FullPath.DirAndName() + return &filer_pb.FullEntry{ + Dir: dir, + Entry: entry.ToProtoEntry(), + } +} diff --git a/weed/filer2/entry_codec.go b/weed/filer2/entry_codec.go index baa6a9440..3a2dc6134 100644 --- a/weed/filer2/entry_codec.go +++ b/weed/filer2/entry_codec.go @@ -1,18 +1,21 @@ package filer2 import ( + "bytes" + "fmt" "os" "time" - "fmt" + "github.com/golang/protobuf/proto" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/gogo/protobuf/proto" ) func (entry *Entry) EncodeAttributesAndChunks() ([]byte, error) { message := &filer_pb.Entry{ Attributes: EntryAttributeToPb(entry), Chunks: entry.Chunks, + Extended: entry.Extended, } return proto.Marshal(message) } @@ -27,6 +30,8 @@ func (entry *Entry) DecodeAttributesAndChunks(blob []byte) error { entry.Attr = PbToEntryAttribute(message.Attributes) + entry.Extended = message.Extended + entry.Chunks = message.Chunks return nil @@ -35,15 
+40,18 @@ func (entry *Entry) DecodeAttributesAndChunks(blob []byte) error { func EntryAttributeToPb(entry *Entry) *filer_pb.FuseAttributes { return &filer_pb.FuseAttributes{ - Crtime: entry.Attr.Crtime.Unix(), - Mtime: entry.Attr.Mtime.Unix(), - FileMode: uint32(entry.Attr.Mode), - Uid: entry.Uid, - Gid: entry.Gid, - Mime: entry.Mime, - Collection: entry.Attr.Collection, - Replication: entry.Attr.Replication, - TtlSec: entry.Attr.TtlSec, + Crtime: entry.Attr.Crtime.Unix(), + Mtime: entry.Attr.Mtime.Unix(), + FileMode: uint32(entry.Attr.Mode), + Uid: entry.Uid, + Gid: entry.Gid, + Mime: entry.Mime, + Collection: entry.Attr.Collection, + Replication: entry.Attr.Replication, + TtlSec: entry.Attr.TtlSec, + UserName: entry.Attr.UserName, + GroupName: entry.Attr.GroupNames, + SymlinkTarget: entry.Attr.SymlinkTarget, } } @@ -60,6 +68,9 @@ func PbToEntryAttribute(attr *filer_pb.FuseAttributes) Attr { t.Collection = attr.Collection t.Replication = attr.Replication t.TtlSec = attr.TtlSec + t.UserName = attr.UserName + t.GroupNames = attr.GroupName + t.SymlinkTarget = attr.SymlinkTarget return t } @@ -78,6 +89,10 @@ func EqualEntry(a, b *Entry) bool { return false } + if !eq(a.Extended, b.Extended) { + return false + } + for i := 0; i < len(a.Chunks); i++ { if !proto.Equal(a.Chunks[i], b.Chunks[i]) { return false @@ -85,3 +100,17 @@ func EqualEntry(a, b *Entry) bool { } return true } + +func eq(a, b map[string][]byte) bool { + if len(a) != len(b) { + return false + } + + for k, v := range a { + if w, ok := b[k]; !ok || !bytes.Equal(v, w) { + return false + } + } + + return true +} diff --git a/weed/filer2/etcd/etcd_store.go b/weed/filer2/etcd/etcd_store.go new file mode 100644 index 000000000..83a6ddc5d --- /dev/null +++ b/weed/filer2/etcd/etcd_store.go @@ -0,0 +1,198 @@ +package etcd + +import ( + "context" + "fmt" + "strings" + "time" + + "go.etcd.io/etcd/clientv3" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + weed_util "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + DIR_FILE_SEPARATOR = byte(0x00) +) + +func init() { + filer2.Stores = append(filer2.Stores, &EtcdStore{}) +} + +type EtcdStore struct { + client *clientv3.Client +} + +func (store *EtcdStore) GetName() string { + return "etcd" +} + +func (store *EtcdStore) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + servers := configuration.GetString(prefix + "servers") + if servers == "" { + servers = "localhost:2379" + } + + timeout := configuration.GetString(prefix + "timeout") + if timeout == "" { + timeout = "3s" + } + + return store.initialize(servers, timeout) +} + +func (store *EtcdStore) initialize(servers string, timeout string) (err error) { + glog.Infof("filer store etcd: %s", servers) + + to, err := time.ParseDuration(timeout) + if err != nil { + return fmt.Errorf("parse timeout %s: %s", timeout, err) + } + + store.client, err = clientv3.New(clientv3.Config{ + Endpoints: strings.Split(servers, ","), + DialTimeout: to, + }) + if err != nil { + return fmt.Errorf("connect to etcd %s: %s", servers, err) + } + + return +} + +func (store *EtcdStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *EtcdStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *EtcdStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *EtcdStore) InsertEntry(ctx context.Context, entry *filer2.Entry) (err 
error) { + key := genKey(entry.DirAndName()) + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + if _, err := store.client.Put(ctx, string(key), string(value)); err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + return nil +} + +func (store *EtcdStore) UpdateEntry(ctx context.Context, entry *filer2.Entry) (err error) { + return store.InsertEntry(ctx, entry) +} + +func (store *EtcdStore) FindEntry(ctx context.Context, fullpath filer2.FullPath) (entry *filer2.Entry, err error) { + key := genKey(fullpath.DirAndName()) + + resp, err := store.client.Get(ctx, string(key)) + if err != nil { + return nil, fmt.Errorf("get %s : %v", fullpath, err) + } + + if len(resp.Kvs) == 0 { + return nil, filer_pb.ErrNotFound + } + + entry = &filer2.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(resp.Kvs[0].Value) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + return entry, nil +} + +func (store *EtcdStore) DeleteEntry(ctx context.Context, fullpath filer2.FullPath) (err error) { + key := genKey(fullpath.DirAndName()) + + if _, err := store.client.Delete(ctx, string(key)); err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *EtcdStore) DeleteFolderChildren(ctx context.Context, fullpath filer2.FullPath) (err error) { + directoryPrefix := genDirectoryKeyPrefix(fullpath, "") + + if _, err := store.client.Delete(ctx, string(directoryPrefix), clientv3.WithPrefix()); err != nil { + return fmt.Errorf("deleteFolderChildren %s : %v", fullpath, err) + } + + return nil +} + +func (store *EtcdStore) ListDirectoryEntries( + ctx context.Context, fullpath filer2.FullPath, startFileName string, inclusive bool, limit int, +) (entries []*filer2.Entry, err error) { + directoryPrefix := genDirectoryKeyPrefix(fullpath, "") + + resp, err := store.client.Get(ctx, string(directoryPrefix), + clientv3.WithPrefix(), clientv3.WithSort(clientv3.SortByKey, clientv3.SortDescend)) + if err != nil { + return nil, fmt.Errorf("list %s : %v", fullpath, err) + } + + for _, kv := range resp.Kvs { + fileName := getNameFromKey(kv.Key) + if fileName == "" { + continue + } + if fileName == startFileName && !inclusive { + continue + } + limit-- + if limit < 0 { + break + } + entry := &filer2.Entry{ + FullPath: filer2.NewFullPath(string(fullpath), fileName), + } + if decodeErr := entry.DecodeAttributesAndChunks(kv.Value); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + entries = append(entries, entry) + } + + return entries, err +} + +func genKey(dirPath, fileName string) (key []byte) { + key = []byte(dirPath) + key = append(key, DIR_FILE_SEPARATOR) + key = append(key, []byte(fileName)...) + return key +} + +func genDirectoryKeyPrefix(fullpath filer2.FullPath, startFileName string) (keyPrefix []byte) { + keyPrefix = []byte(string(fullpath)) + keyPrefix = append(keyPrefix, DIR_FILE_SEPARATOR) + if len(startFileName) > 0 { + keyPrefix = append(keyPrefix, []byte(startFileName)...)
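+ // keys are laid out as <directory path> 0x00 <file name>, so the prefix covers exactly one directory; appending startFileName lets a paginated listing resume where the previous scan stopped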
+ } + return keyPrefix +} + +func getNameFromKey(key []byte) string { + sepIndex := len(key) - 1 + for sepIndex >= 0 && key[sepIndex] != DIR_FILE_SEPARATOR { + sepIndex-- + } + + return string(key[sepIndex+1:]) +} diff --git a/weed/filer2/filechunks.go b/weed/filer2/filechunks.go index b248a8c59..711488df1 100644 --- a/weed/filer2/filechunks.go +++ b/weed/filer2/filechunks.go @@ -3,8 +3,8 @@ package filer2 import ( "fmt" "hash/fnv" - "math" "sort" + "sync" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" ) @@ -33,14 +33,14 @@ func ETag(chunks []*filer_pb.FileChunk) (etag string) { func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*filer_pb.FileChunk) { - visibles := nonOverlappingVisibleIntervals(chunks) + visibles := NonOverlappingVisibleIntervals(chunks) fileIds := make(map[string]bool) for _, interval := range visibles { fileIds[interval.fileId] = true } for _, chunk := range chunks { - if found := fileIds[chunk.FileId]; found { + if _, found := fileIds[chunk.GetFileIdString()]; found { compacted = append(compacted, chunk) } else { garbage = append(garbage, chunk) @@ -50,15 +50,15 @@ func CompactFileChunks(chunks []*filer_pb.FileChunk) (compacted, garbage []*file return } -func FindUnusedFileChunks(oldChunks, newChunks []*filer_pb.FileChunk) (unused []*filer_pb.FileChunk) { +func MinusChunks(as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk) { fileIds := make(map[string]bool) - for _, interval := range newChunks { - fileIds[interval.FileId] = true + for _, interval := range bs { + fileIds[interval.GetFileIdString()] = true } - for _, chunk := range oldChunks { - if found := fileIds[chunk.FileId]; !found { - unused = append(unused, chunk) + for _, chunk := range as { + if _, found := fileIds[chunk.GetFileIdString()]; !found { + delta = append(delta, chunk) } } @@ -70,21 +70,35 @@ type ChunkView struct { Offset int64 Size uint64 LogicOffset int64 + IsFullChunk bool + CipherKey []byte + isGzipped bool } func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*ChunkView) { - visibles := nonOverlappingVisibleIntervals(chunks) + visibles := NonOverlappingVisibleIntervals(chunks) + + return ViewFromVisibleIntervals(visibles, offset, size) + +} + +func ViewFromVisibleIntervals(visibles []VisibleInterval, offset int64, size int) (views []*ChunkView) { stop := offset + int64(size) for _, chunk := range visibles { + if chunk.start <= offset && offset < chunk.stop && offset < stop { + isFullChunk := chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop views = append(views, &ChunkView{ FileId: chunk.fileId, Offset: offset - chunk.start, // offset is the data starting location in this file id Size: uint64(min(chunk.stop, stop) - offset), LogicOffset: offset, + IsFullChunk: isFullChunk, + CipherKey: chunk.cipherKey, + isGzipped: chunk.isGzipped, }) offset = min(chunk.stop, stop) } @@ -94,7 +108,7 @@ func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views } -func logPrintf(name string, visibles []*visibleInterval) { +func logPrintf(name string, visibles []VisibleInterval) { /* log.Printf("%s len %d", name, len(visibles)) for _, v := range visibles { @@ -103,152 +117,99 @@ func logPrintf(name string, visibles []*visibleInterval) { */ } -func nonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []*visibleInterval) { +var bufPool = sync.Pool{ + New: func() interface{} { + return new(VisibleInterval) + }, +} - sort.Slice(chunks, func(i, j int) bool { - if chunks[i].Offset < chunks[j].Offset 
{ - return true - } - if chunks[i].Offset == chunks[j].Offset { - return chunks[i].Mtime < chunks[j].Mtime - } - return false - }) +func MergeIntoVisibles(visibles, newVisibles []VisibleInterval, chunk *filer_pb.FileChunk) []VisibleInterval { - if len(chunks) == 0 { - return - } + newV := newVisibleInterval(chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.GetFileIdString(), chunk.Mtime, true, chunk.CipherKey, chunk.IsGzipped) - var parallelIntervals, intervals []*visibleInterval - var minStopInterval, upToDateInterval *visibleInterval - watermarkStart := chunks[0].Offset - for _, chunk := range chunks { - // log.Printf("checking chunk: [%d,%d)", chunk.Offset, chunk.Offset+int64(chunk.Size)) - logPrintf("parallelIntervals", parallelIntervals) - for len(parallelIntervals) > 0 && watermarkStart < chunk.Offset { - logPrintf("parallelIntervals loop 1", parallelIntervals) - logPrintf("parallelIntervals loop 1 intervals", intervals) - minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals) - nextStop := min(minStopInterval.stop, chunk.Offset) - intervals = append(intervals, newVisibleInterval( - max(watermarkStart, minStopInterval.start), - nextStop, - upToDateInterval.fileId, - upToDateInterval.modifiedTime, - )) - watermarkStart = nextStop - logPrintf("parallelIntervals loop intervals =>", intervals) - - // remove processed intervals, possibly multiple - var remaining []*visibleInterval - for _, interval := range parallelIntervals { - if interval.stop != watermarkStart { - remaining = append(remaining, interval) - } - } - parallelIntervals = remaining - logPrintf("parallelIntervals loop 2", parallelIntervals) - logPrintf("parallelIntervals loop 2 intervals", intervals) - } - parallelIntervals = append(parallelIntervals, newVisibleInterval( - chunk.Offset, - chunk.Offset+int64(chunk.Size), - chunk.FileId, - chunk.Mtime, - )) + length := len(visibles) + if length == 0 { + return append(visibles, newV) + } + last := visibles[length-1] + if last.stop <= chunk.Offset { + return append(visibles, newV) } - logPrintf("parallelIntervals loop 3", parallelIntervals) - logPrintf("parallelIntervals loop 3 intervals", intervals) - for len(parallelIntervals) > 0 { - minStopInterval, upToDateInterval = findMinStopInterval(parallelIntervals) - intervals = append(intervals, newVisibleInterval( - max(watermarkStart, minStopInterval.start), - minStopInterval.stop, - upToDateInterval.fileId, - upToDateInterval.modifiedTime, - )) - watermarkStart = minStopInterval.stop - - // remove processed intervals, possibly multiple - var remaining []*visibleInterval - for _, interval := range parallelIntervals { - if interval.stop != watermarkStart { - remaining = append(remaining, interval) - } + logPrintf(" before", visibles) + for _, v := range visibles { + if v.start < chunk.Offset && chunk.Offset < v.stop { + newVisibles = append(newVisibles, newVisibleInterval(v.start, chunk.Offset, v.fileId, v.modifiedTime, false, v.cipherKey, v.isGzipped)) } - parallelIntervals = remaining - } - logPrintf("parallelIntervals loop 4", parallelIntervals) - logPrintf("intervals", intervals) - - // merge connected intervals, now the intervals are non-intersecting - var lastIntervalIndex int - var prevIntervalIndex int - for i, interval := range intervals { - if i == 0 { - prevIntervalIndex = i - lastIntervalIndex = i - continue + chunkStop := chunk.Offset + int64(chunk.Size) + if v.start < chunkStop && chunkStop < v.stop { + newVisibles = append(newVisibles, newVisibleInterval(chunkStop, v.stop, v.fileId, 
v.modifiedTime, false, v.cipherKey, v.isGzipped)) } - if intervals[i-1].fileId != interval.fileId || - intervals[i-1].stop < intervals[i].start { - visibles = append(visibles, newVisibleInterval( - intervals[prevIntervalIndex].start, - intervals[i-1].stop, - intervals[prevIntervalIndex].fileId, - intervals[prevIntervalIndex].modifiedTime, - )) - prevIntervalIndex = i + if chunkStop <= v.start || v.stop <= chunk.Offset { + newVisibles = append(newVisibles, v) } - lastIntervalIndex = i - logPrintf("intervals loop 1 visibles", visibles) } + newVisibles = append(newVisibles, newV) - visibles = append(visibles, newVisibleInterval( - intervals[prevIntervalIndex].start, - intervals[lastIntervalIndex].stop, - intervals[prevIntervalIndex].fileId, - intervals[prevIntervalIndex].modifiedTime, - )) + logPrintf(" append", newVisibles) - logPrintf("visibles", visibles) + for i := len(newVisibles) - 1; i >= 0; i-- { + if i > 0 && newV.start < newVisibles[i-1].start { + newVisibles[i] = newVisibles[i-1] + } else { + newVisibles[i] = newV + break + } + } + logPrintf(" sorted", newVisibles) - return + return newVisibles } -func findMinStopInterval(intervals []*visibleInterval) (minStopInterval, upToDateInterval *visibleInterval) { - var latestMtime int64 - latestIntervalIndex := 0 - minStop := int64(math.MaxInt64) - minIntervalIndex := 0 - for i, interval := range intervals { - if minStop > interval.stop { - minIntervalIndex = i - minStop = interval.stop - } - if latestMtime < interval.modifiedTime { - latestMtime = interval.modifiedTime - latestIntervalIndex = i - } +func NonOverlappingVisibleIntervals(chunks []*filer_pb.FileChunk) (visibles []VisibleInterval) { + + sort.Slice(chunks, func(i, j int) bool { + return chunks[i].Mtime < chunks[j].Mtime + }) + + var newVisibles []VisibleInterval + for _, chunk := range chunks { + + newVisibles = MergeIntoVisibles(visibles, newVisibles, chunk) + t := visibles[:0] + visibles = newVisibles + newVisibles = t + + logPrintf("add", visibles) + } - minStopInterval = intervals[minIntervalIndex] - upToDateInterval = intervals[latestIntervalIndex] + return } // find non-overlapping visible intervals // visible interval map to one file chunk -type visibleInterval struct { +type VisibleInterval struct { start int64 stop int64 modifiedTime int64 fileId string + isFullChunk bool + cipherKey []byte + isGzipped bool } -func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64) *visibleInterval { - return &visibleInterval{start: start, stop: stop, fileId: fileId, modifiedTime: modifiedTime} +func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, isFullChunk bool, cipherKey []byte, isGzipped bool) VisibleInterval { + return VisibleInterval{ + start: start, + stop: stop, + fileId: fileId, + modifiedTime: modifiedTime, + isFullChunk: isFullChunk, + cipherKey: cipherKey, + isGzipped: isGzipped, + } } func min(x, y int64) int64 { @@ -257,10 +218,3 @@ func min(x, y int64) int64 { } return y } - -func max(x, y int64) int64 { - if x > y { - return x - } - return y -} diff --git a/weed/filer2/filechunks_test.go b/weed/filer2/filechunks_test.go index 90aa3df36..bb4a6c74d 100644 --- a/weed/filer2/filechunks_test.go +++ b/weed/filer2/filechunks_test.go @@ -4,6 +4,7 @@ import ( "log" "testing" + "fmt" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" ) @@ -15,25 +16,54 @@ func TestCompactFileChunks(t *testing.T) { {Offset: 110, Size: 200, FileId: "jkl", Mtime: 300}, } - compacted, garbarge := CompactFileChunks(chunks) - - log.Printf("Compacted: %+v", 
compacted) - log.Printf("Garbage : %+v", garbarge) + compacted, garbage := CompactFileChunks(chunks) if len(compacted) != 3 { t.Fatalf("unexpected compacted: %d", len(compacted)) } - if len(garbarge) != 1 { - t.Fatalf("unexpected garbarge: %d", len(garbarge)) + if len(garbage) != 1 { + t.Fatalf("unexpected garbage: %d", len(garbage)) + } + +} + +func TestCompactFileChunks2(t *testing.T) { + + chunks := []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 50}, + {Offset: 100, Size: 100, FileId: "def", Mtime: 100}, + {Offset: 200, Size: 100, FileId: "ghi", Mtime: 200}, + {Offset: 0, Size: 100, FileId: "abcf", Mtime: 300}, + {Offset: 50, Size: 100, FileId: "fhfh", Mtime: 400}, + {Offset: 100, Size: 100, FileId: "yuyu", Mtime: 500}, + } + + k := 3 + + for n := 0; n < k; n++ { + chunks = append(chunks, &filer_pb.FileChunk{ + Offset: int64(n * 100), Size: 100, FileId: fmt.Sprintf("fileId%d", n), Mtime: int64(n), + }) + chunks = append(chunks, &filer_pb.FileChunk{ + Offset: int64(n * 50), Size: 100, FileId: fmt.Sprintf("fileId%d", n+k), Mtime: int64(n + k), + }) } + compacted, garbage := CompactFileChunks(chunks) + + if len(compacted) != 4 { + t.Fatalf("unexpected compacted: %d", len(compacted)) + } + if len(garbage) != 8 { + t.Fatalf("unexpected garbage: %d", len(garbage)) + } } func TestIntervalMerging(t *testing.T) { testcases := []struct { Chunks []*filer_pb.FileChunk - Expected []*visibleInterval + Expected []*VisibleInterval }{ // case 0: normal { @@ -42,7 +72,7 @@ func TestIntervalMerging(t *testing.T) { {Offset: 100, Size: 100, FileId: "asdf", Mtime: 134}, {Offset: 200, Size: 100, FileId: "fsad", Mtime: 353}, }, - Expected: []*visibleInterval{ + Expected: []*VisibleInterval{ {start: 0, stop: 100, fileId: "abc"}, {start: 100, stop: 200, fileId: "asdf"}, {start: 200, stop: 300, fileId: "fsad"}, @@ -54,7 +84,7 @@ func TestIntervalMerging(t *testing.T) { {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, }, - Expected: []*visibleInterval{ + Expected: []*VisibleInterval{ {start: 0, stop: 200, fileId: "asdf"}, }, }, @@ -64,7 +94,7 @@ func TestIntervalMerging(t *testing.T) { {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, {Offset: 0, Size: 50, FileId: "asdf", Mtime: 134}, }, - Expected: []*visibleInterval{ + Expected: []*VisibleInterval{ {start: 0, stop: 50, fileId: "asdf"}, {start: 50, stop: 100, fileId: "abc"}, }, @@ -76,7 +106,7 @@ func TestIntervalMerging(t *testing.T) { {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, {Offset: 50, Size: 250, FileId: "xxxx", Mtime: 154}, }, - Expected: []*visibleInterval{ + Expected: []*VisibleInterval{ {start: 0, stop: 50, fileId: "asdf"}, {start: 50, stop: 300, fileId: "xxxx"}, }, @@ -88,7 +118,7 @@ func TestIntervalMerging(t *testing.T) { {Offset: 0, Size: 200, FileId: "asdf", Mtime: 134}, {Offset: 250, Size: 250, FileId: "xxxx", Mtime: 154}, }, - Expected: []*visibleInterval{ + Expected: []*VisibleInterval{ {start: 0, stop: 200, fileId: "asdf"}, {start: 250, stop: 500, fileId: "xxxx"}, }, @@ -101,7 +131,7 @@ func TestIntervalMerging(t *testing.T) { {Offset: 70, Size: 150, FileId: "abc", Mtime: 143}, {Offset: 80, Size: 100, FileId: "xxxx", Mtime: 134}, }, - Expected: []*visibleInterval{ + Expected: []*VisibleInterval{ {start: 0, stop: 200, fileId: "asdf"}, {start: 200, stop: 220, fileId: "abc"}, }, @@ -113,7 +143,7 @@ func TestIntervalMerging(t *testing.T) { {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, }, - Expected: 
[]*visibleInterval{ + Expected: []*VisibleInterval{ {start: 0, stop: 100, fileId: "abc"}, }, }, @@ -127,7 +157,7 @@ func TestIntervalMerging(t *testing.T) { {Offset: 8388608, Size: 3145728, FileId: "5,02982f80de50", Mtime: 160}, {Offset: 11534336, Size: 2842193, FileId: "7,0299ad723803", Mtime: 170}, }, - Expected: []*visibleInterval{ + Expected: []*VisibleInterval{ {start: 0, stop: 2097152, fileId: "3,029565bf3092"}, {start: 2097152, stop: 5242880, fileId: "6,029632f47ae2"}, {start: 5242880, stop: 8388608, fileId: "2,029734c5aa10"}, @@ -135,11 +165,28 @@ func TestIntervalMerging(t *testing.T) { {start: 11534336, stop: 14376529, fileId: "7,0299ad723803"}, }, }, + // case 8: real bug + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 77824, FileId: "4,0b3df938e301", Mtime: 123}, + {Offset: 471040, Size: 472225 - 471040, FileId: "6,0b3e0650019c", Mtime: 130}, + {Offset: 77824, Size: 208896 - 77824, FileId: "4,0b3f0c7202f0", Mtime: 140}, + {Offset: 208896, Size: 339968 - 208896, FileId: "2,0b4031a72689", Mtime: 150}, + {Offset: 339968, Size: 471040 - 339968, FileId: "3,0b416a557362", Mtime: 160}, + }, + Expected: []*VisibleInterval{ + {start: 0, stop: 77824, fileId: "4,0b3df938e301"}, + {start: 77824, stop: 208896, fileId: "4,0b3f0c7202f0"}, + {start: 208896, stop: 339968, fileId: "2,0b4031a72689"}, + {start: 339968, stop: 471040, fileId: "3,0b416a557362"}, + {start: 471040, stop: 472225, fileId: "6,0b3e0650019c"}, + }, + }, } for i, testcase := range testcases { log.Printf("++++++++++ merged test case %d ++++++++++++++++++++", i) - intervals := nonOverlappingVisibleIntervals(testcase.Chunks) + intervals := NonOverlappingVisibleIntervals(testcase.Chunks) for x, interval := range intervals { log.Printf("test case %d, interval %d, start=%d, stop=%d, fileId=%s", i, x, interval.start, interval.stop, interval.fileId) @@ -161,6 +208,7 @@ func TestIntervalMerging(t *testing.T) { if len(intervals) != len(testcase.Expected) { t.Fatalf("failed to compact test case %d, len %d expected %d", i, len(intervals), len(testcase.Expected)) } + } } @@ -283,6 +331,42 @@ func TestChunksReading(t *testing.T) { {Offset: 0, Size: 100, FileId: "asdf", LogicOffset: 100}, }, }, + // case 8: edge cases + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 100, FileId: "abc", Mtime: 123}, + {Offset: 90, Size: 200, FileId: "asdf", Mtime: 134}, + {Offset: 190, Size: 300, FileId: "fsad", Mtime: 353}, + }, + Offset: 0, + Size: 300, + Expected: []*ChunkView{ + {Offset: 0, Size: 90, FileId: "abc", LogicOffset: 0}, + {Offset: 0, Size: 100, FileId: "asdf", LogicOffset: 90}, + {Offset: 0, Size: 110, FileId: "fsad", LogicOffset: 190}, + }, + }, + // case 9: edge cases + { + Chunks: []*filer_pb.FileChunk{ + {Offset: 0, Size: 43175947, FileId: "2,111fc2cbfac1", Mtime: 1}, + {Offset: 43175936, Size: 52981771 - 43175936, FileId: "2,112a36ea7f85", Mtime: 2}, + {Offset: 52981760, Size: 72564747 - 52981760, FileId: "4,112d5f31c5e7", Mtime: 3}, + {Offset: 72564736, Size: 133255179 - 72564736, FileId: "1,113245f0cdb6", Mtime: 4}, + {Offset: 133255168, Size: 137269259 - 133255168, FileId: "3,1141a70733b5", Mtime: 5}, + {Offset: 137269248, Size: 153578836 - 137269248, FileId: "1,114201d5bbdb", Mtime: 6}, + }, + Offset: 0, + Size: 153578836, + Expected: []*ChunkView{ + {Offset: 0, Size: 43175936, FileId: "2,111fc2cbfac1", LogicOffset: 0}, + {Offset: 0, Size: 52981760 - 43175936, FileId: "2,112a36ea7f85", LogicOffset: 43175936}, + {Offset: 0, Size: 72564736 - 52981760, FileId: "4,112d5f31c5e7", LogicOffset: 52981760}, + {Offset: 0, 
Size: 133255168 - 72564736, FileId: "1,113245f0cdb6", LogicOffset: 72564736}, + {Offset: 0, Size: 137269248 - 133255168, FileId: "3,1141a70733b5", LogicOffset: 133255168}, + {Offset: 0, Size: 153578836 - 137269248, FileId: "1,114201d5bbdb", LogicOffset: 137269248}, + }, + }, } for i, testcase := range testcases { @@ -314,3 +398,23 @@ func TestChunksReading(t *testing.T) { } } + +func BenchmarkCompactFileChunks(b *testing.B) { + + var chunks []*filer_pb.FileChunk + + k := 1024 + + for n := 0; n < k; n++ { + chunks = append(chunks, &filer_pb.FileChunk{ + Offset: int64(n * 100), Size: 100, FileId: fmt.Sprintf("fileId%d", n), Mtime: int64(n), + }) + chunks = append(chunks, &filer_pb.FileChunk{ + Offset: int64(n * 50), Size: 100, FileId: fmt.Sprintf("fileId%d", n+k), Mtime: int64(n + k), + }) + } + + for n := 0; n < b.N; n++ { + CompactFileChunks(chunks) + } +} diff --git a/weed/filer2/filer.go b/weed/filer2/filer.go index 1c9f2cde4..d3343f610 100644 --- a/weed/filer2/filer.go +++ b/weed/filer2/filer.go @@ -8,29 +8,50 @@ import ( "strings" "time" + "google.golang.org/grpc" + + "github.com/karlseguin/ccache" + "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/wdclient" - "github.com/karlseguin/ccache" - "math" +) + +const PaginationSize = 1024 * 256 + +var ( + OS_UID = uint32(os.Getuid()) + OS_GID = uint32(os.Getgid()) ) type Filer struct { - store FilerStore - directoryCache *ccache.Cache - MasterClient *wdclient.MasterClient + store *FilerStoreWrapper + directoryCache *ccache.Cache + MasterClient *wdclient.MasterClient + fileIdDeletionQueue *util.UnboundedQueue + GrpcDialOption grpc.DialOption + DirBucketsPath string + DirQueuesPath string + buckets *FilerBuckets + Cipher bool } -func NewFiler(masters []string) *Filer { - return &Filer{ - directoryCache: ccache.New(ccache.Configure().MaxSize(1000).ItemsToPrune(100)), - MasterClient: wdclient.NewMasterClient(context.Background(), "filer", masters), +func NewFiler(masters []string, grpcDialOption grpc.DialOption, filerGrpcPort uint32) *Filer { + f := &Filer{ + directoryCache: ccache.New(ccache.Configure().MaxSize(1000).ItemsToPrune(100)), + MasterClient: wdclient.NewMasterClient(grpcDialOption, "filer", filerGrpcPort, masters), + fileIdDeletionQueue: util.NewUnboundedQueue(), + GrpcDialOption: grpcDialOption, } + + go f.loopProcessingDeletion() + + return f } func (f *Filer) SetStore(store FilerStore) { - f.store = store + f.store = NewFilerStoreWrapper(store) } func (f *Filer) DisableDirectoryCache() { @@ -45,7 +66,23 @@ func (fs *Filer) KeepConnectedToMaster() { fs.MasterClient.KeepConnectedToMaster() } -func (f *Filer) CreateEntry(entry *Entry) error { +func (f *Filer) BeginTransaction(ctx context.Context) (context.Context, error) { + return f.store.BeginTransaction(ctx) +} + +func (f *Filer) CommitTransaction(ctx context.Context) error { + return f.store.CommitTransaction(ctx) +} + +func (f *Filer) RollbackTransaction(ctx context.Context) error { + return f.store.RollbackTransaction(ctx) +} + +func (f *Filer) CreateEntry(ctx context.Context, entry *Entry, o_excl bool) error { + + if string(entry.FullPath) == "/" { + return nil + } dirParts := strings.Split(string(entry.FullPath), "/") @@ -54,7 +91,7 @@ func (f *Filer) CreateEntry(entry *Entry) error { var lastDirectoryEntry *Entry for i := 1; i < len(dirParts); i++ { - dirPath := "/" + filepath.Join(dirParts[:i]...) 
+ dirPath := "/" + filepath.ToSlash(filepath.Join(dirParts[:i]...)) // fmt.Printf("%d directory: %+v\n", i, dirPath) // first check local cache @@ -63,9 +100,9 @@ func (f *Filer) CreateEntry(entry *Entry) error { // not found, check the store directly if dirEntry == nil { glog.V(4).Infof("find uncached directory: %s", dirPath) - dirEntry, _ = f.FindEntry(FullPath(dirPath)) + dirEntry, _ = f.FindEntry(ctx, FullPath(dirPath)) } else { - glog.V(4).Infof("found cached directory: %s", dirPath) + // glog.V(4).Infof("found cached directory: %s", dirPath) } // no such existing directory @@ -77,25 +114,30 @@ func (f *Filer) CreateEntry(entry *Entry) error { dirEntry = &Entry{ FullPath: FullPath(dirPath), Attr: Attr{ - Mtime: now, - Crtime: now, - Mode: os.ModeDir | 0770, - Uid: entry.Uid, - Gid: entry.Gid, + Mtime: now, + Crtime: now, + Mode: os.ModeDir | 0770, + Uid: entry.Uid, + Gid: entry.Gid, + Collection: entry.Collection, + Replication: entry.Replication, }, } glog.V(2).Infof("create directory: %s %v", dirPath, dirEntry.Mode) - mkdirErr := f.store.InsertEntry(dirEntry) + mkdirErr := f.store.InsertEntry(ctx, dirEntry) if mkdirErr != nil { - if _, err := f.FindEntry(FullPath(dirPath)); err == ErrNotFound { + if _, err := f.FindEntry(ctx, FullPath(dirPath)); err == filer_pb.ErrNotFound { + glog.V(3).Infof("mkdir %s: %v", dirPath, mkdirErr) return fmt.Errorf("mkdir %s: %v", dirPath, mkdirErr) } } else { + f.maybeAddBucket(dirEntry) f.NotifyUpdateEvent(nil, dirEntry, false) } } else if !dirEntry.IsDirectory() { + glog.Errorf("CreateEntry %s: %s should be a directory", entry.FullPath, dirPath) return fmt.Errorf("%s is a file", dirPath) } @@ -110,6 +152,7 @@ func (f *Filer) CreateEntry(entry *Entry) error { } if lastDirectoryEntry == nil { + glog.Errorf("CreateEntry %s: lastDirectoryEntry is nil", entry.FullPath) return fmt.Errorf("parent folder not found: %v", entry.FullPath) } @@ -121,82 +164,118 @@ func (f *Filer) CreateEntry(entry *Entry) error { } */ - oldEntry, _ := f.FindEntry(entry.FullPath) + oldEntry, _ := f.FindEntry(ctx, entry.FullPath) + glog.V(4).Infof("CreateEntry %s: old entry: %v exclusive:%v", entry.FullPath, oldEntry, o_excl) if oldEntry == nil { - if err := f.store.InsertEntry(entry); err != nil { + if err := f.store.InsertEntry(ctx, entry); err != nil { + glog.Errorf("insert entry %s: %v", entry.FullPath, err) return fmt.Errorf("insert entry %s: %v", entry.FullPath, err) } } else { - if err := f.store.UpdateEntry(entry); err != nil { + if o_excl { + glog.V(3).Infof("EEXIST: entry %s already exists", entry.FullPath) + return fmt.Errorf("EEXIST: entry %s already exists", entry.FullPath) + } + if err := f.UpdateEntry(ctx, oldEntry, entry); err != nil { + glog.Errorf("update entry %s: %v", entry.FullPath, err) return fmt.Errorf("update entry %s: %v", entry.FullPath, err) } } + f.maybeAddBucket(entry) f.NotifyUpdateEvent(oldEntry, entry, true) f.deleteChunksIfNotNew(oldEntry, entry) - return nil -} + glog.V(4).Infof("CreateEntry %s: created", entry.FullPath) -func (f *Filer) UpdateEntry(entry *Entry) (err error) { - return f.store.UpdateEntry(entry) + return nil } -func (f *Filer) FindEntry(p FullPath) (entry *Entry, err error) { - return f.store.FindEntry(p) +func (f *Filer) UpdateEntry(ctx context.Context, oldEntry, entry *Entry) (err error) { + if oldEntry != nil { + if oldEntry.IsDirectory() && !entry.IsDirectory() { + glog.Errorf("existing %s is a directory", entry.FullPath) + return fmt.Errorf("existing %s is a directory", entry.FullPath) + } + if !oldEntry.IsDirectory() && 
entry.IsDirectory() { + glog.Errorf("existing %s is a file", entry.FullPath) + return fmt.Errorf("existing %s is a file", entry.FullPath) + } + } + return f.store.UpdateEntry(ctx, entry) } -func (f *Filer) DeleteEntryMetaAndData(p FullPath, isRecursive bool, shouldDeleteChunks bool) (err error) { - entry, err := f.FindEntry(p) - if err != nil { - return err +func (f *Filer) FindEntry(ctx context.Context, p FullPath) (entry *Entry, err error) { + + now := time.Now() + + if string(p) == "/" { + return &Entry{ + FullPath: p, + Attr: Attr{ + Mtime: now, + Crtime: now, + Mode: os.ModeDir | 0755, + Uid: OS_UID, + Gid: OS_GID, + }, + }, nil } - - if entry.IsDirectory() { - limit := int(1) - if isRecursive { - limit = math.MaxInt32 - } - entries, err := f.ListDirectoryEntries(p, "", false, limit) - if err != nil { - return fmt.Errorf("list folder %s: %v", p, err) + entry, err = f.store.FindEntry(ctx, p) + if entry != nil && entry.TtlSec > 0 { + if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { + f.store.DeleteEntry(ctx, p.Child(entry.Name())) + return nil, filer_pb.ErrNotFound } - if isRecursive { - for _, sub := range entries { - f.DeleteEntryMetaAndData(sub.FullPath, isRecursive, shouldDeleteChunks) - } - } else { - if len(entries) > 0 { - return fmt.Errorf("folder %s is not empty", p) - } - } - f.cacheDelDirectory(string(p)) } + return - if shouldDeleteChunks { - f.DeleteChunks(entry.Chunks) - } +} - if p == "/" { - return nil +func (f *Filer) ListDirectoryEntries(ctx context.Context, p FullPath, startFileName string, inclusive bool, limit int) ([]*Entry, error) { + if strings.HasSuffix(string(p), "/") && len(p) > 1 { + p = p[0 : len(p)-1] } - glog.V(3).Infof("deleting entry %v", p) - f.NotifyUpdateEvent(entry, nil, shouldDeleteChunks) + var makeupEntries []*Entry + entries, expiredCount, lastFileName, err := f.doListDirectoryEntries(ctx, p, startFileName, inclusive, limit) + for expiredCount > 0 && err == nil { + makeupEntries, expiredCount, lastFileName, err = f.doListDirectoryEntries(ctx, p, lastFileName, false, expiredCount) + if err == nil { + entries = append(entries, makeupEntries...) 
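+ // doListDirectoryEntries silently drops entries whose TTL has expired, so keep re-listing from the last name seen until the shortfall is made up or nothing more comes back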
+ } + } - return f.store.DeleteEntry(p) + return entries, err } -func (f *Filer) ListDirectoryEntries(p FullPath, startFileName string, inclusive bool, limit int) ([]*Entry, error) { - if strings.HasSuffix(string(p), "/") && len(p) > 1 { - p = p[0 : len(p)-1] +func (f *Filer) doListDirectoryEntries(ctx context.Context, p FullPath, startFileName string, inclusive bool, limit int) (entries []*Entry, expiredCount int, lastFileName string, err error) { + listedEntries, listErr := f.store.ListDirectoryEntries(ctx, p, startFileName, inclusive, limit) + if listErr != nil { + return listedEntries, expiredCount, "", listErr + } + for _, entry := range listedEntries { + lastFileName = entry.Name() + if entry.TtlSec > 0 { + if entry.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { + f.store.DeleteEntry(ctx, p.Child(entry.Name())) + expiredCount++ + continue + } + } + entries = append(entries, entry) } - return f.store.ListDirectoryEntries(p, startFileName, inclusive, limit) + return } func (f *Filer) cacheDelDirectory(dirpath string) { + + if dirpath == "/" { + return + } + if f.directoryCache == nil { return } @@ -205,6 +284,7 @@ func (f *Filer) cacheDelDirectory(dirpath string) { } func (f *Filer) cacheGetDirectory(dirpath string) *Entry { + if f.directoryCache == nil { return nil } @@ -228,45 +308,3 @@ func (f *Filer) cacheSetDirectory(dirpath string, dirEntry *Entry, level int) { f.directoryCache.Set(dirpath, dirEntry, time.Duration(minutes)*time.Minute) } - -func (f *Filer) DeleteChunks(chunks []*filer_pb.FileChunk) { - for _, chunk := range chunks { - f.DeleteFileByFileId(chunk.FileId) - } -} - -func (f *Filer) DeleteFileByFileId(fileId string) { - volumeServer, err := f.MasterClient.LookupVolumeServer(fileId) - if err != nil { - glog.V(0).Infof("can not find file %s: %v", fileId, err) - } - if _, err := operation.DeleteFilesAtOneVolumeServer(volumeServer, []string{fileId}); err != nil { - glog.V(0).Infof("deleting file %s: %v", fileId, err) - } -} - -func (f *Filer) deleteChunksIfNotNew(oldEntry, newEntry *Entry) { - - if oldEntry == nil { - return - } - if newEntry == nil { - f.DeleteChunks(oldEntry.Chunks) - } - - var toDelete []*filer_pb.FileChunk - - for _, oldChunk := range oldEntry.Chunks { - found := false - for _, newChunk := range newEntry.Chunks { - if oldChunk.FileId == newChunk.FileId { - found = true - break - } - } - if !found { - toDelete = append(toDelete, oldChunk) - } - } - f.DeleteChunks(toDelete) -} diff --git a/weed/filer2/filer_buckets.go b/weed/filer2/filer_buckets.go new file mode 100644 index 000000000..601b7dbf3 --- /dev/null +++ b/weed/filer2/filer_buckets.go @@ -0,0 +1,113 @@ +package filer2 + +import ( + "context" + "math" + "sync" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +type BucketName string +type BucketOption struct { + Name BucketName + Replication string +} +type FilerBuckets struct { + dirBucketsPath string + buckets map[BucketName]*BucketOption + sync.RWMutex +} + +func (f *Filer) LoadBuckets(dirBucketsPath string) { + + f.buckets = &FilerBuckets{ + buckets: make(map[BucketName]*BucketOption), + } + f.DirBucketsPath = dirBucketsPath + + limit := math.MaxInt32 + + entries, err := f.ListDirectoryEntries(context.Background(), FullPath(dirBucketsPath), "", false, limit) + + if err != nil { + glog.V(1).Infof("no buckets found: %v", err) + return + } + + glog.V(1).Infof("buckets found: %d", len(entries)) + + f.buckets.Lock() + for _, entry := range entries { + f.buckets.buckets[BucketName(entry.Name())] = &BucketOption{ + 
Name: BucketName(entry.Name()), + Replication: entry.Replication, + } + } + f.buckets.Unlock() + +} + +func (f *Filer) ReadBucketOption(bucketName string) (replication string) { + + f.buckets.RLock() + defer f.buckets.RUnlock() + + option, found := f.buckets.buckets[BucketName(bucketName)] + + if !found { + return "" + } + return option.Replication + +} + +func (f *Filer) isBucket(entry *Entry) bool { + if !entry.IsDirectory() { + return false + } + parent, dirName := entry.FullPath.DirAndName() + if parent != f.DirBucketsPath { + return false + } + + f.buckets.RLock() + defer f.buckets.RUnlock() + + _, found := f.buckets.buckets[BucketName(dirName)] + + return found + +} + +func (f *Filer) maybeAddBucket(entry *Entry) { + if !entry.IsDirectory() { + return + } + parent, dirName := entry.FullPath.DirAndName() + if parent != f.DirBucketsPath { + return + } + f.addBucket(dirName, &BucketOption{ + Name: BucketName(dirName), + Replication: entry.Replication, + }) +} + +func (f *Filer) addBucket(bucketName string, bucketOption *BucketOption) { + + f.buckets.Lock() + defer f.buckets.Unlock() + + f.buckets.buckets[BucketName(bucketName)] = bucketOption + +} + +func (f *Filer) deleteBucket(bucketName string) { + + f.buckets.Lock() + defer f.buckets.Unlock() + + delete(f.buckets.buckets, BucketName(bucketName)) + +} diff --git a/weed/filer2/filer_client_util.go b/weed/filer2/filer_client_util.go new file mode 100644 index 000000000..1c1fa6a5b --- /dev/null +++ b/weed/filer2/filer_client_util.go @@ -0,0 +1,169 @@ +package filer2 + +import ( + "context" + "fmt" + "io" + "math" + "strings" + "sync" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func VolumeId(fileId string) string { + lastCommaIndex := strings.LastIndex(fileId, ",") + if lastCommaIndex > 0 { + return fileId[:lastCommaIndex] + } + return fileId +} + +type FilerClient interface { + WithFilerClient(fn func(filer_pb.SeaweedFilerClient) error) error + AdjustedUrl(hostAndPort string) string +} + +func ReadIntoBuffer(filerClient FilerClient, fullFilePath FullPath, buff []byte, chunkViews []*ChunkView, baseOffset int64) (totalRead int64, err error) { + var vids []string + for _, chunkView := range chunkViews { + vids = append(vids, VolumeId(chunkView.FileId)) + } + + vid2Locations := make(map[string]*filer_pb.Locations) + + err = filerClient.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + glog.V(4).Infof("read fh lookup volume id locations: %v", vids) + resp, err := client.LookupVolume(context.Background(), &filer_pb.LookupVolumeRequest{ + VolumeIds: vids, + }) + if err != nil { + return err + } + + vid2Locations = resp.LocationsMap + + return nil + }) + + if err != nil { + return 0, fmt.Errorf("failed to lookup volume ids %v: %v", vids, err) + } + + var wg sync.WaitGroup + for _, chunkView := range chunkViews { + wg.Add(1) + go func(chunkView *ChunkView) { + defer wg.Done() + + glog.V(4).Infof("read fh reading chunk: %+v", chunkView) + + locations := vid2Locations[VolumeId(chunkView.FileId)] + if locations == nil || len(locations.Locations) == 0 { + glog.V(0).Infof("failed to locate %s", chunkView.FileId) + err = fmt.Errorf("failed to locate %s", chunkView.FileId) + return + } + + volumeServerAddress := filerClient.AdjustedUrl(locations.Locations[0].Url) + var n int64 + n, err = util.ReadUrl(fmt.Sprintf("http://%s/%s", volumeServerAddress, chunkView.FileId), chunkView.CipherKey, chunkView.isGzipped, chunkView.IsFullChunk, 
chunkView.Offset, int(chunkView.Size), buff[chunkView.LogicOffset-baseOffset:chunkView.LogicOffset-baseOffset+int64(chunkView.Size)]) + + if err != nil { + + glog.V(0).Infof("%v read http://%s/%v %v bytes: %v", fullFilePath, volumeServerAddress, chunkView.FileId, n, err) + + err = fmt.Errorf("failed to read http://%s/%s: %v", + volumeServerAddress, chunkView.FileId, err) + return + } + + glog.V(4).Infof("read fh read %d bytes: %+v", n, chunkView) + totalRead += n + + }(chunkView) + } + wg.Wait() + return +} + +func GetEntry(filerClient FilerClient, fullFilePath FullPath) (entry *filer_pb.Entry, err error) { + + dir, name := fullFilePath.DirAndName() + + err = filerClient.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.LookupDirectoryEntryRequest{ + Directory: dir, + Name: name, + } + + // glog.V(3).Infof("read %s request: %v", fullFilePath, request) + resp, err := filer_pb.LookupEntry(client, request) + if err != nil { + if err == filer_pb.ErrNotFound { + return nil + } + glog.V(3).Infof("read %s %v: %v", fullFilePath, resp, err) + return err + } + + if resp.Entry == nil { + // glog.V(3).Infof("read %s entry: %v", fullFilePath, entry) + return nil + } + + entry = resp.Entry + return nil + }) + + return +} + +func ReadDirAllEntries(filerClient FilerClient, fullDirPath FullPath, prefix string, fn func(entry *filer_pb.Entry, isLast bool)) (err error) { + + err = filerClient.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + lastEntryName := "" + + request := &filer_pb.ListEntriesRequest{ + Directory: string(fullDirPath), + Prefix: prefix, + StartFromFileName: lastEntryName, + Limit: math.MaxUint32, + } + + glog.V(3).Infof("read directory: %v", request) + stream, err := client.ListEntries(context.Background(), request) + if err != nil { + return fmt.Errorf("list %s: %v", fullDirPath, err) + } + + var prevEntry *filer_pb.Entry + for { + resp, recvErr := stream.Recv() + if recvErr != nil { + if recvErr == io.EOF { + if prevEntry != nil { + fn(prevEntry, true) + } + break + } else { + return recvErr + } + } + if prevEntry != nil { + fn(prevEntry, false) + } + prevEntry = resp.Entry + } + + return nil + + }) + + return +} diff --git a/weed/filer2/filer_delete_entry.go b/weed/filer2/filer_delete_entry.go new file mode 100644 index 000000000..d0792ac66 --- /dev/null +++ b/weed/filer2/filer_delete_entry.go @@ -0,0 +1,125 @@ +package filer2 + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" +) + +func (f *Filer) DeleteEntryMetaAndData(ctx context.Context, p FullPath, isRecursive bool, ignoreRecursiveError, shouldDeleteChunks bool) (err error) { + if p == "/" { + return nil + } + + entry, findErr := f.FindEntry(ctx, p) + if findErr != nil { + return findErr + } + + isCollection := f.isBucket(entry) + + var chunks []*filer_pb.FileChunk + chunks = append(chunks, entry.Chunks...) + if entry.IsDirectory() { + // delete the folder children, not including the folder itself + var dirChunks []*filer_pb.FileChunk + dirChunks, err = f.doBatchDeleteFolderMetaAndData(ctx, entry, isRecursive, ignoreRecursiveError, shouldDeleteChunks && !isCollection) + if err != nil { + glog.V(0).Infof("delete directory %s: %v", p, err) + return fmt.Errorf("delete directory %s: %v", p, err) + } + chunks = append(chunks, dirChunks...) 
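+ // chunk references from the entire subtree are collected first; the data itself is deleted asynchronously (via DeleteChunks below) only after the metadata deletion succeeds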
+ } + + // delete the file or folder + err = f.doDeleteEntryMetaAndData(ctx, entry, shouldDeleteChunks) + if err != nil { + return fmt.Errorf("delete file %s: %v", p, err) + } + + if shouldDeleteChunks && !isCollection { + go f.DeleteChunks(chunks) + } + if isCollection { + collectionName := entry.Name() + f.doDeleteCollection(collectionName) + f.deleteBucket(collectionName) + } + + return nil +} + +func (f *Filer) doBatchDeleteFolderMetaAndData(ctx context.Context, entry *Entry, isRecursive bool, ignoreRecursiveError, shouldDeleteChunks bool) (chunks []*filer_pb.FileChunk, err error) { + + lastFileName := "" + includeLastFile := false + for { + entries, err := f.ListDirectoryEntries(ctx, entry.FullPath, lastFileName, includeLastFile, PaginationSize) + if err != nil { + glog.Errorf("list folder %s: %v", entry.FullPath, err) + return nil, fmt.Errorf("list folder %s: %v", entry.FullPath, err) + } + if lastFileName == "" && !isRecursive && len(entries) > 0 { + // only for first iteration in the loop + return nil, fmt.Errorf("fail to delete non-empty folder: %s", entry.FullPath) + } + + for _, sub := range entries { + lastFileName = sub.Name() + var dirChunks []*filer_pb.FileChunk + if sub.IsDirectory() { + dirChunks, err = f.doBatchDeleteFolderMetaAndData(ctx, sub, isRecursive, ignoreRecursiveError, shouldDeleteChunks) + chunks = append(chunks, dirChunks...) + } else { + chunks = append(chunks, sub.Chunks...) + } + if err != nil && !ignoreRecursiveError { + return nil, err + } + } + + if len(entries) < PaginationSize { + break + } + } + + f.cacheDelDirectory(string(entry.FullPath)) + + glog.V(3).Infof("deleting directory %v delete %d chunks: %v", entry.FullPath, len(chunks), shouldDeleteChunks) + + if storeDeletionErr := f.store.DeleteFolderChildren(ctx, entry.FullPath); storeDeletionErr != nil { + return nil, fmt.Errorf("filer store delete: %v", storeDeletionErr) + } + f.NotifyUpdateEvent(entry, nil, shouldDeleteChunks) + + return chunks, nil +} + +func (f *Filer) doDeleteEntryMetaAndData(ctx context.Context, entry *Entry, shouldDeleteChunks bool) (err error) { + + glog.V(3).Infof("deleting entry %v, delete chunks: %v", entry.FullPath, shouldDeleteChunks) + + if storeDeletionErr := f.store.DeleteEntry(ctx, entry.FullPath); storeDeletionErr != nil { + return fmt.Errorf("filer store delete: %v", storeDeletionErr) + } + f.NotifyUpdateEvent(entry, nil, shouldDeleteChunks) + + return nil +} + +func (f *Filer) doDeleteCollection(collectionName string) (err error) { + + return f.MasterClient.WithClient(func(client master_pb.SeaweedClient) error { + _, err := client.CollectionDelete(context.Background(), &master_pb.CollectionDeleteRequest{ + Name: collectionName, + }) + if err != nil { + glog.Infof("delete collection %s: %v", collectionName, err) + } + return err + }) + +} diff --git a/weed/filer2/filer_deletion.go b/weed/filer2/filer_deletion.go new file mode 100644 index 000000000..3a64f636e --- /dev/null +++ b/weed/filer2/filer_deletion.go @@ -0,0 +1,84 @@ +package filer2 + +import ( + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func (f *Filer) loopProcessingDeletion() { + + lookupFunc := func(vids []string) (map[string]operation.LookupResult, error) { + m := make(map[string]operation.LookupResult) + for _, vid := range vids { + locs, _ := f.MasterClient.GetVidLocations(vid) + var locations []operation.Location + for _, loc := range locs { + locations = append(locations, 
operation.Location{ + Url: loc.Url, + PublicUrl: loc.PublicUrl, + }) + } + m[vid] = operation.LookupResult{ + VolumeId: vid, + Locations: locations, + } + } + return m, nil + } + + var deletionCount int + for { + deletionCount = 0 + f.fileIdDeletionQueue.Consume(func(fileIds []string) { + deletionCount = len(fileIds) + _, err := operation.DeleteFilesWithLookupVolumeId(f.GrpcDialOption, fileIds, lookupFunc) + if err != nil { + glog.V(0).Infof("deleting fileIds len=%d error: %v", deletionCount, err) + } else { + glog.V(1).Infof("deleting fileIds len=%d", deletionCount) + } + }) + + if deletionCount == 0 { + time.Sleep(1123 * time.Millisecond) + } + } +} + +func (f *Filer) DeleteChunks(chunks []*filer_pb.FileChunk) { + for _, chunk := range chunks { + f.fileIdDeletionQueue.EnQueue(chunk.GetFileIdString()) + } +} + +// DeleteFileByFileId direct delete by file id. +// Only used when the fileId is not being managed by snapshots. +func (f *Filer) DeleteFileByFileId(fileId string) { + f.fileIdDeletionQueue.EnQueue(fileId) +} + +func (f *Filer) deleteChunksIfNotNew(oldEntry, newEntry *Entry) { + + if oldEntry == nil { + return + } + if newEntry == nil { + f.DeleteChunks(oldEntry.Chunks) + return + } + + var toDelete []*filer_pb.FileChunk + newChunkIds := make(map[string]bool) + for _, newChunk := range newEntry.Chunks { + newChunkIds[newChunk.GetFileIdString()] = true + } + + for _, oldChunk := range oldEntry.Chunks { + if _, found := newChunkIds[oldChunk.GetFileIdString()]; !found { + toDelete = append(toDelete, oldChunk) + } + } + f.DeleteChunks(toDelete) +} diff --git a/weed/filer2/filer_notify.go b/weed/filer2/filer_notify.go index 44928516c..c37381116 100644 --- a/weed/filer2/filer_notify.go +++ b/weed/filer2/filer_notify.go @@ -20,26 +20,20 @@ func (f *Filer) NotifyUpdateEvent(oldEntry, newEntry *Entry, deleteChunks bool) glog.V(3).Infof("notifying entry update %v", key) + newParentPath := "" + if newEntry != nil { + newParentPath, _ = newEntry.FullPath.DirAndName() + } + notification.Queue.SendMessage( key, &filer_pb.EventNotification{ - OldEntry: toProtoEntry(oldEntry), - NewEntry: toProtoEntry(newEntry), - DeleteChunks: deleteChunks, + OldEntry: oldEntry.ToProtoEntry(), + NewEntry: newEntry.ToProtoEntry(), + DeleteChunks: deleteChunks, + NewParentPath: newParentPath, }, ) } } - -func toProtoEntry(entry *Entry) *filer_pb.Entry { - if entry == nil { - return nil - } - return &filer_pb.Entry{ - Name: string(entry.FullPath), - IsDirectory: entry.IsDirectory(), - Attributes: EntryAttributeToPb(entry), - Chunks: entry.Chunks, - } -} diff --git a/weed/filer2/filer_notify_test.go b/weed/filer2/filer_notify_test.go new file mode 100644 index 000000000..b74e2ad35 --- /dev/null +++ b/weed/filer2/filer_notify_test.go @@ -0,0 +1,51 @@ +package filer2 + +import ( + "testing" + "time" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/golang/protobuf/proto" +) + +func TestProtoMarshalText(t *testing.T) { + + oldEntry := &Entry{ + FullPath: FullPath("/this/path/to"), + Attr: Attr{ + Mtime: time.Now(), + Mode: 0644, + Uid: 1, + Mime: "text/json", + TtlSec: 25, + }, + Chunks: []*filer_pb.FileChunk{ + &filer_pb.FileChunk{ + FileId: "234,2423423422", + Offset: 234234, + Size: 234, + Mtime: 12312423, + ETag: "2342342354", + SourceFileId: "23234,2342342342", + }, + }, + } + + notification := &filer_pb.EventNotification{ + OldEntry: oldEntry.ToProtoEntry(), + NewEntry: nil, + DeleteChunks: true, + } + + text := proto.MarshalTextString(notification) + + notification2 := &filer_pb.EventNotification{} + 
proto.UnmarshalText(text, notification2) + + if notification2.OldEntry.Chunks[0].SourceFileId != notification.OldEntry.Chunks[0].SourceFileId { + t.Fatalf("marshal/unmarshal error: %s", text) + } + + println(text) + +} diff --git a/weed/filer2/filerstore.go b/weed/filer2/filerstore.go index 9ef1d9d48..f724f79c2 100644 --- a/weed/filer2/filerstore.go +++ b/weed/filer2/filerstore.go @@ -1,7 +1,11 @@ package filer2 import ( - "errors" + "context" + "time" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/stats" "github.com/chrislusf/seaweedfs/weed/util" ) @@ -9,13 +13,123 @@ type FilerStore interface { // GetName gets the name to locate the configuration in filer.toml file GetName() string // Initialize initializes the file store - Initialize(configuration util.Configuration) error - InsertEntry(*Entry) error - UpdateEntry(*Entry) (err error) + Initialize(configuration util.Configuration, prefix string) error + InsertEntry(context.Context, *Entry) error + UpdateEntry(context.Context, *Entry) (err error) // err == filer2.ErrNotFound if not found - FindEntry(FullPath) (entry *Entry, err error) - DeleteEntry(FullPath) (err error) - ListDirectoryEntries(dirPath FullPath, startFileName string, includeStartFile bool, limit int) ([]*Entry, error) + FindEntry(context.Context, FullPath) (entry *Entry, err error) + DeleteEntry(context.Context, FullPath) (err error) + DeleteFolderChildren(context.Context, FullPath) (err error) + ListDirectoryEntries(ctx context.Context, dirPath FullPath, startFileName string, includeStartFile bool, limit int) ([]*Entry, error) + + BeginTransaction(ctx context.Context) (context.Context, error) + CommitTransaction(ctx context.Context) error + RollbackTransaction(ctx context.Context) error } -var ErrNotFound = errors.New("filer: no entry is found in filer store") +type FilerStoreWrapper struct { + actualStore FilerStore +} + +func NewFilerStoreWrapper(store FilerStore) *FilerStoreWrapper { + if innerStore, ok := store.(*FilerStoreWrapper); ok { + return innerStore + } + return &FilerStoreWrapper{ + actualStore: store, + } +} + +func (fsw *FilerStoreWrapper) GetName() string { + return fsw.actualStore.GetName() +} + +func (fsw *FilerStoreWrapper) Initialize(configuration util.Configuration, prefix string) error { + return fsw.actualStore.Initialize(configuration, prefix) +} + +func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) error { + stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "insert").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "insert").Observe(time.Since(start).Seconds()) + }() + + filer_pb.BeforeEntrySerialization(entry.Chunks) + return fsw.actualStore.InsertEntry(ctx, entry) +} + +func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) error { + stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "update").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "update").Observe(time.Since(start).Seconds()) + }() + + filer_pb.BeforeEntrySerialization(entry.Chunks) + return fsw.actualStore.UpdateEntry(ctx, entry) +} + +func (fsw *FilerStoreWrapper) FindEntry(ctx context.Context, fp FullPath) (entry *Entry, err error) { + stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "find").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), 
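`FilerStoreWrapper` is a decorator: every store call increments a labeled counter and records its latency in a histogram before delegating to the wrapped implementation. A sketch of the same pattern with `prometheus/client_golang`; the metric names below are made up, not the ones SeaweedFS actually registers in its stats package:

```go
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	opCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{Name: "store_ops_total", Help: "store operations"},
		[]string{"store", "op"},
	)
	opLatency = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{Name: "store_op_seconds", Help: "store op latency"},
		[]string{"store", "op"},
	)
)

// instrument wraps any store operation with a counter and a latency histogram,
// the same shape as FilerStoreWrapper.InsertEntry/UpdateEntry/etc.
func instrument(store, op string, fn func() error) error {
	opCounter.WithLabelValues(store, op).Inc()
	start := time.Now()
	defer func() {
		opLatency.WithLabelValues(store, op).Observe(time.Since(start).Seconds())
	}()
	return fn()
}

func main() {
	prometheus.MustRegister(opCounter, opLatency)
	_ = instrument("leveldb", "insert", func() error {
		time.Sleep(10 * time.Millisecond) // stand-in for the real InsertEntry
		return nil
	})
}
```

One nicety of the wrapper: `NewFilerStoreWrapper` unwraps an already-wrapped store, so the instrumentation is never applied twice.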
"find").Observe(time.Since(start).Seconds()) + }() + + entry, err = fsw.actualStore.FindEntry(ctx, fp) + if err != nil { + return nil, err + } + filer_pb.AfterEntryDeserialization(entry.Chunks) + return +} + +func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp FullPath) (err error) { + stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "delete").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "delete").Observe(time.Since(start).Seconds()) + }() + + return fsw.actualStore.DeleteEntry(ctx, fp) +} + +func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp FullPath) (err error) { + stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "deleteFolderChildren").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "deleteFolderChildren").Observe(time.Since(start).Seconds()) + }() + + return fsw.actualStore.DeleteFolderChildren(ctx, fp) +} + +func (fsw *FilerStoreWrapper) ListDirectoryEntries(ctx context.Context, dirPath FullPath, startFileName string, includeStartFile bool, limit int) ([]*Entry, error) { + stats.FilerStoreCounter.WithLabelValues(fsw.actualStore.GetName(), "list").Inc() + start := time.Now() + defer func() { + stats.FilerStoreHistogram.WithLabelValues(fsw.actualStore.GetName(), "list").Observe(time.Since(start).Seconds()) + }() + + entries, err := fsw.actualStore.ListDirectoryEntries(ctx, dirPath, startFileName, includeStartFile, limit) + if err != nil { + return nil, err + } + for _, entry := range entries { + filer_pb.AfterEntryDeserialization(entry.Chunks) + } + return entries, err +} + +func (fsw *FilerStoreWrapper) BeginTransaction(ctx context.Context) (context.Context, error) { + return fsw.actualStore.BeginTransaction(ctx) +} + +func (fsw *FilerStoreWrapper) CommitTransaction(ctx context.Context) error { + return fsw.actualStore.CommitTransaction(ctx) +} + +func (fsw *FilerStoreWrapper) RollbackTransaction(ctx context.Context) error { + return fsw.actualStore.RollbackTransaction(ctx) +} diff --git a/weed/filer2/fullpath.go b/weed/filer2/fullpath.go index be6e34431..133069f93 100644 --- a/weed/filer2/fullpath.go +++ b/weed/filer2/fullpath.go @@ -3,15 +3,14 @@ package filer2 import ( "path/filepath" "strings" + + "github.com/chrislusf/seaweedfs/weed/util" ) type FullPath string func NewFullPath(dir, name string) FullPath { - if strings.HasSuffix(dir, "/") { - return FullPath(dir + name) - } - return FullPath(dir + "/" + name) + return FullPath(dir).Child(name) } func (fp FullPath) DirAndName() (string, string) { @@ -29,3 +28,15 @@ func (fp FullPath) Name() string { _, name := filepath.Split(string(fp)) return name } + +func (fp FullPath) Child(name string) FullPath { + dir := string(fp) + if strings.HasSuffix(dir, "/") { + return FullPath(dir + name) + } + return FullPath(dir + "/" + name) +} + +func (fp FullPath) AsInode() uint64 { + return uint64(util.HashStringToLong(string(fp))) +} diff --git a/weed/filer2/leveldb/leveldb_store.go b/weed/filer2/leveldb/leveldb_store.go index 5f3427a3d..807fcb56f 100644 --- a/weed/filer2/leveldb/leveldb_store.go +++ b/weed/filer2/leveldb/leveldb_store.go @@ -2,13 +2,17 @@ package leveldb import ( "bytes" + "context" "fmt" + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/opt" + leveldb_util "github.com/syndtr/goleveldb/leveldb/util" + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" + 
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" weed_util "github.com/chrislusf/seaweedfs/weed/util" - "github.com/syndtr/goleveldb/leveldb" - leveldb_util "github.com/syndtr/goleveldb/leveldb/util" ) const ( @@ -27,23 +31,41 @@ func (store *LevelDBStore) GetName() string { return "leveldb" } -func (store *LevelDBStore) Initialize(configuration weed_util.Configuration) (err error) { - dir := configuration.GetString("dir") +func (store *LevelDBStore) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + dir := configuration.GetString(prefix + "dir") return store.initialize(dir) } func (store *LevelDBStore) initialize(dir string) (err error) { + glog.Infof("filer store dir: %s", dir) if err := weed_util.TestFolderWritable(dir); err != nil { return fmt.Errorf("Check Level Folder %s Writable: %s", dir, err) } - if store.db, err = leveldb.OpenFile(dir, nil); err != nil { + opts := &opt.Options{ + BlockCacheCapacity: 32 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 16 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 10, + } + + if store.db, err = leveldb.OpenFile(dir, opts); err != nil { + glog.Infof("filer store open dir %s: %v", dir, err) return } return } -func (store *LevelDBStore) InsertEntry(entry *filer2.Entry) (err error) { +func (store *LevelDBStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *LevelDBStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *LevelDBStore) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *LevelDBStore) InsertEntry(ctx context.Context, entry *filer2.Entry) (err error) { key := genKey(entry.DirAndName()) value, err := entry.EncodeAttributesAndChunks() @@ -62,18 +84,18 @@ func (store *LevelDBStore) InsertEntry(entry *filer2.Entry) (err error) { return nil } -func (store *LevelDBStore) UpdateEntry(entry *filer2.Entry) (err error) { +func (store *LevelDBStore) UpdateEntry(ctx context.Context, entry *filer2.Entry) (err error) { - return store.InsertEntry(entry) + return store.InsertEntry(ctx, entry) } -func (store *LevelDBStore) FindEntry(fullpath filer2.FullPath) (entry *filer2.Entry, err error) { +func (store *LevelDBStore) FindEntry(ctx context.Context, fullpath filer2.FullPath) (entry *filer2.Entry, err error) { key := genKey(fullpath.DirAndName()) data, err := store.db.Get(key, nil) if err == leveldb.ErrNotFound { - return nil, filer2.ErrNotFound + return nil, filer_pb.ErrNotFound } if err != nil { return nil, fmt.Errorf("get %s : %v", entry.FullPath, err) @@ -92,7 +114,7 @@ func (store *LevelDBStore) FindEntry(fullpath filer2.FullPath) (entry *filer2.En return entry, nil } -func (store *LevelDBStore) DeleteEntry(fullpath filer2.FullPath) (err error) { +func (store *LevelDBStore) DeleteEntry(ctx context.Context, fullpath filer2.FullPath) (err error) { key := genKey(fullpath.DirAndName()) err = store.db.Delete(key, nil) @@ -103,7 +125,35 @@ func (store *LevelDBStore) DeleteEntry(fullpath filer2.FullPath) (err error) { return nil } -func (store *LevelDBStore) ListDirectoryEntries(fullpath filer2.FullPath, startFileName string, inclusive bool, +func (store *LevelDBStore) DeleteFolderChildren(ctx context.Context, fullpath filer2.FullPath) (err error) { + + batch := new(leveldb.Batch) + + directoryPrefix := genDirectoryKeyPrefix(fullpath, "") + iter := store.db.NewIterator(&leveldb_util.Range{Start: directoryPrefix}, nil) + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, 
directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + batch.Delete([]byte(genKey(string(fullpath), fileName))) + } + iter.Release() + + err = store.db.Write(batch, nil) + + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDBStore) ListDirectoryEntries(ctx context.Context, fullpath filer2.FullPath, startFileName string, inclusive bool, limit int) (entries []*filer2.Entry, err error) { directoryPrefix := genDirectoryKeyPrefix(fullpath, "") diff --git a/weed/filer2/leveldb/leveldb_store_test.go b/weed/filer2/leveldb/leveldb_store_test.go index ad72a2e60..497158420 100644 --- a/weed/filer2/leveldb/leveldb_store_test.go +++ b/weed/filer2/leveldb/leveldb_store_test.go @@ -1,6 +1,7 @@ package leveldb import ( + "context" "github.com/chrislusf/seaweedfs/weed/filer2" "io/ioutil" "os" @@ -8,7 +9,7 @@ import ( ) func TestCreateAndFind(t *testing.T) { - filer := filer2.NewFiler(nil) + filer := filer2.NewFiler(nil, nil, 0) dir, _ := ioutil.TempDir("", "seaweedfs_filer_test") defer os.RemoveAll(dir) store := &LevelDBStore{} @@ -18,6 +19,8 @@ func TestCreateAndFind(t *testing.T) { fullpath := filer2.FullPath("/home/chris/this/is/one/file1.jpg") + ctx := context.Background() + entry1 := &filer2.Entry{ FullPath: fullpath, Attr: filer2.Attr{ @@ -27,12 +30,12 @@ func TestCreateAndFind(t *testing.T) { }, } - if err := filer.CreateEntry(entry1); err != nil { + if err := filer.CreateEntry(ctx, entry1, false); err != nil { t.Errorf("create entry %v: %v", entry1.FullPath, err) return } - entry, err := filer.FindEntry(fullpath) + entry, err := filer.FindEntry(ctx, fullpath) if err != nil { t.Errorf("find entry: %v", err) @@ -45,17 +48,41 @@ func TestCreateAndFind(t *testing.T) { } // checking one upper directory - entries, _ := filer.ListDirectoryEntries(filer2.FullPath("/home/chris/this/is/one"), "", false, 100) + entries, _ := filer.ListDirectoryEntries(ctx, filer2.FullPath("/home/chris/this/is/one"), "", false, 100) if len(entries) != 1 { t.Errorf("list entries count: %v", len(entries)) return } // checking one upper directory - entries, _ = filer.ListDirectoryEntries(filer2.FullPath("/"), "", false, 100) + entries, _ = filer.ListDirectoryEntries(ctx, filer2.FullPath("/"), "", false, 100) if len(entries) != 1 { t.Errorf("list entries count: %v", len(entries)) return } } + +func TestEmptyRoot(t *testing.T) { + filer := filer2.NewFiler(nil, nil, 0) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test2") + defer os.RemoveAll(dir) + store := &LevelDBStore{} + store.initialize(dir) + filer.SetStore(store) + filer.DisableDirectoryCache() + + ctx := context.Background() + + // checking one upper directory + entries, err := filer.ListDirectoryEntries(ctx, filer2.FullPath("/"), "", false, 100) + if err != nil { + t.Errorf("list entries: %v", err) + return + } + if len(entries) != 0 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} diff --git a/weed/filer2/leveldb2/leveldb2_store.go b/weed/filer2/leveldb2/leveldb2_store.go new file mode 100644 index 000000000..0b07c6833 --- /dev/null +++ b/weed/filer2/leveldb2/leveldb2_store.go @@ -0,0 +1,238 @@ +package leveldb + +import ( + "bytes" + "context" + "crypto/md5" + "fmt" + "io" + "os" + + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/opt" + leveldb_util "github.com/syndtr/goleveldb/leveldb/util" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/glog" + 
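The new `DeleteFolderChildren` avoids one delete round trip per child by walking the iterator from the directory's key prefix and accumulating deletes into a single `leveldb.Batch`; since keys are sorted, the `bytes.HasPrefix` check is what stops the scan at the first key belonging to another directory. The same scan-and-batch idiom in isolation, with the key layout simplified to plain `dir/child` strings:

```go
package main

import (
	"bytes"
	"log"

	"github.com/syndtr/goleveldb/leveldb"
	leveldb_util "github.com/syndtr/goleveldb/leveldb/util"
)

// deleteByPrefix removes every key that starts with prefix in one batched write.
func deleteByPrefix(db *leveldb.DB, prefix []byte) error {
	batch := new(leveldb.Batch)
	iter := db.NewIterator(&leveldb_util.Range{Start: prefix}, nil)
	for iter.Next() {
		key := iter.Key()
		if !bytes.HasPrefix(key, prefix) {
			break // sorted keys: the prefix range has ended
		}
		// iter.Key() is only valid until the next iteration; copy before keeping it
		batch.Delete(append([]byte(nil), key...))
	}
	iter.Release()
	if err := iter.Error(); err != nil {
		return err
	}
	return db.Write(batch, nil)
}

func main() {
	db, err := leveldb.OpenFile("/tmp/prefix_demo", nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	for _, k := range []string{"/docs/a", "/docs/b", "/pics/c"} {
		db.Put([]byte(k), []byte("x"), nil)
	}
	if err := deleteByPrefix(db, []byte("/docs/")); err != nil {
		log.Fatal(err)
	}
}
```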
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + weed_util "github.com/chrislusf/seaweedfs/weed/util" +) + +func init() { + filer2.Stores = append(filer2.Stores, &LevelDB2Store{}) +} + +type LevelDB2Store struct { + dbs []*leveldb.DB + dbCount int +} + +func (store *LevelDB2Store) GetName() string { + return "leveldb2" +} + +func (store *LevelDB2Store) Initialize(configuration weed_util.Configuration, prefix string) (err error) { + dir := configuration.GetString(prefix + "dir") + return store.initialize(dir, 8) +} + +func (store *LevelDB2Store) initialize(dir string, dbCount int) (err error) { + glog.Infof("filer store leveldb2 dir: %s", dir) + if err := weed_util.TestFolderWritable(dir); err != nil { + return fmt.Errorf("Check Level Folder %s Writable: %s", dir, err) + } + + opts := &opt.Options{ + BlockCacheCapacity: 32 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 16 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 4, + } + + for d := 0; d < dbCount; d++ { + dbFolder := fmt.Sprintf("%s/%02d", dir, d) + os.MkdirAll(dbFolder, 0755) + db, dbErr := leveldb.OpenFile(dbFolder, opts) + if dbErr != nil { + glog.Errorf("filer store open dir %s: %v", dbFolder, dbErr) + return + } + store.dbs = append(store.dbs, db) + } + store.dbCount = dbCount + + return +} + +func (store *LevelDB2Store) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *LevelDB2Store) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *LevelDB2Store) RollbackTransaction(ctx context.Context) error { + return nil +} + +func (store *LevelDB2Store) InsertEntry(ctx context.Context, entry *filer2.Entry) (err error) { + dir, name := entry.DirAndName() + key, partitionId := genKey(dir, name, store.dbCount) + + value, err := entry.EncodeAttributesAndChunks() + if err != nil { + return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) + } + + err = store.dbs[partitionId].Put(key, value, nil) + + if err != nil { + return fmt.Errorf("persisting %s : %v", entry.FullPath, err) + } + + // println("saved", entry.FullPath, "chunks", len(entry.Chunks)) + + return nil +} + +func (store *LevelDB2Store) UpdateEntry(ctx context.Context, entry *filer2.Entry) (err error) { + + return store.InsertEntry(ctx, entry) +} + +func (store *LevelDB2Store) FindEntry(ctx context.Context, fullpath filer2.FullPath) (entry *filer2.Entry, err error) { + dir, name := fullpath.DirAndName() + key, partitionId := genKey(dir, name, store.dbCount) + + data, err := store.dbs[partitionId].Get(key, nil) + + if err == leveldb.ErrNotFound { + return nil, filer_pb.ErrNotFound + } + if err != nil { + return nil, fmt.Errorf("get %s : %v", entry.FullPath, err) + } + + entry = &filer2.Entry{ + FullPath: fullpath, + } + err = entry.DecodeAttributesAndChunks(data) + if err != nil { + return entry, fmt.Errorf("decode %s : %v", entry.FullPath, err) + } + + // println("read", entry.FullPath, "chunks", len(entry.Chunks), "data", len(data), string(data)) + + return entry, nil +} + +func (store *LevelDB2Store) DeleteEntry(ctx context.Context, fullpath filer2.FullPath) (err error) { + dir, name := fullpath.DirAndName() + key, partitionId := genKey(dir, name, store.dbCount) + + err = store.dbs[partitionId].Delete(key, nil) + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDB2Store) DeleteFolderChildren(ctx context.Context, fullpath filer2.FullPath) (err error) { + directoryPrefix, partitionId := 
genDirectoryKeyPrefix(fullpath, "", store.dbCount) + + batch := new(leveldb.Batch) + + iter := store.dbs[partitionId].NewIterator(&leveldb_util.Range{Start: directoryPrefix}, nil) + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + batch.Delete(append(directoryPrefix, []byte(fileName)...)) + } + iter.Release() + + err = store.dbs[partitionId].Write(batch, nil) + + if err != nil { + return fmt.Errorf("delete %s : %v", fullpath, err) + } + + return nil +} + +func (store *LevelDB2Store) ListDirectoryEntries(ctx context.Context, fullpath filer2.FullPath, startFileName string, inclusive bool, + limit int) (entries []*filer2.Entry, err error) { + + directoryPrefix, partitionId := genDirectoryKeyPrefix(fullpath, "", store.dbCount) + lastFileStart, _ := genDirectoryKeyPrefix(fullpath, startFileName, store.dbCount) + + iter := store.dbs[partitionId].NewIterator(&leveldb_util.Range{Start: lastFileStart}, nil) + for iter.Next() { + key := iter.Key() + if !bytes.HasPrefix(key, directoryPrefix) { + break + } + fileName := getNameFromKey(key) + if fileName == "" { + continue + } + if fileName == startFileName && !inclusive { + continue + } + limit-- + if limit < 0 { + break + } + entry := &filer2.Entry{ + FullPath: filer2.NewFullPath(string(fullpath), fileName), + } + + // println("list", entry.FullPath, "chunks", len(entry.Chunks)) + + if decodeErr := entry.DecodeAttributesAndChunks(iter.Value()); decodeErr != nil { + err = decodeErr + glog.V(0).Infof("list %s : %v", entry.FullPath, err) + break + } + entries = append(entries, entry) + } + iter.Release() + + return entries, err +} + +func genKey(dirPath, fileName string, dbCount int) (key []byte, partitionId int) { + key, partitionId = hashToBytes(dirPath, dbCount) + key = append(key, []byte(fileName)...) + return key, partitionId +} + +func genDirectoryKeyPrefix(fullpath filer2.FullPath, startFileName string, dbCount int) (keyPrefix []byte, partitionId int) { + keyPrefix, partitionId = hashToBytes(string(fullpath), dbCount) + if len(startFileName) > 0 { + keyPrefix = append(keyPrefix, []byte(startFileName)...) 
+ } + return keyPrefix, partitionId +} + +func getNameFromKey(key []byte) string { + + return string(key[md5.Size:]) + +} + +// hash directory, and use last byte for partitioning +func hashToBytes(dir string, dbCount int) ([]byte, int) { + h := md5.New() + io.WriteString(h, dir) + + b := h.Sum(nil) + + x := b[len(b)-1] + + return b, int(x) % dbCount +} diff --git a/weed/filer2/leveldb2/leveldb2_store_test.go b/weed/filer2/leveldb2/leveldb2_store_test.go new file mode 100644 index 000000000..dc94f2ac7 --- /dev/null +++ b/weed/filer2/leveldb2/leveldb2_store_test.go @@ -0,0 +1,88 @@ +package leveldb + +import ( + "context" + "github.com/chrislusf/seaweedfs/weed/filer2" + "io/ioutil" + "os" + "testing" +) + +func TestCreateAndFind(t *testing.T) { + filer := filer2.NewFiler(nil, nil, 0) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test") + defer os.RemoveAll(dir) + store := &LevelDB2Store{} + store.initialize(dir, 2) + filer.SetStore(store) + filer.DisableDirectoryCache() + + fullpath := filer2.FullPath("/home/chris/this/is/one/file1.jpg") + + ctx := context.Background() + + entry1 := &filer2.Entry{ + FullPath: fullpath, + Attr: filer2.Attr{ + Mode: 0440, + Uid: 1234, + Gid: 5678, + }, + } + + if err := filer.CreateEntry(ctx, entry1, false); err != nil { + t.Errorf("create entry %v: %v", entry1.FullPath, err) + return + } + + entry, err := filer.FindEntry(ctx, fullpath) + + if err != nil { + t.Errorf("find entry: %v", err) + return + } + + if entry.FullPath != entry1.FullPath { + t.Errorf("find wrong entry: %v", entry.FullPath) + return + } + + // checking one upper directory + entries, _ := filer.ListDirectoryEntries(ctx, filer2.FullPath("/home/chris/this/is/one"), "", false, 100) + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + + // checking one upper directory + entries, _ = filer.ListDirectoryEntries(ctx, filer2.FullPath("/"), "", false, 100) + if len(entries) != 1 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} + +func TestEmptyRoot(t *testing.T) { + filer := filer2.NewFiler(nil, nil, 0) + dir, _ := ioutil.TempDir("", "seaweedfs_filer_test2") + defer os.RemoveAll(dir) + store := &LevelDB2Store{} + store.initialize(dir, 2) + filer.SetStore(store) + filer.DisableDirectoryCache() + + ctx := context.Background() + + // checking one upper directory + entries, err := filer.ListDirectoryEntries(ctx, filer2.FullPath("/"), "", false, 100) + if err != nil { + t.Errorf("list entries: %v", err) + return + } + if len(entries) != 0 { + t.Errorf("list entries count: %v", len(entries)) + return + } + +} diff --git a/weed/filer2/memdb/memdb_store.go b/weed/filer2/memdb/memdb_store.go deleted file mode 100644 index 062f1cd1c..000000000 --- a/weed/filer2/memdb/memdb_store.go +++ /dev/null @@ -1,113 +0,0 @@ -package memdb - -import ( - "fmt" - "github.com/chrislusf/seaweedfs/weed/filer2" - "github.com/chrislusf/seaweedfs/weed/util" - "github.com/google/btree" - "strings" -) - -func init() { - filer2.Stores = append(filer2.Stores, &MemDbStore{}) -} - -type MemDbStore struct { - tree *btree.BTree -} - -type entryItem struct { - *filer2.Entry -} - -func (a entryItem) Less(b btree.Item) bool { - return strings.Compare(string(a.FullPath), string(b.(entryItem).FullPath)) < 0 -} - -func (store *MemDbStore) GetName() string { - return "memory" -} - -func (store *MemDbStore) Initialize(configuration util.Configuration) (err error) { - store.tree = btree.New(8) - return nil -} - -func (store *MemDbStore) InsertEntry(entry *filer2.Entry) (err error) { - 
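In leveldb2 every key is `md5(directory) + fileName`, and the last byte of the digest picks which of the eight databases holds the row, so all children of a directory land in the same partition and stay contiguous for prefix scans. A standalone sketch of that mapping, using the same md5-last-byte scheme as `genKey`/`hashToBytes`:

```go
package main

import (
	"crypto/md5"
	"fmt"
	"io"
)

const dbCount = 8

// partitionKey mirrors genKey/hashToBytes: the 16-byte md5 of the directory,
// then the file name; the digest's last byte selects the partition.
func partitionKey(dir, name string) (key []byte, partition int) {
	h := md5.New()
	io.WriteString(h, dir)
	sum := h.Sum(nil)
	partition = int(sum[len(sum)-1]) % dbCount
	key = append(sum, []byte(name)...)
	return key, partition
}

func main() {
	for _, name := range []string{"a.txt", "b.txt"} {
		key, p := partitionKey("/home/chris", name)
		// same directory => same partition and a shared 16-byte key prefix
		fmt.Printf("partition %d key %x\n", p, key)
	}
}
```

This is also why `getNameFromKey` can simply slice off `md5.Size` bytes to recover the file name.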
// println("inserting", entry.FullPath) - store.tree.ReplaceOrInsert(entryItem{entry}) - return nil -} - -func (store *MemDbStore) UpdateEntry(entry *filer2.Entry) (err error) { - if _, err = store.FindEntry(entry.FullPath); err != nil { - return fmt.Errorf("no such file %s : %v", entry.FullPath, err) - } - store.tree.ReplaceOrInsert(entryItem{entry}) - return nil -} - -func (store *MemDbStore) FindEntry(fullpath filer2.FullPath) (entry *filer2.Entry, err error) { - item := store.tree.Get(entryItem{&filer2.Entry{FullPath: fullpath}}) - if item == nil { - return nil, filer2.ErrNotFound - } - entry = item.(entryItem).Entry - return entry, nil -} - -func (store *MemDbStore) DeleteEntry(fullpath filer2.FullPath) (err error) { - store.tree.Delete(entryItem{&filer2.Entry{FullPath: fullpath}}) - return nil -} - -func (store *MemDbStore) ListDirectoryEntries(fullpath filer2.FullPath, startFileName string, inclusive bool, limit int) (entries []*filer2.Entry, err error) { - - startFrom := string(fullpath) - if startFileName != "" { - startFrom = startFrom + "/" + startFileName - } - - store.tree.AscendGreaterOrEqual(entryItem{&filer2.Entry{FullPath: filer2.FullPath(startFrom)}}, - func(item btree.Item) bool { - if limit <= 0 { - return false - } - entry := item.(entryItem).Entry - // println("checking", entry.FullPath) - - if entry.FullPath == fullpath { - // skipping the current directory - // println("skipping the folder", entry.FullPath) - return true - } - - dir, name := entry.FullPath.DirAndName() - if name == startFileName { - if inclusive { - limit-- - entries = append(entries, entry) - } - return true - } - - // only iterate the same prefix - if !strings.HasPrefix(string(entry.FullPath), string(fullpath)) { - // println("breaking from", entry.FullPath) - return false - } - - if dir != string(fullpath) { - // this could be items in deeper directories - // println("skipping deeper folder", entry.FullPath) - return true - } - // now process the directory items - // println("adding entry", entry.FullPath) - limit-- - entries = append(entries, entry) - return true - }, - ) - return entries, nil -} diff --git a/weed/filer2/memdb/memdb_store_test.go b/weed/filer2/memdb/memdb_store_test.go deleted file mode 100644 index cf813e04b..000000000 --- a/weed/filer2/memdb/memdb_store_test.go +++ /dev/null @@ -1,144 +0,0 @@ -package memdb - -import ( - "github.com/chrislusf/seaweedfs/weed/filer2" - "testing" -) - -func TestCreateAndFind(t *testing.T) { - filer := filer2.NewFiler(nil) - store := &MemDbStore{} - store.Initialize(nil) - filer.SetStore(store) - filer.DisableDirectoryCache() - - fullpath := filer2.FullPath("/home/chris/this/is/one/file1.jpg") - - entry1 := &filer2.Entry{ - FullPath: fullpath, - Attr: filer2.Attr{ - Mode: 0440, - Uid: 1234, - Gid: 5678, - }, - } - - if err := filer.CreateEntry(entry1); err != nil { - t.Errorf("create entry %v: %v", entry1.FullPath, err) - return - } - - entry, err := filer.FindEntry(fullpath) - - if err != nil { - t.Errorf("find entry: %v", err) - return - } - - if entry.FullPath != entry1.FullPath { - t.Errorf("find wrong entry: %v", entry.FullPath) - return - } - -} - -func TestCreateFileAndList(t *testing.T) { - filer := filer2.NewFiler(nil) - store := &MemDbStore{} - store.Initialize(nil) - filer.SetStore(store) - filer.DisableDirectoryCache() - - entry1 := &filer2.Entry{ - FullPath: filer2.FullPath("/home/chris/this/is/one/file1.jpg"), - Attr: filer2.Attr{ - Mode: 0440, - Uid: 1234, - Gid: 5678, - }, - } - - entry2 := &filer2.Entry{ - FullPath: 
filer2.FullPath("/home/chris/this/is/one/file2.jpg"), - Attr: filer2.Attr{ - Mode: 0440, - Uid: 1234, - Gid: 5678, - }, - } - - filer.CreateEntry(entry1) - filer.CreateEntry(entry2) - - // checking the 2 files - entries, err := filer.ListDirectoryEntries(filer2.FullPath("/home/chris/this/is/one/"), "", false, 100) - - if err != nil { - t.Errorf("list entries: %v", err) - return - } - - if len(entries) != 2 { - t.Errorf("list entries count: %v", len(entries)) - return - } - - if entries[0].FullPath != entry1.FullPath { - t.Errorf("find wrong entry 1: %v", entries[0].FullPath) - return - } - - if entries[1].FullPath != entry2.FullPath { - t.Errorf("find wrong entry 2: %v", entries[1].FullPath) - return - } - - // checking the offset - entries, err = filer.ListDirectoryEntries(filer2.FullPath("/home/chris/this/is/one/"), "file1.jpg", false, 100) - if len(entries) != 1 { - t.Errorf("list entries count: %v", len(entries)) - return - } - - // checking one upper directory - entries, _ = filer.ListDirectoryEntries(filer2.FullPath("/home/chris/this/is"), "", false, 100) - if len(entries) != 1 { - t.Errorf("list entries count: %v", len(entries)) - return - } - - // checking root directory - entries, _ = filer.ListDirectoryEntries(filer2.FullPath("/"), "", false, 100) - if len(entries) != 1 { - t.Errorf("list entries count: %v", len(entries)) - return - } - - // add file3 - file3Path := filer2.FullPath("/home/chris/this/is/file3.jpg") - entry3 := &filer2.Entry{ - FullPath: file3Path, - Attr: filer2.Attr{ - Mode: 0440, - Uid: 1234, - Gid: 5678, - }, - } - filer.CreateEntry(entry3) - - // checking one upper directory - entries, _ = filer.ListDirectoryEntries(filer2.FullPath("/home/chris/this/is"), "", false, 100) - if len(entries) != 2 { - t.Errorf("list entries count: %v", len(entries)) - return - } - - // delete file and count - filer.DeleteEntryMetaAndData(file3Path, false, false) - entries, _ = filer.ListDirectoryEntries(filer2.FullPath("/home/chris/this/is"), "", false, 100) - if len(entries) != 1 { - t.Errorf("list entries count: %v", len(entries)) - return - } - -} diff --git a/weed/filer2/mysql/mysql_store.go b/weed/filer2/mysql/mysql_store.go index e18299bd2..63d99cd9d 100644 --- a/weed/filer2/mysql/mysql_store.go +++ b/weed/filer2/mysql/mysql_store.go @@ -26,28 +26,35 @@ func (store *MysqlStore) GetName() string { return "mysql" } -func (store *MysqlStore) Initialize(configuration util.Configuration) (err error) { +func (store *MysqlStore) Initialize(configuration util.Configuration, prefix string) (err error) { return store.initialize( - configuration.GetString("username"), - configuration.GetString("password"), - configuration.GetString("hostname"), - configuration.GetInt("port"), - configuration.GetString("database"), - configuration.GetInt("connection_max_idle"), - configuration.GetInt("connection_max_open"), + configuration.GetString(prefix+"username"), + configuration.GetString(prefix+"password"), + configuration.GetString(prefix+"hostname"), + configuration.GetInt(prefix+"port"), + configuration.GetString(prefix+"database"), + configuration.GetInt(prefix+"connection_max_idle"), + configuration.GetInt(prefix+"connection_max_open"), + configuration.GetBool(prefix+"interpolateParams"), ) } -func (store *MysqlStore) initialize(user, password, hostname string, port int, database string, maxIdle, maxOpen int) (err error) { +func (store *MysqlStore) initialize(user, password, hostname string, port int, database string, maxIdle, maxOpen int, + interpolateParams bool) (err error) { 
store.SqlInsert = "INSERT INTO filemeta (dirhash,name,directory,meta) VALUES(?,?,?,?)" store.SqlUpdate = "UPDATE filemeta SET meta=? WHERE dirhash=? AND name=? AND directory=?" store.SqlFind = "SELECT meta FROM filemeta WHERE dirhash=? AND name=? AND directory=?" store.SqlDelete = "DELETE FROM filemeta WHERE dirhash=? AND name=? AND directory=?" + store.SqlDeleteFolderChildren = "DELETE FROM filemeta WHERE dirhash=? AND directory=?" store.SqlListExclusive = "SELECT NAME, meta FROM filemeta WHERE dirhash=? AND name>? AND directory=? ORDER BY NAME ASC LIMIT ?" store.SqlListInclusive = "SELECT NAME, meta FROM filemeta WHERE dirhash=? AND name>=? AND directory=? ORDER BY NAME ASC LIMIT ?" sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, user, password, hostname, port, database) + if interpolateParams { + sqlUrl += "&interpolateParams=true" + } + var dbErr error store.DB, dbErr = sql.Open("mysql", sqlUrl) if dbErr != nil { diff --git a/weed/filer2/postgres/README.txt b/weed/filer2/postgres/README.txt index ef2ef683b..cb0c99c63 100644 --- a/weed/filer2/postgres/README.txt +++ b/weed/filer2/postgres/README.txt @@ -9,8 +9,8 @@ $PGHOME/bin/psql --username=postgres --password seaweedfs CREATE TABLE IF NOT EXISTS filemeta ( dirhash BIGINT, - name VARCHAR(1000), - directory VARCHAR(4096), + name VARCHAR(65535), + directory VARCHAR(65535), meta bytea, PRIMARY KEY (dirhash, name) ); diff --git a/weed/filer2/postgres/postgres_store.go b/weed/filer2/postgres/postgres_store.go index ffd3d1e01..2e5f892f1 100644 --- a/weed/filer2/postgres/postgres_store.go +++ b/weed/filer2/postgres/postgres_store.go @@ -11,7 +11,7 @@ import ( ) const ( - CONNECTION_URL_PATTERN = "host=%s port=%d user=%s password=%s dbname=%s sslmode=%s connect_timeout=30" + CONNECTION_URL_PATTERN = "host=%s port=%d user=%s sslmode=%s connect_timeout=30" ) func init() { @@ -26,16 +26,16 @@ func (store *PostgresStore) GetName() string { return "postgres" } -func (store *PostgresStore) Initialize(configuration util.Configuration) (err error) { +func (store *PostgresStore) Initialize(configuration util.Configuration, prefix string) (err error) { return store.initialize( - configuration.GetString("username"), - configuration.GetString("password"), - configuration.GetString("hostname"), - configuration.GetInt("port"), - configuration.GetString("database"), - configuration.GetString("sslmode"), - configuration.GetInt("connection_max_idle"), - configuration.GetInt("connection_max_open"), + configuration.GetString(prefix+"username"), + configuration.GetString(prefix+"password"), + configuration.GetString(prefix+"hostname"), + configuration.GetInt(prefix+"port"), + configuration.GetString(prefix+"database"), + configuration.GetString(prefix+"sslmode"), + configuration.GetInt(prefix+"connection_max_idle"), + configuration.GetInt(prefix+"connection_max_open"), ) } @@ -45,10 +45,17 @@ func (store *PostgresStore) initialize(user, password, hostname string, port int store.SqlUpdate = "UPDATE filemeta SET meta=$1 WHERE dirhash=$2 AND name=$3 AND directory=$4" store.SqlFind = "SELECT meta FROM filemeta WHERE dirhash=$1 AND name=$2 AND directory=$3" store.SqlDelete = "DELETE FROM filemeta WHERE dirhash=$1 AND name=$2 AND directory=$3" + store.SqlDeleteFolderChildren = "DELETE FROM filemeta WHERE dirhash=$1 AND directory=$2" store.SqlListExclusive = "SELECT NAME, meta FROM filemeta WHERE dirhash=$1 AND name>$2 AND directory=$3 ORDER BY NAME ASC LIMIT $4" store.SqlListInclusive = "SELECT NAME, meta FROM filemeta WHERE dirhash=$1 AND name>=$2 AND directory=$3 
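The new `interpolateParams` option is appended to the go-sql-driver DSN; when true, the driver expands `?` placeholders client-side instead of using server-side prepared statements, saving a round trip per query. A sketch of building such a DSN; the credentials, host, and base URL pattern here are placeholder assumptions, not values from the config:

```go
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/go-sql-driver/mysql"
)

func main() {
	user, password, host, port, database := "seaweed", "secret", "127.0.0.1", 3306, "seaweedfs"
	interpolateParams := true

	// base DSN, then the conditional flag as in MysqlStore.initialize
	dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8", user, password, host, port, database)
	if interpolateParams {
		dsn += "&interpolateParams=true"
	}

	db, err := sql.Open("mysql", dsn)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// placeholders are now expanded client-side before the query is sent
	if _, err := db.Exec("DELETE FROM filemeta WHERE dirhash=? AND directory=?", 42, "/tmp"); err != nil {
		log.Fatal(err)
	}
}
```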
ORDER BY NAME ASC LIMIT $4" - sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, hostname, port, user, password, database, sslmode) + sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, hostname, port, user, sslmode) + if password != "" { + sqlUrl += " password="+password + } + if database != "" { + sqlUrl += " dbname="+database + } var dbErr error store.DB, dbErr = sql.Open("postgres", sqlUrl) if dbErr != nil { diff --git a/weed/filer2/redis/redis_cluster_store.go b/weed/filer2/redis/redis_cluster_store.go index 4f74a8a22..eaaecb740 100644 --- a/weed/filer2/redis/redis_cluster_store.go +++ b/weed/filer2/redis/redis_cluster_store.go @@ -18,15 +18,25 @@ func (store *RedisClusterStore) GetName() string { return "redis_cluster" } -func (store *RedisClusterStore) Initialize(configuration util.Configuration) (err error) { +func (store *RedisClusterStore) Initialize(configuration util.Configuration, prefix string) (err error) { + + configuration.SetDefault(prefix+"useReadOnly", true) + configuration.SetDefault(prefix+"routeByLatency", true) + return store.initialize( - configuration.GetStringSlice("addresses"), + configuration.GetStringSlice(prefix+"addresses"), + configuration.GetString(prefix+"password"), + configuration.GetBool(prefix+"useReadOnly"), + configuration.GetBool(prefix+"routeByLatency"), ) } -func (store *RedisClusterStore) initialize(addresses []string) (err error) { +func (store *RedisClusterStore) initialize(addresses []string, password string, readOnly, routeByLatency bool) (err error) { store.Client = redis.NewClusterClient(&redis.ClusterOptions{ - Addrs: addresses, + Addrs: addresses, + Password: password, + ReadOnly: readOnly, + RouteByLatency: routeByLatency, }) return } diff --git a/weed/filer2/redis/redis_store.go b/weed/filer2/redis/redis_store.go index c56fa014c..9debdb070 100644 --- a/weed/filer2/redis/redis_store.go +++ b/weed/filer2/redis/redis_store.go @@ -18,11 +18,11 @@ func (store *RedisStore) GetName() string { return "redis" } -func (store *RedisStore) Initialize(configuration util.Configuration) (err error) { +func (store *RedisStore) Initialize(configuration util.Configuration, prefix string) (err error) { return store.initialize( - configuration.GetString("address"), - configuration.GetString("password"), - configuration.GetInt("database"), + configuration.GetString(prefix+"address"), + configuration.GetString(prefix+"password"), + configuration.GetInt(prefix+"database"), ) } diff --git a/weed/filer2/redis/universal_redis_store.go b/weed/filer2/redis/universal_redis_store.go index a5d57b598..c5b9d9416 100644 --- a/weed/filer2/redis/universal_redis_store.go +++ b/weed/filer2/redis/universal_redis_store.go @@ -1,12 +1,17 @@ package redis import ( + "context" "fmt" - "github.com/chrislusf/seaweedfs/weed/filer2" - "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/go-redis/redis" "sort" "strings" + "time" + + "github.com/go-redis/redis" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" ) const ( @@ -17,14 +22,24 @@ type UniversalRedisStore struct { Client redis.UniversalClient } -func (store *UniversalRedisStore) InsertEntry(entry *filer2.Entry) (err error) { +func (store *UniversalRedisStore) BeginTransaction(ctx context.Context) (context.Context, error) { + return ctx, nil +} +func (store *UniversalRedisStore) CommitTransaction(ctx context.Context) error { + return nil +} +func (store *UniversalRedisStore) RollbackTransaction(ctx context.Context) error { + return nil +} 
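The Redis cluster store now forwards a password and two read-path knobs to go-redis: `ReadOnly` allows read commands to run on replica nodes, and `RouteByLatency` routes each read to the lowest-latency node. A minimal client construction with the same `ClusterOptions` fields, using go-redis's pre-context (v6-style) API as the diff does; the addresses and password are placeholders:

```go
package main

import (
	"fmt"
	"log"

	"github.com/go-redis/redis"
)

func main() {
	client := redis.NewClusterClient(&redis.ClusterOptions{
		Addrs:          []string{"10.0.0.1:6379", "10.0.0.2:6379"},
		Password:       "secret",
		ReadOnly:       true, // allow reads from replica nodes
		RouteByLatency: true, // prefer the closest node for reads
	})

	if err := client.Set("k", "v", 0).Err(); err != nil {
		log.Fatal(err)
	}
	v, err := client.Get("k").Result()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(v)
}
```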
+ +func (store *UniversalRedisStore) InsertEntry(ctx context.Context, entry *filer2.Entry) (err error) { value, err := entry.EncodeAttributesAndChunks() if err != nil { return fmt.Errorf("encoding %s %+v: %v", entry.FullPath, entry.Attr, err) } - _, err = store.Client.Set(string(entry.FullPath), value, 0).Result() + _, err = store.Client.Set(string(entry.FullPath), value, time.Duration(entry.TtlSec)*time.Second).Result() if err != nil { return fmt.Errorf("persisting %s : %v", entry.FullPath, err) @@ -41,16 +56,16 @@ func (store *UniversalRedisStore) InsertEntry(entry *filer2.Entry) (err error) { return nil } -func (store *UniversalRedisStore) UpdateEntry(entry *filer2.Entry) (err error) { +func (store *UniversalRedisStore) UpdateEntry(ctx context.Context, entry *filer2.Entry) (err error) { - return store.InsertEntry(entry) + return store.InsertEntry(ctx, entry) } -func (store *UniversalRedisStore) FindEntry(fullpath filer2.FullPath) (entry *filer2.Entry, err error) { +func (store *UniversalRedisStore) FindEntry(ctx context.Context, fullpath filer2.FullPath) (entry *filer2.Entry, err error) { data, err := store.Client.Get(string(fullpath)).Result() if err == redis.Nil { - return nil, filer2.ErrNotFound + return nil, filer_pb.ErrNotFound } if err != nil { @@ -68,7 +83,7 @@ func (store *UniversalRedisStore) FindEntry(fullpath filer2.FullPath) (entry *fi return entry, nil } -func (store *UniversalRedisStore) DeleteEntry(fullpath filer2.FullPath) (err error) { +func (store *UniversalRedisStore) DeleteEntry(ctx context.Context, fullpath filer2.FullPath) (err error) { _, err = store.Client.Del(string(fullpath)).Result() @@ -87,11 +102,30 @@ func (store *UniversalRedisStore) DeleteEntry(fullpath filer2.FullPath) (err err return nil } -func (store *UniversalRedisStore) ListDirectoryEntries(fullpath filer2.FullPath, startFileName string, inclusive bool, - limit int) (entries []*filer2.Entry, err error) { +func (store *UniversalRedisStore) DeleteFolderChildren(ctx context.Context, fullpath filer2.FullPath) (err error) { members, err := store.Client.SMembers(genDirectoryListKey(string(fullpath))).Result() if err != nil { + return fmt.Errorf("delete folder %s : %v", fullpath, err) + } + + for _, fileName := range members { + path := filer2.NewFullPath(string(fullpath), fileName) + _, err = store.Client.Del(string(path)).Result() + if err != nil { + return fmt.Errorf("delete %s in parent dir: %v", fullpath, err) + } + } + + return nil +} + +func (store *UniversalRedisStore) ListDirectoryEntries(ctx context.Context, fullpath filer2.FullPath, startFileName string, inclusive bool, + limit int) (entries []*filer2.Entry, err error) { + + dirListKey := genDirectoryListKey(string(fullpath)) + members, err := store.Client.SMembers(dirListKey).Result() + if err != nil { return nil, fmt.Errorf("list %s : %v", fullpath, err) } @@ -125,10 +159,17 @@ func (store *UniversalRedisStore) ListDirectoryEntries(fullpath filer2.FullPath, // fetch entry meta for _, fileName := range members { path := filer2.NewFullPath(string(fullpath), fileName) - entry, err := store.FindEntry(path) + entry, err := store.FindEntry(ctx, path) if err != nil { glog.V(0).Infof("list %s : %v", path, err) } else { + if entry.TtlSec > 0 { + if entry.Attr.Crtime.Add(time.Duration(entry.TtlSec) * time.Second).Before(time.Now()) { + store.Client.Del(string(path)).Result() + store.Client.SRem(dirListKey, fileName).Result() + continue + } + } entries = append(entries, entry) } } diff --git a/weed/filer2/stream.go b/weed/filer2/stream.go new file mode 
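Redis entries are now written with `TtlSec` as a native key TTL, but the membership set that backs directory listings has no per-member expiry, so `ListDirectoryEntries` re-checks `Crtime + TtlSec` and lazily purges members whose value key has already expired. The expiry test in isolation:

```go
package main

import (
	"fmt"
	"time"
)

type entry struct {
	Crtime time.Time
	TtlSec int32
}

// expired mirrors the listing-side check; a zero TtlSec means "never expires".
func expired(e entry, now time.Time) bool {
	if e.TtlSec <= 0 {
		return false
	}
	return e.Crtime.Add(time.Duration(e.TtlSec) * time.Second).Before(now)
}

func main() {
	now := time.Now()
	fresh := entry{Crtime: now.Add(-10 * time.Second), TtlSec: 60}
	stale := entry{Crtime: now.Add(-120 * time.Second), TtlSec: 60}
	fmt.Println(expired(fresh, now), expired(stale, now)) // false true
}
```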
100644 index 000000000..381d99144 --- /dev/null +++ b/weed/filer2/stream.go @@ -0,0 +1,42 @@ +package filer2 + +import ( + "io" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/wdclient" +) + +func StreamContent(masterClient *wdclient.MasterClient, w io.Writer, chunks []*filer_pb.FileChunk, offset int64, size int) error { + + chunkViews := ViewFromChunks(chunks, offset, size) + + fileId2Url := make(map[string]string) + + for _, chunkView := range chunkViews { + + urlString, err := masterClient.LookupFileId(chunkView.FileId) + if err != nil { + glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err) + return err + } + fileId2Url[chunkView.FileId] = urlString + } + + for _, chunkView := range chunkViews { + + urlString := fileId2Url[chunkView.FileId] + err := util.ReadUrlAsStream(urlString, chunkView.CipherKey, chunkView.isGzipped, chunkView.IsFullChunk, chunkView.Offset, int(chunkView.Size), func(data []byte) { + w.Write(data) + }) + if err != nil { + glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err) + return err + } + } + + return nil + +} diff --git a/weed/filesys/dir.go b/weed/filesys/dir.go index d8b0847d3..483229b3f 100644 --- a/weed/filesys/dir.go +++ b/weed/filesys/dir.go @@ -3,21 +3,20 @@ package filesys import ( "context" "os" - "path" - "path/filepath" + "strings" "time" - "bazil.org/fuse" - "bazil.org/fuse/fs" "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/fuse" + "github.com/seaweedfs/fuse/fs" ) type Dir struct { - Path string - wfs *WFS - attributes *filer_pb.FuseAttributes + Path string + wfs *WFS + entry *filer_pb.Entry } var _ = fs.Node(&Dir{}) @@ -28,78 +27,80 @@ var _ = fs.HandleReadDirAller(&Dir{}) var _ = fs.NodeRemover(&Dir{}) var _ = fs.NodeRenamer(&Dir{}) var _ = fs.NodeSetattrer(&Dir{}) +var _ = fs.NodeGetxattrer(&Dir{}) +var _ = fs.NodeSetxattrer(&Dir{}) +var _ = fs.NodeRemovexattrer(&Dir{}) +var _ = fs.NodeListxattrer(&Dir{}) +var _ = fs.NodeForgetter(&Dir{}) -func (dir *Dir) Attr(context context.Context, attr *fuse.Attr) error { +func (dir *Dir) Attr(ctx context.Context, attr *fuse.Attr) error { - if dir.Path == "/" { - attr.Valid = time.Second - attr.Mode = os.ModeDir | 0777 - return nil - } - - item := dir.wfs.listDirectoryEntriesCache.Get(dir.Path) - if item != nil && !item.Expired() { - entry := item.Value().(*filer_pb.Entry) - - attr.Mtime = time.Unix(entry.Attributes.Mtime, 0) - attr.Ctime = time.Unix(entry.Attributes.Crtime, 0) - attr.Mode = os.FileMode(entry.Attributes.FileMode) - attr.Gid = entry.Attributes.Gid - attr.Uid = entry.Attributes.Uid + // https://github.com/bazil/fuse/issues/196 + attr.Valid = time.Second + if dir.Path == dir.wfs.option.FilerMountRootPath { + dir.setRootDirAttributes(attr) + glog.V(3).Infof("root dir Attr %s, attr: %+v", dir.Path, attr) return nil } - parent, name := filepath.Split(dir.Path) + if err := dir.maybeLoadEntry(); err != nil { + glog.V(3).Infof("dir Attr %s,err: %+v", dir.Path, err) + return err + } - err := dir.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + attr.Inode = filer2.FullPath(dir.Path).AsInode() + attr.Mode = os.FileMode(dir.entry.Attributes.FileMode) | os.ModeDir + attr.Mtime = time.Unix(dir.entry.Attributes.Mtime, 0) + attr.Crtime = time.Unix(dir.entry.Attributes.Crtime, 0) + attr.Gid = 
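`StreamContent` makes two passes: it first resolves every chunk view's volume URL through the master client, so a lookup failure aborts before any bytes reach the writer, and only then streams the chunks in order. A sketch of that resolve-then-stream shape; the `lookup` and `fetch` helpers are hypothetical stand-ins for `LookupFileId` and `ReadUrlAsStream`:

```go
package main

import (
	"fmt"
	"io"
	"os"
	"strings"
)

type chunkView struct{ FileId string }

// streamContent fails fast on lookups, then writes chunk data sequentially.
func streamContent(w io.Writer, chunks []chunkView,
	lookup func(fileId string) (string, error),
	fetch func(url string) (io.Reader, error)) error {

	urls := make(map[string]string)
	for _, c := range chunks { // pass 1: resolve every chunk before writing
		u, err := lookup(c.FileId)
		if err != nil {
			return fmt.Errorf("lookup %s: %v", c.FileId, err)
		}
		urls[c.FileId] = u
	}
	for _, c := range chunks { // pass 2: stream in order
		r, err := fetch(urls[c.FileId])
		if err != nil {
			return fmt.Errorf("read %s: %v", c.FileId, err)
		}
		if _, err := io.Copy(w, r); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	chunks := []chunkView{{"3,01"}, {"3,02"}}
	lookup := func(id string) (string, error) { return "http://volume/" + id, nil }
	fetch := func(url string) (io.Reader, error) { return strings.NewReader(url + "\n"), nil }
	if err := streamContent(os.Stdout, chunks, lookup, fetch); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```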
dir.entry.Attributes.Gid + attr.Uid = dir.entry.Attributes.Uid - request := &filer_pb.LookupDirectoryEntryRequest{ - Directory: parent, - Name: name, - } + glog.V(3).Infof("dir Attr %s, attr: %+v", dir.Path, attr) - glog.V(1).Infof("read dir %s attr: %v", dir.Path, request) - resp, err := client.LookupDirectoryEntry(context, request) - if err != nil { - glog.V(0).Infof("read dir %s attr %v: %v", dir.Path, request, err) - return err - } + return nil +} - if resp.Entry != nil { - dir.attributes = resp.Entry.Attributes - } +func (dir *Dir) Getxattr(ctx context.Context, req *fuse.GetxattrRequest, resp *fuse.GetxattrResponse) error { - return nil - }) + glog.V(4).Infof("dir Getxattr %s", dir.Path) - if err != nil { + if err := dir.maybeLoadEntry(); err != nil { return err } - // glog.V(1).Infof("dir %s: %v", dir.Path, attributes) - // glog.V(1).Infof("dir %s permission: %v", dir.Path, os.FileMode(attributes.FileMode)) - - attr.Mode = os.FileMode(dir.attributes.FileMode) | os.ModeDir - if dir.Path == "/" && dir.attributes.FileMode == 0 { - attr.Valid = time.Second - } + return getxattr(dir.entry, req, resp) +} - attr.Mtime = time.Unix(dir.attributes.Mtime, 0) - attr.Ctime = time.Unix(dir.attributes.Crtime, 0) - attr.Gid = dir.attributes.Gid - attr.Uid = dir.attributes.Uid +func (dir *Dir) setRootDirAttributes(attr *fuse.Attr) { + attr.Inode = 1 // filer2.FullPath(dir.Path).AsInode() + attr.Valid = time.Hour + attr.Uid = dir.wfs.option.MountUid + attr.Gid = dir.wfs.option.MountGid + attr.Mode = dir.wfs.option.MountMode + attr.Crtime = dir.wfs.option.MountCtime + attr.Ctime = dir.wfs.option.MountCtime + attr.Mtime = dir.wfs.option.MountMtime + attr.Atime = dir.wfs.option.MountMtime + attr.BlockSize = 1024 * 1024 +} - return nil +func (dir *Dir) newFile(name string, entry *filer_pb.Entry) fs.Node { + return dir.wfs.getNode(filer2.NewFullPath(dir.Path, name), func() fs.Node { + return &File{ + Name: name, + dir: dir, + wfs: dir.wfs, + entry: entry, + entryViewCache: nil, + } + }) } -func (dir *Dir) newFile(name string, entry *filer_pb.Entry) *File { - return &File{ - Name: name, - dir: dir, - wfs: dir.wfs, - entry: entry, - } +func (dir *Dir) newDirectory(fullpath filer2.FullPath, entry *filer_pb.Entry) fs.Node { + return dir.wfs.getNode(fullpath, func() fs.Node { + return &Dir{Path: string(fullpath), wfs: dir.wfs, entry: entry} + }) } func (dir *Dir) Create(ctx context.Context, req *fuse.CreateRequest, @@ -113,7 +114,7 @@ func (dir *Dir) Create(ctx context.Context, req *fuse.CreateRequest, Attributes: &filer_pb.FuseAttributes{ Mtime: time.Now().Unix(), Crtime: time.Now().Unix(), - FileMode: uint32(req.Mode), + FileMode: uint32(req.Mode &^ dir.wfs.option.Umask), Uid: req.Uid, Gid: req.Gid, Collection: dir.wfs.option.Collection, @@ -121,99 +122,104 @@ func (dir *Dir) Create(ctx context.Context, req *fuse.CreateRequest, TtlSec: dir.wfs.option.TtlSec, }, }, + OExcl: req.Flags&fuse.OpenExclusive != 0, } - glog.V(1).Infof("create: %v", request) + glog.V(1).Infof("create %s/%s: %v", dir.Path, req.Name, req.Flags) - if request.Entry.IsDirectory { - if err := dir.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { - if _, err := client.CreateEntry(ctx, request); err != nil { - glog.V(0).Infof("create %s/%s: %v", dir.Path, req.Name, err) - return fuse.EIO + if err := dir.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + if err := filer_pb.CreateEntry(client, request); err != nil { + if strings.Contains(err.Error(), "EEXIST") { + return fuse.EEXIST } - return nil - }); 
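With no server-side inode numbers, the mount now derives a stable one by hashing the full path (`FullPath.AsInode`, built on `util.HashStringToLong`); the same path always maps to the same inode across lookups, which keeps the kernel's dentry cache consistent. An analogous stdlib sketch, with FNV-1a standing in for SeaweedFS's hash:

```go
package main

import (
	"fmt"
	"hash/fnv"
)

// asInode maps a path to a stable 64-bit inode number.
// SeaweedFS uses util.HashStringToLong; FNV-1a is a stand-in here.
func asInode(fullPath string) uint64 {
	h := fnv.New64a()
	h.Write([]byte(fullPath))
	return h.Sum64()
}

func main() {
	fmt.Println(asInode("/home/chris/file1.jpg")) // identical on every call
	fmt.Println(asInode("/home/chris/file1.jpg"))
}
```

The root directory is the one exception: it is pinned to inode 1, the conventional root inode for FUSE filesystems.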
err != nil { - return nil, nil, err + return fuse.EIO } + return nil + }); err != nil { + return nil, nil, err } - - file := dir.newFile(req.Name, request.Entry) - if !request.Entry.IsDirectory { - file.isOpen = true + var node fs.Node + if request.Entry.IsDirectory { + node = dir.newDirectory(filer2.NewFullPath(dir.Path, req.Name), request.Entry) + return node, nil, nil } + + node = dir.newFile(req.Name, request.Entry) + file := node.(*File) + file.isOpen++ fh := dir.wfs.AcquireHandle(file, req.Uid, req.Gid) - fh.dirtyMetadata = true return file, fh, nil } func (dir *Dir) Mkdir(ctx context.Context, req *fuse.MkdirRequest) (fs.Node, error) { - err := dir.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + newEntry := &filer_pb.Entry{ + Name: req.Name, + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32(req.Mode &^ dir.wfs.option.Umask), + Uid: req.Uid, + Gid: req.Gid, + }, + } + + err := dir.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { request := &filer_pb.CreateEntryRequest{ Directory: dir.Path, - Entry: &filer_pb.Entry{ - Name: req.Name, - IsDirectory: true, - Attributes: &filer_pb.FuseAttributes{ - Mtime: time.Now().Unix(), - Crtime: time.Now().Unix(), - FileMode: uint32(req.Mode), - Uid: req.Uid, - Gid: req.Gid, - }, - }, + Entry: newEntry, } glog.V(1).Infof("mkdir: %v", request) - if _, err := client.CreateEntry(ctx, request); err != nil { + if err := filer_pb.CreateEntry(client, request); err != nil { glog.V(0).Infof("mkdir %s/%s: %v", dir.Path, req.Name, err) - return fuse.EIO + return err } return nil }) if err == nil { - node := &Dir{Path: path.Join(dir.Path, req.Name), wfs: dir.wfs} + node := dir.newDirectory(filer2.NewFullPath(dir.Path, req.Name), newEntry) return node, nil } - return nil, err + return nil, fuse.EIO } func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse.LookupResponse) (node fs.Node, err error) { - var entry *filer_pb.Entry - err = dir.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + glog.V(4).Infof("dir Lookup %s: %s", dir.Path, req.Name) - request := &filer_pb.LookupDirectoryEntryRequest{ - Directory: dir.Path, - Name: req.Name, - } + fullFilePath := filer2.NewFullPath(dir.Path, req.Name) + entry := dir.wfs.cacheGet(fullFilePath) - glog.V(4).Infof("lookup directory entry: %v", request) - resp, err := client.LookupDirectoryEntry(ctx, request) + if entry == nil { + // glog.V(3).Infof("dir Lookup cache miss %s", fullFilePath) + entry, err = filer2.GetEntry(dir.wfs, fullFilePath) if err != nil { - // glog.V(0).Infof("lookup %s/%s: %v", dir.Path, name, err) - return fuse.ENOENT + glog.V(1).Infof("dir GetEntry %s: %v", fullFilePath, err) + return nil, fuse.ENOENT } - - entry = resp.Entry - - return nil - }) + dir.wfs.cacheSet(fullFilePath, entry, 5*time.Minute) + } else { + glog.V(4).Infof("dir Lookup cache hit %s", fullFilePath) + } if entry != nil { if entry.IsDirectory { - node = &Dir{Path: path.Join(dir.Path, req.Name), wfs: dir.wfs, attributes: entry.Attributes} + node = dir.newDirectory(fullFilePath, entry) } else { node = dir.newFile(req.Name, entry) } - resp.EntryValid = time.Duration(0) + // resp.EntryValid = time.Second + resp.Attr.Inode = fullFilePath.AsInode() + resp.Attr.Valid = time.Second resp.Attr.Mtime = time.Unix(entry.Attributes.Mtime, 0) - resp.Attr.Ctime = time.Unix(entry.Attributes.Crtime, 0) + resp.Attr.Crtime = time.Unix(entry.Attributes.Crtime, 0) resp.Attr.Mode = 
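Both `Create` and `Mkdir` now apply the mount's umask with Go's bit-clear operator: `m &^ umask` keeps exactly the permission bits not set in the umask. A two-line illustration:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	requested := os.FileMode(0666)
	umask := os.FileMode(0022)
	// &^ (AND NOT) clears the umask bits, as in req.Mode &^ option.Umask
	fmt.Printf("%o\n", requested&^umask) // 644
}
```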
os.FileMode(entry.Attributes.FileMode) resp.Attr.Gid = entry.Attributes.Gid resp.Attr.Uid = entry.Attributes.Uid @@ -221,58 +227,98 @@ func (dir *Dir) Lookup(ctx context.Context, req *fuse.LookupRequest, resp *fuse. return node, nil } + glog.V(4).Infof("not found dir GetEntry %s: %v", fullFilePath, err) return nil, fuse.ENOENT } func (dir *Dir) ReadDirAll(ctx context.Context) (ret []fuse.Dirent, err error) { - err = dir.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + glog.V(3).Infof("dir ReadDirAll %s", dir.Path) - request := &filer_pb.ListEntriesRequest{ - Directory: dir.Path, - Limit: uint32(dir.wfs.option.DirListingLimit), + cacheTtl := 5 * time.Minute + + readErr := filer2.ReadDirAllEntries(dir.wfs, filer2.FullPath(dir.Path), "", func(entry *filer_pb.Entry, isLast bool) { + fullpath := filer2.NewFullPath(dir.Path, entry.Name) + inode := fullpath.AsInode() + if entry.IsDirectory { + dirent := fuse.Dirent{Inode: inode, Name: entry.Name, Type: fuse.DT_Dir} + ret = append(ret, dirent) + } else { + dirent := fuse.Dirent{Inode: inode, Name: entry.Name, Type: fuse.DT_File} + ret = append(ret, dirent) } + dir.wfs.cacheSet(fullpath, entry, cacheTtl) + }) + if readErr != nil { + glog.V(0).Infof("list %s: %v", dir.Path, err) + return ret, fuse.EIO + } - glog.V(4).Infof("read directory: %v", request) - resp, err := client.ListEntries(ctx, request) - if err != nil { - glog.V(0).Infof("list %s: %v", dir.Path, err) - return fuse.EIO + return ret, err +} + +func (dir *Dir) Remove(ctx context.Context, req *fuse.RemoveRequest) error { + + if !req.Dir { + return dir.removeOneFile(req) + } + + return dir.removeFolder(req) + +} + +func (dir *Dir) removeOneFile(req *fuse.RemoveRequest) error { + + filePath := filer2.NewFullPath(dir.Path, req.Name) + entry, err := filer2.GetEntry(dir.wfs, filePath) + if err != nil { + return err + } + if entry == nil { + return nil + } + + dir.wfs.deleteFileChunks(entry.Chunks) + + dir.wfs.cacheDelete(filePath) + + return dir.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.DeleteEntryRequest{ + Directory: dir.Path, + Name: req.Name, + IsDeleteData: false, } - for _, entry := range resp.Entries { - if entry.IsDirectory { - dirent := fuse.Dirent{Name: entry.Name, Type: fuse.DT_Dir} - ret = append(ret, dirent) - } else { - dirent := fuse.Dirent{Name: entry.Name, Type: fuse.DT_File} - ret = append(ret, dirent) - } - dir.wfs.listDirectoryEntriesCache.Set(dir.Path+"/"+entry.Name, entry, 300*time.Millisecond) + glog.V(3).Infof("remove file: %v", request) + _, err := client.DeleteEntry(context.Background(), request) + if err != nil { + glog.V(3).Infof("not found remove file %s/%s: %v", dir.Path, req.Name, err) + return fuse.ENOENT } return nil }) - return ret, err } -func (dir *Dir) Remove(ctx context.Context, req *fuse.RemoveRequest) error { +func (dir *Dir) removeFolder(req *fuse.RemoveRequest) error { + + dir.wfs.cacheDelete(filer2.NewFullPath(dir.Path, req.Name)) - return dir.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + return dir.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { request := &filer_pb.DeleteEntryRequest{ Directory: dir.Path, Name: req.Name, - IsDirectory: req.Dir, IsDeleteData: true, } - glog.V(1).Infof("remove directory entry: %v", request) - _, err := client.DeleteEntry(ctx, request) + glog.V(3).Infof("remove directory entry: %v", request) + _, err := client.DeleteEntry(context.Background(), request) if err != nil { - glog.V(0).Infof("remove %s/%s: 
%v", dir.Path, req.Name, err) - return fuse.EIO + glog.V(3).Infof("not found remove %s/%s: %v", dir.Path, req.Name, err) + return fuse.ENOENT } return nil @@ -282,42 +328,122 @@ func (dir *Dir) Remove(ctx context.Context, req *fuse.RemoveRequest) error { func (dir *Dir) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse.SetattrResponse) error { - glog.V(3).Infof("%v dir setattr %+v, fh=%d", dir.Path, req, req.Handle) + glog.V(3).Infof("%v dir setattr %+v", dir.Path, req) + + if err := dir.maybeLoadEntry(); err != nil { + return err + } + if req.Valid.Mode() { - dir.attributes.FileMode = uint32(req.Mode) + dir.entry.Attributes.FileMode = uint32(req.Mode) } if req.Valid.Uid() { - dir.attributes.Uid = req.Uid + dir.entry.Attributes.Uid = req.Uid } if req.Valid.Gid() { - dir.attributes.Gid = req.Gid + dir.entry.Attributes.Gid = req.Gid } if req.Valid.Mtime() { - dir.attributes.Mtime = req.Mtime.Unix() + dir.entry.Attributes.Mtime = req.Mtime.Unix() + } + + dir.wfs.cacheDelete(filer2.FullPath(dir.Path)) + + return dir.saveEntry() + +} + +func (dir *Dir) Setxattr(ctx context.Context, req *fuse.SetxattrRequest) error { + + glog.V(4).Infof("dir Setxattr %s: %s", dir.Path, req.Name) + + if err := dir.maybeLoadEntry(); err != nil { + return err + } + + if err := setxattr(dir.entry, req); err != nil { + return err } + dir.wfs.cacheDelete(filer2.FullPath(dir.Path)) + + return dir.saveEntry() + +} + +func (dir *Dir) Removexattr(ctx context.Context, req *fuse.RemovexattrRequest) error { + + glog.V(4).Infof("dir Removexattr %s: %s", dir.Path, req.Name) + + if err := dir.maybeLoadEntry(); err != nil { + return err + } + + if err := removexattr(dir.entry, req); err != nil { + return err + } + + dir.wfs.cacheDelete(filer2.FullPath(dir.Path)) + + return dir.saveEntry() + +} + +func (dir *Dir) Listxattr(ctx context.Context, req *fuse.ListxattrRequest, resp *fuse.ListxattrResponse) error { + + glog.V(4).Infof("dir Listxattr %s", dir.Path) + + if err := dir.maybeLoadEntry(); err != nil { + return err + } + + if err := listxattr(dir.entry, req, resp); err != nil { + return err + } + + return nil + +} + +func (dir *Dir) Forget() { + glog.V(3).Infof("Forget dir %s", dir.Path) + + dir.wfs.forgetNode(filer2.FullPath(dir.Path)) +} + +func (dir *Dir) maybeLoadEntry() error { + if dir.entry == nil { + parentDirPath, name := filer2.FullPath(dir.Path).DirAndName() + entry, err := dir.wfs.maybeLoadEntry(parentDirPath, name) + if err != nil { + return err + } + dir.entry = entry + } + return nil +} + +func (dir *Dir) saveEntry() error { + parentDir, name := filer2.FullPath(dir.Path).DirAndName() - return dir.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + return dir.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { request := &filer_pb.UpdateEntryRequest{ Directory: parentDir, - Entry: &filer_pb.Entry{ - Name: name, - Attributes: dir.attributes, - }, + Entry: dir.entry, } - glog.V(1).Infof("set attr directory entry: %v", request) - _, err := client.UpdateEntry(ctx, request) + glog.V(1).Infof("save dir entry: %v", request) + _, err := client.UpdateEntry(context.Background(), request) if err != nil { - glog.V(0).Infof("UpdateEntry %s: %v", dir.Path, err) + glog.V(0).Infof("UpdateEntry dir %s/%s: %v", parentDir, name, err) return fuse.EIO } return nil }) - } diff --git a/weed/filesys/dir_link.go b/weed/filesys/dir_link.go new file mode 100644 index 000000000..61ed04c26 --- /dev/null +++ b/weed/filesys/dir_link.go @@ -0,0 +1,66 @@ +package filesys + +import ( + 
"context" + "os" + "syscall" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/fuse" + "github.com/seaweedfs/fuse/fs" +) + +var _ = fs.NodeSymlinker(&Dir{}) +var _ = fs.NodeReadlinker(&File{}) + +func (dir *Dir) Symlink(ctx context.Context, req *fuse.SymlinkRequest) (fs.Node, error) { + + glog.V(3).Infof("Symlink: %v/%v to %v", dir.Path, req.NewName, req.Target) + + request := &filer_pb.CreateEntryRequest{ + Directory: dir.Path, + Entry: &filer_pb.Entry{ + Name: req.NewName, + IsDirectory: false, + Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32((os.FileMode(0777) | os.ModeSymlink) &^ dir.wfs.option.Umask), + Uid: req.Uid, + Gid: req.Gid, + SymlinkTarget: req.Target, + }, + }, + } + + err := dir.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + if err := filer_pb.CreateEntry(client, request); err != nil { + glog.V(0).Infof("symlink %s/%s: %v", dir.Path, req.NewName, err) + return fuse.EIO + } + return nil + }) + + symlink := dir.newFile(req.NewName, request.Entry) + + return symlink, err + +} + +func (file *File) Readlink(ctx context.Context, req *fuse.ReadlinkRequest) (string, error) { + + if err := file.maybeLoadEntry(ctx); err != nil { + return "", err + } + + if os.FileMode(file.entry.Attributes.FileMode)&os.ModeSymlink == 0 { + return "", fuse.Errno(syscall.EINVAL) + } + + glog.V(3).Infof("Readlink: %v/%v => %v", file.dir.Path, file.Name, file.entry.Attributes.SymlinkTarget) + + return file.entry.Attributes.SymlinkTarget, nil + +} diff --git a/weed/filesys/dir_rename.go b/weed/filesys/dir_rename.go index 7ba515202..9b0c0fe6e 100644 --- a/weed/filesys/dir_rename.go +++ b/weed/filesys/dir_rename.go @@ -1,102 +1,63 @@ package filesys import ( - "bazil.org/fuse" - "bazil.org/fuse/fs" "context" + + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "path/filepath" + "github.com/seaweedfs/fuse" + "github.com/seaweedfs/fuse/fs" ) func (dir *Dir) Rename(ctx context.Context, req *fuse.RenameRequest, newDirectory fs.Node) error { newDir := newDirectory.(*Dir) + glog.V(4).Infof("dir Rename %s/%s => %s/%s", dir.Path, req.OldName, newDir.Path, req.NewName) - return dir.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + err := dir.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { - // find existing entry - request := &filer_pb.LookupDirectoryEntryRequest{ - Directory: dir.Path, - Name: req.OldName, + request := &filer_pb.AtomicRenameEntryRequest{ + OldDirectory: dir.Path, + OldName: req.OldName, + NewDirectory: newDir.Path, + NewName: req.NewName, } - glog.V(4).Infof("find existing directory entry: %v", request) - resp, err := client.LookupDirectoryEntry(ctx, request) + _, err := client.AtomicRenameEntry(context.Background(), request) if err != nil { - glog.V(0).Infof("renaming find %s/%s: %v", dir.Path, req.OldName, err) - return fuse.ENOENT - } - - entry := resp.Entry - - glog.V(4).Infof("found existing directory entry resp: %+v", resp) - - return moveEntry(ctx, client, dir.Path, entry, newDir.Path, req.NewName) - }) - -} - -func moveEntry(ctx context.Context, client filer_pb.SeaweedFilerClient, oldParent string, entry *filer_pb.Entry, newParent, newName string) error { - if entry.IsDirectory { - currentDirPath := filepath.Join(oldParent, entry.Name) - request := &filer_pb.ListEntriesRequest{ - Directory: 
currentDirPath, - } - - glog.V(4).Infof("read directory: %v", request) - resp, err := client.ListEntries(ctx, request) - if err != nil { - glog.V(0).Infof("list %s: %v", oldParent, err) + glog.V(0).Infof("dir Rename %s/%s => %s/%s : %v", dir.Path, req.OldName, newDir.Path, req.NewName, err) return fuse.EIO } - for _, item := range resp.Entries { - err := moveEntry(ctx, client, currentDirPath, item, filepath.Join(newParent, newName), item.Name) - if err != nil { - return err - } - } - - } - - // add to new directory - { - request := &filer_pb.CreateEntryRequest{ - Directory: newParent, - Entry: &filer_pb.Entry{ - Name: newName, - IsDirectory: entry.IsDirectory, - Attributes: entry.Attributes, - Chunks: entry.Chunks, - }, - } + return nil - glog.V(1).Infof("create new entry: %v", request) - if _, err := client.CreateEntry(ctx, request); err != nil { - glog.V(0).Infof("renaming create %s/%s: %v", newParent, newName, err) - return fuse.EIO - } - } + }) - // delete old entry - { - request := &filer_pb.DeleteEntryRequest{ - Directory: oldParent, - Name: entry.Name, - IsDirectory: entry.IsDirectory, - IsDeleteData: false, - } + if err == nil { + newPath := filer2.NewFullPath(newDir.Path, req.NewName) + oldPath := filer2.NewFullPath(dir.Path, req.OldName) + dir.wfs.cacheDelete(newPath) + dir.wfs.cacheDelete(oldPath) + + oldFileNode := dir.wfs.getNode(oldPath, func() fs.Node { + return nil + }) + newDirNode := dir.wfs.getNode(filer2.FullPath(dir.Path), func() fs.Node { + return nil + }) + dir.wfs.forgetNode(newPath) + dir.wfs.forgetNode(oldPath) + if oldFileNode != nil && newDirNode != nil { + oldFile := oldFileNode.(*File) + oldFile.Name = req.NewName + oldFile.dir = newDirNode.(*Dir) + dir.wfs.getNode(newPath, func() fs.Node { + return oldFile + }) - glog.V(1).Infof("remove old entry: %v", request) - _, err := client.DeleteEntry(ctx, request) - if err != nil { - glog.V(0).Infof("renaming delete %s/%s: %v", oldParent, entry.Name, err) - return fuse.EIO } - } - return nil - + return err } diff --git a/weed/filesys/dirty_page.go b/weed/filesys/dirty_page.go index 3d519292b..7e33c97a7 100644 --- a/weed/filesys/dirty_page.go +++ b/weed/filesys/dirty_page.go @@ -4,107 +4,81 @@ import ( "bytes" "context" "fmt" + "io" + "sync" "time" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "sync" + "github.com/chrislusf/seaweedfs/weed/security" ) type ContinuousDirtyPages struct { - hasData bool - Offset int64 - Size int64 - Data []byte - f *File - lock sync.Mutex + intervals *ContinuousIntervals + f *File + lock sync.Mutex + collection string + replication string } func newDirtyPages(file *File) *ContinuousDirtyPages { return &ContinuousDirtyPages{ - Data: make([]byte, file.wfs.option.ChunkSizeLimit), - f: file, + intervals: &ContinuousIntervals{}, + f: file, } } -func (pages *ContinuousDirtyPages) AddPage(ctx context.Context, offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) { +func (pages *ContinuousDirtyPages) releaseResource() { +} + +var counter = int32(0) + +func (pages *ContinuousDirtyPages) AddPage(offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) { pages.lock.Lock() defer pages.lock.Unlock() - var chunk *filer_pb.FileChunk + glog.V(3).Infof("%s AddPage [%d,%d)", pages.f.fullpath(), offset, offset+int64(len(data))) - if len(data) > len(pages.Data) { + if len(data) > int(pages.f.wfs.option.ChunkSizeLimit) { // this is more than what buffer can hold. 
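		// (illustrative note, not part of this commit: a write larger than
		// ChunkSizeLimit bypasses the in-memory interval buffer entirely;
		// flushAndSave below first drains whatever is already buffered and
		// then uploads the oversized write as its own chunk, so dirty-page
		// memory stays bounded at roughly one ChunkSizeLimit per open file.)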
- return pages.flushAndSave(ctx, offset, data) + return pages.flushAndSave(offset, data) } - if offset < pages.Offset || offset >= pages.Offset+int64(len(pages.Data)) || - pages.Offset+int64(len(pages.Data)) < offset+int64(len(data)) { - // if the data is out of range, - // or buffer is full if adding new data, - // flush current buffer and add new data - - // println("offset", offset, "size", len(data), "existing offset", pages.Offset, "size", pages.Size) + pages.intervals.AddInterval(data, offset) - if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { - if chunk != nil { - glog.V(4).Infof("%s/%s add save [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size)) - chunks = append(chunks, chunk) - } - } else { - glog.V(0).Infof("%s/%s add save [%d,%d): %v", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), err) - return - } - pages.Offset = offset - copy(pages.Data, data) - pages.Size = int64(len(data)) - return - } + var chunk *filer_pb.FileChunk + var hasSavedData bool - if offset != pages.Offset+pages.Size { - // when this happens, debug shows the data overlapping with existing data is empty - // the data is not just append - if offset == pages.Offset && int(pages.Size) < len(data) { - // glog.V(2).Infof("pages[%d,%d) pages.Data len=%v, data len=%d, pages.Size=%d", pages.Offset, pages.Offset+pages.Size, len(pages.Data), len(data), pages.Size) - copy(pages.Data[pages.Size:], data[pages.Size:]) - } else { - if pages.Size != 0 { - glog.V(0).Infof("%s/%s add page: pages [%d, %d) write [%d, %d)", pages.f.dir.Path, pages.f.Name, pages.Offset, pages.Offset+pages.Size, offset, offset+int64(len(data))) - } - return pages.flushAndSave(ctx, offset, data) + if pages.intervals.TotalSize() > pages.f.wfs.option.ChunkSizeLimit { + chunk, hasSavedData, err = pages.saveExistingLargestPageToStorage() + if hasSavedData { + chunks = append(chunks, chunk) } - } else { - copy(pages.Data[offset-pages.Offset:], data) } - pages.Size = max(pages.Size, offset+int64(len(data))-pages.Offset) - return } -func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) { +func (pages *ContinuousDirtyPages) flushAndSave(offset int64, data []byte) (chunks []*filer_pb.FileChunk, err error) { var chunk *filer_pb.FileChunk + var newChunks []*filer_pb.FileChunk // flush existing - if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { - if chunk != nil { - glog.V(4).Infof("%s/%s flush existing [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size)) - chunks = append(chunks, chunk) + if newChunks, err = pages.saveExistingPagesToStorage(); err == nil { + if newChunks != nil { + chunks = append(chunks, newChunks...) 
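Because AddPage above now delegates every normally-sized write to pages.intervals, it helps to see the interval semantics in isolation. The snippet below is an illustration only, not part of this commit: it assumes it sits beside the dirty_page_interval.go file introduced later in this diff (where ContinuousIntervals, AddInterval, and ReadData are defined) and would run as a standard Go example test in a _test.go file.

package filesys

import "fmt"

// ExampleContinuousIntervals shows how overlapping writes are coalesced:
// the later write wins on overlap, and adjacent runs merge into one list.
func ExampleContinuousIntervals() {
	c := &ContinuousIntervals{}

	c.AddInterval([]byte("aaaaaa"), 0) // write [0,6)
	c.AddInterval([]byte("BB"), 2)     // overwrite [2,4)

	buf := make([]byte, 6)
	offset, size := c.ReadData(buf, 0)
	fmt.Printf("read [%d,%d): %q\n", offset, offset+int64(size), buf[:size])

	// Output: read [0,6): "aaBBaa"
}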
} } else { - glog.V(0).Infof("%s/%s failed to flush1 [%d,%d): %v", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), err) return } - pages.Size = 0 - pages.Offset = 0 // flush the new page - if chunk, err = pages.saveToStorage(ctx, data, offset); err == nil { + if chunk, err = pages.saveToStorage(bytes.NewReader(data), offset, int64(len(data))); err == nil { if chunk != nil { - glog.V(4).Infof("%s/%s flush big request [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size)) + glog.V(4).Infof("%s/%s flush big request [%d,%d) to %s", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size), chunk.FileId) chunks = append(chunks, chunk) } } else { @@ -115,39 +89,62 @@ func (pages *ContinuousDirtyPages) flushAndSave(ctx context.Context, offset int6 return } -func (pages *ContinuousDirtyPages) FlushToStorage(ctx context.Context) (chunk *filer_pb.FileChunk, err error) { +func (pages *ContinuousDirtyPages) FlushToStorage() (chunks []*filer_pb.FileChunk, err error) { pages.lock.Lock() defer pages.lock.Unlock() - if pages.Size == 0 { - return nil, nil - } + return pages.saveExistingPagesToStorage() +} - if chunk, err = pages.saveExistingPagesToStorage(ctx); err == nil { - pages.Size = 0 - pages.Offset = 0 - if chunk != nil { - glog.V(4).Infof("%s/%s flush [%d,%d)", pages.f.dir.Path, pages.f.Name, chunk.Offset, chunk.Offset+int64(chunk.Size)) +func (pages *ContinuousDirtyPages) saveExistingPagesToStorage() (chunks []*filer_pb.FileChunk, err error) { + + var hasSavedData bool + var chunk *filer_pb.FileChunk + + for { + + chunk, hasSavedData, err = pages.saveExistingLargestPageToStorage() + if !hasSavedData { + return chunks, err + } + + if err == nil { + chunks = append(chunks, chunk) + } else { + return } } - return + } -func (pages *ContinuousDirtyPages) saveExistingPagesToStorage(ctx context.Context) (*filer_pb.FileChunk, error) { +func (pages *ContinuousDirtyPages) saveExistingLargestPageToStorage() (chunk *filer_pb.FileChunk, hasSavedData bool, err error) { - if pages.Size == 0 { - return nil, nil + maxList := pages.intervals.RemoveLargestIntervalLinkedList() + if maxList == nil { + return nil, false, nil } - return pages.saveToStorage(ctx, pages.Data[:pages.Size], pages.Offset) + chunk, err = pages.saveToStorage(maxList.ToReader(), maxList.Offset(), maxList.Size()) + if err == nil { + hasSavedData = true + glog.V(3).Infof("%s saveToStorage [%d,%d) %s", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), chunk.FileId) + } else { + glog.V(0).Infof("%s saveToStorage [%d,%d): %v", pages.f.fullpath(), maxList.Offset(), maxList.Offset()+maxList.Size(), err) + return + } + + return } -func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte, offset int64) (*filer_pb.FileChunk, error) { +func (pages *ContinuousDirtyPages) saveToStorage(reader io.Reader, offset int64, size int64) (*filer_pb.FileChunk, error) { var fileId, host string + var auth security.EncodedJwt + + dir, _ := pages.f.fullpath().DirAndName() - if err := pages.f.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + if err := pages.f.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { request := &filer_pb.AssignVolumeRequest{ Count: 1, @@ -155,15 +152,21 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte Collection: pages.f.wfs.option.Collection, TtlSec: pages.f.wfs.option.TtlSec, DataCenter: pages.f.wfs.option.DataCenter, + ParentPath: dir, } - resp, 
err := client.AssignVolume(ctx, request) + resp, err := client.AssignVolume(context.Background(), request) if err != nil { glog.V(0).Infof("assign volume failure %v: %v", request, err) return err } + if resp.Error != "" { + return fmt.Errorf("assign volume failure %v: %v", request, resp.Error) + } - fileId, host = resp.FileId, resp.Url + fileId, host, auth = resp.FileId, resp.Url, security.EncodedJwt(resp.Auth) + host = pages.f.wfs.AdjustedUrl(host) + pages.collection, pages.replication = resp.Collection, resp.Replication return nil }); err != nil { @@ -171,8 +174,7 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte } fileUrl := fmt.Sprintf("http://%s/%s", host, fileId) - bufReader := bytes.NewReader(buf) - uploadResult, err := operation.Upload(fileUrl, pages.f.Name, bufReader, false, "application/octet-stream", nil, "") + uploadResult, err := operation.Upload(fileUrl, pages.f.Name, pages.f.wfs.option.Cipher, reader, false, "", nil, auth) if err != nil { glog.V(0).Infof("upload data %v to %s: %v", pages.f.Name, fileUrl, err) return nil, fmt.Errorf("upload data: %v", err) @@ -183,11 +185,13 @@ func (pages *ContinuousDirtyPages) saveToStorage(ctx context.Context, buf []byte } return &filer_pb.FileChunk{ - FileId: fileId, - Offset: offset, - Size: uint64(len(buf)), - Mtime: time.Now().UnixNano(), - ETag: uploadResult.ETag, + FileId: fileId, + Offset: offset, + Size: uint64(size), + Mtime: time.Now().UnixNano(), + ETag: uploadResult.ETag, + CipherKey: uploadResult.CipherKey, + IsGzipped: uploadResult.Gzip > 0, }, nil } @@ -198,3 +202,18 @@ func max(x, y int64) int64 { } return y } +func min(x, y int64) int64 { + if x < y { + return x + } + return y +} + +func (pages *ContinuousDirtyPages) ReadDirtyData(data []byte, startOffset int64) (offset int64, size int) { + + pages.lock.Lock() + defer pages.lock.Unlock() + + return pages.intervals.ReadData(data, startOffset) + +} diff --git a/weed/filesys/dirty_page_interval.go b/weed/filesys/dirty_page_interval.go new file mode 100644 index 000000000..ec94c6df1 --- /dev/null +++ b/weed/filesys/dirty_page_interval.go @@ -0,0 +1,220 @@ +package filesys + +import ( + "bytes" + "io" + "math" +) + +type IntervalNode struct { + Data []byte + Offset int64 + Size int64 + Next *IntervalNode +} + +type IntervalLinkedList struct { + Head *IntervalNode + Tail *IntervalNode +} + +type ContinuousIntervals struct { + lists []*IntervalLinkedList +} + +func (list *IntervalLinkedList) Offset() int64 { + return list.Head.Offset +} +func (list *IntervalLinkedList) Size() int64 { + return list.Tail.Offset + list.Tail.Size - list.Head.Offset +} +func (list *IntervalLinkedList) addNodeToTail(node *IntervalNode) { + // glog.V(4).Infof("add to tail [%d,%d) + [%d,%d) => [%d,%d)", list.Head.Offset, list.Tail.Offset+list.Tail.Size, node.Offset, node.Offset+node.Size, list.Head.Offset, node.Offset+node.Size) + list.Tail.Next = node + list.Tail = node +} +func (list *IntervalLinkedList) addNodeToHead(node *IntervalNode) { + // glog.V(4).Infof("add to head [%d,%d) + [%d,%d) => [%d,%d)", node.Offset, node.Offset+node.Size, list.Head.Offset, list.Tail.Offset+list.Tail.Size, node.Offset, list.Tail.Offset+list.Tail.Size) + node.Next = list.Head + list.Head = node +} + +func (list *IntervalLinkedList) ReadData(buf []byte, start, stop int64) { + t := list.Head + for { + + nodeStart, nodeStop := max(start, t.Offset), min(stop, t.Offset+t.Size) + if nodeStart < nodeStop { + // glog.V(0).Infof("copying start=%d stop=%d t=[%d,%d) t.data=%d => bufSize=%d 
nodeStart=%d, nodeStop=%d", start, stop, t.Offset, t.Offset+t.Size, len(t.Data), len(buf), nodeStart, nodeStop)
+			copy(buf[nodeStart-start:], t.Data[nodeStart-t.Offset:nodeStop-t.Offset])
+		}
+
+		if t.Next == nil {
+			break
+		}
+		t = t.Next
+	}
+}
+
+func (c *ContinuousIntervals) TotalSize() (total int64) {
+	for _, list := range c.lists {
+		total += list.Size()
+	}
+	return
+}
+
+func subList(list *IntervalLinkedList, start, stop int64) *IntervalLinkedList {
+	var nodes []*IntervalNode
+	for t := list.Head; t != nil; t = t.Next {
+		nodeStart, nodeStop := max(start, t.Offset), min(stop, t.Offset+t.Size)
+		if nodeStart >= nodeStop {
+			// skip non-overlapping IntervalNode
+			continue
+		}
+		nodes = append(nodes, &IntervalNode{
+			Data:   t.Data[nodeStart-t.Offset : nodeStop-t.Offset],
+			Offset: nodeStart,
+			Size:   nodeStop - nodeStart,
+			Next:   nil,
+		})
+	}
+	for i := 1; i < len(nodes); i++ {
+		nodes[i-1].Next = nodes[i]
+	}
+	return &IntervalLinkedList{
+		Head: nodes[0],
+		Tail: nodes[len(nodes)-1],
+	}
+}
+
+func (c *ContinuousIntervals) AddInterval(data []byte, offset int64) {
+
+	interval := &IntervalNode{Data: data, Offset: offset, Size: int64(len(data))}
+
+	var newLists []*IntervalLinkedList
+	for _, list := range c.lists {
+		// if the list is entirely to the left of the new interval, keep it
+		if list.Tail.Offset+list.Tail.Size <= interval.Offset {
+			newLists = append(newLists, list)
+		}
+		// if the list is entirely to the right of the new interval, keep it
+		if interval.Offset+interval.Size <= list.Head.Offset {
+			newLists = append(newLists, list)
+		}
+		// if the new interval overwrites the right part of the list,
+		// keep a new list built from the left part of the existing list
+		if list.Head.Offset < interval.Offset && interval.Offset < list.Tail.Offset+list.Tail.Size {
+			newLists = append(newLists, subList(list, list.Offset(), interval.Offset))
+		}
+		// if the new interval overwrites the left part of the list,
+		// keep a new list built from the right part of the existing list
+		if list.Head.Offset < interval.Offset+interval.Size && interval.Offset+interval.Size < list.Tail.Offset+list.Tail.Size {
+			newLists = append(newLists, subList(list, interval.Offset+interval.Size, list.Tail.Offset+list.Tail.Size))
+		}
+		// anything fully overwritten by the new interval is dropped
+	}
+
+	c.lists = newLists
+	// add the new interval to the lists, connecting neighbor lists
+	var prevList, nextList *IntervalLinkedList
+
+	for _, list := range c.lists {
+		if list.Head.Offset == interval.Offset+interval.Size {
+			nextList = list
+			break
+		}
+	}
+
+	for _, list := range c.lists {
+		if list.Head.Offset+list.Size() == offset {
+			list.addNodeToTail(interval)
+			prevList = list
+			break
+		}
+	}
+
+	if prevList != nil && nextList != nil {
+		// glog.V(4).Infof("connecting [%d,%d) + [%d,%d) => [%d,%d)", prevList.Head.Offset, prevList.Tail.Offset+prevList.Tail.Size, nextList.Head.Offset, nextList.Tail.Offset+nextList.Tail.Size, prevList.Head.Offset, nextList.Tail.Offset+nextList.Tail.Size)
+		prevList.Tail.Next = nextList.Head
+		prevList.Tail = nextList.Tail
+		c.removeList(nextList)
+	} else if nextList != nil {
+		// the new interval was not appended to any list above, so prepend it here
+		nextList.addNodeToHead(interval)
+	}
+	if prevList == nil && nextList == nil {
+		c.lists = append(c.lists, &IntervalLinkedList{
+			Head: interval,
+			Tail: interval,
+		})
+	}
+
+	return
+}
+
+func (c *ContinuousIntervals) RemoveLargestIntervalLinkedList() *IntervalLinkedList {
+	var maxSize int64
+	maxIndex := -1
+	for k, list := range c.lists {
+		if maxSize <= list.Size() {
+			maxSize = list.Size()
+			maxIndex = k
+		}
+	}
+	if maxSize <= 0 {
+		return nil
+	}
+
+	t := c.lists[maxIndex]
+	c.lists = append(c.lists[0:maxIndex], c.lists[maxIndex+1:]...)
+	return t
+
+}
+
+func (c *ContinuousIntervals) removeList(target *IntervalLinkedList) {
+	index := -1
+	for k, list := range c.lists {
+		if list.Offset() == target.Offset() {
+			index = k
+		}
+	}
+	if index < 0 {
+		return
+	}
+
+	c.lists = append(c.lists[0:index], c.lists[index+1:]...)
+
+}
+
+func (c *ContinuousIntervals) ReadData(data []byte, startOffset int64) (offset int64, size int) {
+	var minOffset int64 = math.MaxInt64
+	var maxStop int64
+	for _, list := range c.lists {
+		start := max(startOffset, list.Offset())
+		stop := min(startOffset+int64(len(data)), list.Offset()+list.Size())
+		if start <= stop {
+			list.ReadData(data[start-startOffset:], start, stop)
+			minOffset = min(minOffset, start)
+			maxStop = max(maxStop, stop)
+		}
+	}
+
+	if minOffset == math.MaxInt64 {
+		return 0, 0
+	}
+
+	offset = minOffset
+	size = int(maxStop - offset)
+	return
+}
+
+func (l *IntervalLinkedList) ToReader() io.Reader {
+	var readers []io.Reader
+	t := l.Head
+	readers = append(readers, bytes.NewReader(t.Data))
+	for t.Next != nil {
+		t = t.Next
+		readers = append(readers, bytes.NewReader(t.Data))
+	}
+	return io.MultiReader(readers...)
+}
diff --git a/weed/filesys/dirty_page_interval_test.go b/weed/filesys/dirty_page_interval_test.go
new file mode 100644
index 000000000..184be2f3b
--- /dev/null
+++ b/weed/filesys/dirty_page_interval_test.go
@@ -0,0 +1,72 @@
+package filesys
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestContinuousIntervals_AddIntervalAppend(t *testing.T) {
+
+	c := &ContinuousIntervals{}
+
+	// 25, 25, 25
+	c.AddInterval(getBytes(25, 3), 0)
+	// _, _, 23, 23, 23, 23
+	c.AddInterval(getBytes(23, 4), 2)
+
+	expectedData(t, c, 0, 25, 25, 23, 23, 23, 23)
+
+}
+
+func TestContinuousIntervals_AddIntervalInnerOverwrite(t *testing.T) {
+
+	c := &ContinuousIntervals{}
+
+	// 25, 25, 25, 25, 25
+	c.AddInterval(getBytes(25, 5), 0)
+	// _, _, 23, 23
+	c.AddInterval(getBytes(23, 2), 2)
+
+	expectedData(t, c, 0, 25, 25, 23, 23, 25)
+
+}
+
+func TestContinuousIntervals_AddIntervalFullOverwrite(t *testing.T) {
+
+	c := &ContinuousIntervals{}
+
+	// 25,
+	c.AddInterval(getBytes(25, 1), 0)
+	// _, _, _, _, 23, 23
+	c.AddInterval(getBytes(23, 2), 4)
+	// _, _, _, 24, 24, 24, 24
+	c.AddInterval(getBytes(24, 4), 3)
+
+	// _, 22, 22
+	c.AddInterval(getBytes(22, 2), 1)
+
+	expectedData(t, c, 0, 25, 22, 22, 24, 24, 24, 24)
+
+}
+
+func expectedData(t *testing.T, c *ContinuousIntervals, offset int, data ...byte) {
+	start, stop := int64(offset), int64(offset+len(data))
+	for _, list := range c.lists {
+		nodeStart, nodeStop := max(start, list.Head.Offset), min(stop, list.Head.Offset+list.Size())
+		if nodeStart < nodeStop {
+			buf := make([]byte, nodeStop-nodeStart)
+			list.ReadData(buf, nodeStart, nodeStop)
+			if bytes.Compare(buf, data[nodeStart-start:nodeStop-start]) != 0 {
+				t.Errorf("expected %v actual %v", data[nodeStart-start:nodeStop-start], buf)
+			}
+		}
+	}
+}
+
+func getBytes(content byte, length int) []byte {
+	data := make([]byte, length)
+	for i := 0; i < length; i++ {
+		data[i] = content
+	}
+	return data
+}
diff --git a/weed/filesys/file.go b/weed/filesys/file.go
index db9cb0a0f..69d440a73 100644
--- a/weed/filesys/file.go
+++ b/weed/filesys/file.go
@@ -1,15 +1,16 @@
 package filesys
 
 import (
-	"bazil.org/fuse"
-	"bazil.org/fuse/fs"
 	"context"
+	"os"
+	"sort"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/filer2"
"github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "os" - "path/filepath" - "time" + "github.com/seaweedfs/fuse" + "github.com/seaweedfs/fuse/fs" ) const blockSize = 512 @@ -18,27 +19,44 @@ var _ = fs.Node(&File{}) var _ = fs.NodeOpener(&File{}) var _ = fs.NodeFsyncer(&File{}) var _ = fs.NodeSetattrer(&File{}) +var _ = fs.NodeGetxattrer(&File{}) +var _ = fs.NodeSetxattrer(&File{}) +var _ = fs.NodeRemovexattrer(&File{}) +var _ = fs.NodeListxattrer(&File{}) +var _ = fs.NodeForgetter(&File{}) type File struct { - Name string - dir *Dir - wfs *WFS - entry *filer_pb.Entry - isOpen bool + Name string + dir *Dir + wfs *WFS + entry *filer_pb.Entry + entryViewCache []filer2.VisibleInterval + isOpen int } -func (file *File) fullpath() string { - return filepath.Join(file.dir.Path, file.Name) +func (file *File) fullpath() filer2.FullPath { + return filer2.NewFullPath(file.dir.Path, file.Name) } func (file *File) Attr(ctx context.Context, attr *fuse.Attr) error { - if err := file.maybeLoadAttributes(ctx); err != nil { - return err + glog.V(4).Infof("file Attr %s, open:%v, existing attr: %+v", file.fullpath(), file.isOpen, attr) + + if file.isOpen <= 0 { + if err := file.maybeLoadEntry(ctx); err != nil { + return err + } } + attr.Inode = file.fullpath().AsInode() + attr.Valid = time.Second attr.Mode = os.FileMode(file.entry.Attributes.FileMode) attr.Size = filer2.TotalSize(file.entry.Chunks) + if file.isOpen > 0 { + attr.Size = file.entry.Attributes.FileSize + glog.V(4).Infof("file Attr %s, open:%v, size: %d", file.fullpath(), file.isOpen, attr.Size) + } + attr.Crtime = time.Unix(file.entry.Attributes.Crtime, 0) attr.Mtime = time.Unix(file.entry.Attributes.Mtime, 0) attr.Gid = file.entry.Attributes.Gid attr.Uid = file.entry.Attributes.Uid @@ -49,11 +67,22 @@ func (file *File) Attr(ctx context.Context, attr *fuse.Attr) error { } +func (file *File) Getxattr(ctx context.Context, req *fuse.GetxattrRequest, resp *fuse.GetxattrResponse) error { + + // glog.V(4).Infof("file Getxattr %s", file.fullpath()) + + if err := file.maybeLoadEntry(ctx); err != nil { + return err + } + + return getxattr(file.entry, req, resp) +} + func (file *File) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.OpenResponse) (fs.Handle, error) { - glog.V(3).Infof("%v file open %+v", file.fullpath(), req) + glog.V(4).Infof("file %v open %+v", file.fullpath(), req) - file.isOpen = true + file.isOpen++ handle := file.wfs.AcquireHandle(file, req.Uid, req.Gid) @@ -67,21 +96,29 @@ func (file *File) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.Op func (file *File) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse.SetattrResponse) error { - if err := file.maybeLoadAttributes(ctx); err != nil { - return err - } + glog.V(3).Infof("%v file setattr %+v, old:%+v", file.fullpath(), req, file.entry.Attributes) - if file.isOpen { - return nil + if err := file.maybeLoadEntry(ctx); err != nil { + return err } - glog.V(3).Infof("%v file setattr %+v, old:%+v", file.fullpath(), req, file.entry.Attributes) if req.Valid.Size() { glog.V(3).Infof("%v file setattr set size=%v", file.fullpath(), req.Size) - if req.Size == 0 { + if req.Size < filer2.TotalSize(file.entry.Chunks) { // fmt.Printf("truncate %v \n", fullPath) - file.entry.Chunks = nil + var chunks []*filer_pb.FileChunk + for _, chunk := range file.entry.Chunks { + int64Size := int64(chunk.Size) + if chunk.Offset+int64Size > int64(req.Size) { + int64Size = int64(req.Size) - chunk.Offset + } + if int64Size > 0 { + 
chunks = append(chunks, chunk) + } + } + file.entry.Chunks = chunks + file.entryViewCache = nil } file.entry.Attributes.FileSize = req.Size } @@ -97,26 +134,73 @@ func (file *File) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *f file.entry.Attributes.Gid = req.Gid } + if req.Valid.Crtime() { + file.entry.Attributes.Crtime = req.Crtime.Unix() + } + if req.Valid.Mtime() { file.entry.Attributes.Mtime = req.Mtime.Unix() } - return file.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + if file.isOpen > 0 { + return nil + } - request := &filer_pb.UpdateEntryRequest{ - Directory: file.dir.Path, - Entry: file.entry, - } + file.wfs.cacheDelete(file.fullpath()) - glog.V(1).Infof("set attr file entry: %v", request) - _, err := client.UpdateEntry(ctx, request) - if err != nil { - glog.V(0).Infof("UpdateEntry file %s/%s: %v", file.dir.Path, file.Name, err) - return fuse.EIO - } + return file.saveEntry() - return nil - }) +} + +func (file *File) Setxattr(ctx context.Context, req *fuse.SetxattrRequest) error { + + glog.V(4).Infof("file Setxattr %s: %s", file.fullpath(), req.Name) + + if err := file.maybeLoadEntry(ctx); err != nil { + return err + } + + if err := setxattr(file.entry, req); err != nil { + return err + } + + file.wfs.cacheDelete(file.fullpath()) + + return file.saveEntry() + +} + +func (file *File) Removexattr(ctx context.Context, req *fuse.RemovexattrRequest) error { + + glog.V(4).Infof("file Removexattr %s: %s", file.fullpath(), req.Name) + + if err := file.maybeLoadEntry(ctx); err != nil { + return err + } + + if err := removexattr(file.entry, req); err != nil { + return err + } + + file.wfs.cacheDelete(file.fullpath()) + + return file.saveEntry() + +} + +func (file *File) Listxattr(ctx context.Context, req *fuse.ListxattrRequest, resp *fuse.ListxattrResponse) error { + + glog.V(4).Infof("file Listxattr %s", file.fullpath()) + + if err := file.maybeLoadEntry(ctx); err != nil { + return err + } + + if err := listxattr(file.entry, req, resp); err != nil { + return err + } + + return nil } @@ -128,38 +212,65 @@ func (file *File) Fsync(ctx context.Context, req *fuse.FsyncRequest) error { return nil } -func (file *File) maybeLoadAttributes(ctx context.Context) error { - if file.entry == nil || !file.isOpen { - item := file.wfs.listDirectoryEntriesCache.Get(file.fullpath()) - if item != nil && !item.Expired() { - entry := item.Value().(*filer_pb.Entry) - file.entry = entry - // glog.V(1).Infof("file attr read cached %v attributes", file.Name) - } else { - err := file.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { - - request := &filer_pb.LookupDirectoryEntryRequest{ - Name: file.Name, - Directory: file.dir.Path, - } +func (file *File) Forget() { + glog.V(3).Infof("Forget file %s/%s", file.dir.Path, file.Name) - resp, err := client.LookupDirectoryEntry(ctx, request) - if err != nil { - glog.V(0).Infof("file attr read file %v: %v", request, err) - return err - } - - file.entry = resp.Entry - - glog.V(1).Infof("file attr %v %+v: %d", file.fullpath(), file.entry.Attributes, filer2.TotalSize(file.entry.Chunks)) + file.wfs.forgetNode(filer2.NewFullPath(file.dir.Path, file.Name)) - return nil - }) +} - if err != nil { - return err - } +func (file *File) maybeLoadEntry(ctx context.Context) error { + if file.entry == nil || file.isOpen <= 0 { + entry, err := file.wfs.maybeLoadEntry(file.dir.Path, file.Name) + if err != nil { + return err + } + if entry != nil { + file.setEntry(entry) } } return nil } + +func (file *File) addChunks(chunks 
[]*filer_pb.FileChunk) { + + sort.Slice(chunks, func(i, j int) bool { + return chunks[i].Mtime < chunks[j].Mtime + }) + + var newVisibles []filer2.VisibleInterval + for _, chunk := range chunks { + newVisibles = filer2.MergeIntoVisibles(file.entryViewCache, newVisibles, chunk) + t := file.entryViewCache[:0] + file.entryViewCache = newVisibles + newVisibles = t + } + + glog.V(3).Infof("%s existing %d chunks adds %d more", file.fullpath(), len(file.entry.Chunks), len(chunks)) + + file.entry.Chunks = append(file.entry.Chunks, chunks...) +} + +func (file *File) setEntry(entry *filer_pb.Entry) { + file.entry = entry + file.entryViewCache = filer2.NonOverlappingVisibleIntervals(file.entry.Chunks) +} + +func (file *File) saveEntry() error { + return file.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.UpdateEntryRequest{ + Directory: file.dir.Path, + Entry: file.entry, + } + + glog.V(1).Infof("save file entry: %v", request) + _, err := client.UpdateEntry(context.Background(), request) + if err != nil { + glog.V(0).Infof("UpdateEntry file %s/%s: %v", file.dir.Path, file.Name, err) + return fuse.EIO + } + + return nil + }) +} diff --git a/weed/filesys/filehandle.go b/weed/filesys/filehandle.go index cc2c070eb..100c9eba0 100644 --- a/weed/filesys/filehandle.go +++ b/weed/filesys/filehandle.go @@ -1,17 +1,19 @@ package filesys import ( - "bazil.org/fuse" - "bazil.org/fuse/fs" "context" "fmt" + "mime" + "path" + "time" + + "github.com/gabriel-vasile/mimetype" + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/chrislusf/seaweedfs/weed/util" - "net/http" - "strings" - "sync" + "github.com/seaweedfs/fuse" + "github.com/seaweedfs/fuse/fs" ) type FileHandle struct { @@ -49,85 +51,51 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus glog.V(4).Infof("%s read fh %d: [%d,%d)", fh.f.fullpath(), fh.handle, req.Offset, req.Offset+int64(req.Size)) - // this value should come from the filer instead of the old f - if len(fh.f.entry.Chunks) == 0 { - glog.V(1).Infof("empty fh %v/%v", fh.f.dir.Path, fh.f.Name) - return nil - } - buff := make([]byte, req.Size) - chunkViews := filer2.ViewFromChunks(fh.f.entry.Chunks, req.Offset, req.Size) - - var vids []string - for _, chunkView := range chunkViews { - vids = append(vids, volumeId(chunkView.FileId)) - } - - vid2Locations := make(map[string]*filer_pb.Locations) - - err := fh.f.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { - - glog.V(4).Infof("read fh lookup volume id locations: %v", vids) - resp, err := client.LookupVolume(ctx, &filer_pb.LookupVolumeRequest{ - VolumeIds: vids, - }) - if err != nil { - return err + totalRead, err := fh.readFromChunks(buff, req.Offset) + if err == nil { + dirtyOffset, dirtySize := fh.readFromDirtyPages(buff, req.Offset) + if totalRead+req.Offset < dirtyOffset+int64(dirtySize) { + totalRead = dirtyOffset + int64(dirtySize) - req.Offset } + } - vid2Locations = resp.LocationsMap - - return nil - }) + resp.Data = buff[:totalRead] if err != nil { - glog.V(4).Infof("%v/%v read fh lookup volume ids: %v", fh.f.dir.Path, fh.f.Name, err) - return fmt.Errorf("failed to lookup volume ids %v: %v", vids, err) + glog.Errorf("file handle read %s: %v", fh.f.fullpath(), err) + return fuse.EIO } - var totalRead int64 - var wg sync.WaitGroup - for _, chunkView := range chunkViews { - wg.Add(1) - go func(chunkView *filer2.ChunkView) { - defer wg.Done() - - 
glog.V(4).Infof("read fh reading chunk: %+v", chunkView) + return err +} - locations := vid2Locations[volumeId(chunkView.FileId)] - if locations == nil || len(locations.Locations) == 0 { - glog.V(0).Infof("failed to locate %s", chunkView.FileId) - err = fmt.Errorf("failed to locate %s", chunkView.FileId) - return - } +func (fh *FileHandle) readFromDirtyPages(buff []byte, startOffset int64) (offset int64, size int) { + return fh.dirtyPages.ReadDirtyData(buff, startOffset) +} - var n int64 - n, err = util.ReadUrl( - fmt.Sprintf("http://%s/%s", locations.Locations[0].Url, chunkView.FileId), - chunkView.Offset, - int(chunkView.Size), - buff[chunkView.LogicOffset-req.Offset:chunkView.LogicOffset-req.Offset+int64(chunkView.Size)]) +func (fh *FileHandle) readFromChunks(buff []byte, offset int64) (int64, error) { - if err != nil { + // this value should come from the filer instead of the old f + if len(fh.f.entry.Chunks) == 0 { + glog.V(1).Infof("empty fh %v", fh.f.fullpath()) + return 0, nil + } - glog.V(0).Infof("%v/%v read http://%s/%v %v bytes: %v", fh.f.dir.Path, fh.f.Name, locations.Locations[0].Url, chunkView.FileId, n, err) + if fh.f.entryViewCache == nil { + fh.f.entryViewCache = filer2.NonOverlappingVisibleIntervals(fh.f.entry.Chunks) + } - err = fmt.Errorf("failed to read http://%s/%s: %v", - locations.Locations[0].Url, chunkView.FileId, err) - return - } + chunkViews := filer2.ViewFromVisibleIntervals(fh.f.entryViewCache, offset, len(buff)) - glog.V(4).Infof("read fh read %d bytes: %+v", n, chunkView) - totalRead += n + totalRead, err := filer2.ReadIntoBuffer(fh.f.wfs, fh.f.fullpath(), buff, chunkViews, offset) - }(chunkView) + if err != nil { + glog.Errorf("file handle read %s: %v", fh.f.fullpath(), err) } - wg.Wait() - resp.Data = buff[:totalRead] - - return err + return totalRead, err } // Write to the file handle @@ -135,24 +103,32 @@ func (fh *FileHandle) Write(ctx context.Context, req *fuse.WriteRequest, resp *f // write the request to volume servers - glog.V(4).Infof("%+v/%v write fh %d: [%d,%d)", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data))) + fh.f.entry.Attributes.FileSize = uint64(max(req.Offset+int64(len(req.Data)), int64(fh.f.entry.Attributes.FileSize))) + // glog.V(0).Infof("%v write [%d,%d)", fh.f.fullpath(), req.Offset, req.Offset+int64(len(req.Data))) - chunks, err := fh.dirtyPages.AddPage(ctx, req.Offset, req.Data) + chunks, err := fh.dirtyPages.AddPage(req.Offset, req.Data) if err != nil { - glog.Errorf("%+v/%v write fh %d: [%d,%d): %v", fh.f.dir.Path, fh.f.Name, fh.handle, req.Offset, req.Offset+int64(len(req.Data)), err) - return fmt.Errorf("write %s/%s at [%d,%d): %v", fh.f.dir.Path, fh.f.Name, req.Offset, req.Offset+int64(len(req.Data)), err) + glog.Errorf("%v write fh %d: [%d,%d): %v", fh.f.fullpath(), fh.handle, req.Offset, req.Offset+int64(len(req.Data)), err) + return fuse.EIO } resp.Size = len(req.Data) if req.Offset == 0 { - fh.contentType = http.DetectContentType(req.Data) + // detect mime type + detectedMIME := mimetype.Detect(req.Data) + fh.contentType = detectedMIME.String() + if ext := path.Ext(fh.f.Name); ext != detectedMIME.Extension() { + fh.contentType = mime.TypeByExtension(ext) + } + fh.dirtyMetadata = true } - for _, chunk := range chunks { - fh.f.entry.Chunks = append(fh.f.entry.Chunks, chunk) - glog.V(1).Infof("uploaded %s/%s to %s [%d,%d)", fh.f.dir.Path, fh.f.Name, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size)) + if len(chunks) > 0 { + + fh.f.addChunks(chunks) + fh.dirtyMetadata = true } @@ 
-163,39 +139,47 @@ func (fh *FileHandle) Release(ctx context.Context, req *fuse.ReleaseRequest) err glog.V(4).Infof("%v release fh %d", fh.f.fullpath(), fh.handle) - fh.f.wfs.ReleaseHandle(fuse.HandleID(fh.handle)) + fh.f.isOpen-- - fh.f.isOpen = false + if fh.f.isOpen <= 0 { + fh.dirtyPages.releaseResource() + fh.f.wfs.ReleaseHandle(fh.f.fullpath(), fuse.HandleID(fh.handle)) + } return nil } -// Flush - experimenting with uploading at flush, this slows operations down till it has been -// completely flushed func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error { // fflush works at fh level // send the data to the OS glog.V(4).Infof("%s fh %d flush %v", fh.f.fullpath(), fh.handle, req) - chunk, err := fh.dirtyPages.FlushToStorage(ctx) + chunks, err := fh.dirtyPages.FlushToStorage() if err != nil { - glog.Errorf("flush %s/%s to %s [%d,%d): %v", fh.f.dir.Path, fh.f.Name, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size), err) - return fmt.Errorf("flush %s/%s to %s [%d,%d): %v", fh.f.dir.Path, fh.f.Name, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size), err) + glog.Errorf("flush %s: %v", fh.f.fullpath(), err) + return fuse.EIO } - if chunk != nil { - fh.f.entry.Chunks = append(fh.f.entry.Chunks, chunk) + + fh.f.addChunks(chunks) + if len(chunks) > 0 { + fh.dirtyMetadata = true } if !fh.dirtyMetadata { return nil } - return fh.f.wfs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + err = fh.f.wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { if fh.f.entry.Attributes != nil { fh.f.entry.Attributes.Mime = fh.contentType fh.f.entry.Attributes.Uid = req.Uid fh.f.entry.Attributes.Gid = req.Gid + fh.f.entry.Attributes.Mtime = time.Now().Unix() + fh.f.entry.Attributes.Crtime = time.Now().Unix() + fh.f.entry.Attributes.FileMode = uint32(0777 &^ fh.f.wfs.option.Umask) + fh.f.entry.Attributes.Collection = fh.dirtyPages.collection + fh.f.entry.Attributes.Replication = fh.dirtyPages.replication } request := &filer_pb.CreateEntryRequest{ @@ -203,22 +187,36 @@ func (fh *FileHandle) Flush(ctx context.Context, req *fuse.FlushRequest) error { Entry: fh.f.entry, } - glog.V(1).Infof("%s/%s set chunks: %v", fh.f.dir.Path, fh.f.Name, len(fh.f.entry.Chunks)) + glog.V(3).Infof("%s set chunks: %v", fh.f.fullpath(), len(fh.f.entry.Chunks)) for i, chunk := range fh.f.entry.Chunks { - glog.V(1).Infof("%s/%s chunks %d: %v [%d,%d)", fh.f.dir.Path, fh.f.Name, i, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size)) + glog.V(3).Infof("%s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size)) + } + + chunks, garbages := filer2.CompactFileChunks(fh.f.entry.Chunks) + fh.f.entry.Chunks = chunks + // fh.f.entryViewCache = nil + + if err := filer_pb.CreateEntry(client, request); err != nil { + glog.Errorf("fh flush create %s: %v", fh.f.fullpath(), err) + return fmt.Errorf("fh flush create %s: %v", fh.f.fullpath(), err) } - if _, err := client.CreateEntry(ctx, request); err != nil { - return fmt.Errorf("update fh: %v", err) + + fh.f.wfs.deleteFileChunks(garbages) + for i, chunk := range garbages { + glog.V(3).Infof("garbage %s chunks %d: %v [%d,%d)", fh.f.fullpath(), i, chunk.FileId, chunk.Offset, chunk.Offset+int64(chunk.Size)) } return nil }) -} -func volumeId(fileId string) string { - lastCommaIndex := strings.LastIndex(fileId, ",") - if lastCommaIndex > 0 { - return fileId[:lastCommaIndex] + if err == nil { + fh.dirtyMetadata = false } - return fileId + + if err != nil { + glog.Errorf("%v fh %d 
flush: %v", fh.f.fullpath(), fh.handle, err) + return fuse.EIO + } + + return nil } diff --git a/weed/filesys/wfs.go b/weed/filesys/wfs.go index 9b1b98ebf..77438b58e 100644 --- a/weed/filesys/wfs.go +++ b/weed/filesys/wfs.go @@ -1,109 +1,268 @@ package filesys import ( - "bazil.org/fuse" - "bazil.org/fuse/fs" + "context" "fmt" + "math" + "os" + "strings" + "sync" + "time" + + "github.com/karlseguin/ccache" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/chrislusf/seaweedfs/weed/util" - "github.com/karlseguin/ccache" - "sync" + "github.com/seaweedfs/fuse" + "github.com/seaweedfs/fuse/fs" ) type Option struct { FilerGrpcAddress string + GrpcDialOption grpc.DialOption FilerMountRootPath string Collection string Replication string TtlSec int32 ChunkSizeLimit int64 DataCenter string - DirListingLimit int + DirListCacheLimit int64 + EntryCacheTtl time.Duration + Umask os.FileMode + + MountUid uint32 + MountGid uint32 + MountMode os.FileMode + MountCtime time.Time + MountMtime time.Time + + OutsideContainerClusterMode bool // whether the mount runs outside SeaweedFS containers + Cipher bool // whether encrypt data on volume server + } +var _ = fs.FS(&WFS{}) +var _ = fs.FSStatfser(&WFS{}) + type WFS struct { option *Option listDirectoryEntriesCache *ccache.Cache - // contains all open handles + // contains all open handles, protected by handlesLock + handlesLock sync.Mutex handles []*FileHandle - pathToHandleIndex map[string]int - pathToHandleLock sync.Mutex + pathToHandleIndex map[filer2.FullPath]int + + bufPool sync.Pool + + stats statsCache + + // nodes, protected by nodesLock + nodesLock sync.Mutex + nodes map[uint64]fs.Node + root fs.Node +} +type statsCache struct { + filer_pb.StatisticsResponse + lastChecked int64 // unix time in seconds } func NewSeaweedFileSystem(option *Option) *WFS { - return &WFS{ - option: option, - listDirectoryEntriesCache: ccache.New(ccache.Configure().MaxSize(6000).ItemsToPrune(100)), - pathToHandleIndex: make(map[string]int), + wfs := &WFS{ + option: option, + listDirectoryEntriesCache: ccache.New(ccache.Configure().MaxSize(option.DirListCacheLimit * 3).ItemsToPrune(100)), + pathToHandleIndex: make(map[filer2.FullPath]int), + bufPool: sync.Pool{ + New: func() interface{} { + return make([]byte, option.ChunkSizeLimit) + }, + }, + nodes: make(map[uint64]fs.Node), } + + wfs.root = &Dir{Path: wfs.option.FilerMountRootPath, wfs: wfs} + + return wfs } func (wfs *WFS) Root() (fs.Node, error) { - return &Dir{Path: wfs.option.FilerMountRootPath, wfs: wfs}, nil + return wfs.root, nil } -func (wfs *WFS) withFilerClient(fn func(filer_pb.SeaweedFilerClient) error) error { +func (wfs *WFS) WithFilerClient(fn func(filer_pb.SeaweedFilerClient) error) error { - grpcConnection, err := util.GrpcDial(wfs.option.FilerGrpcAddress) - if err != nil { - return fmt.Errorf("fail to dial %s: %v", wfs.option.FilerGrpcAddress, err) - } - defer grpcConnection.Close() + err := pb.WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error { + client := filer_pb.NewSeaweedFilerClient(grpcConnection) + return fn(client) + }, wfs.option.FilerGrpcAddress, wfs.option.GrpcDialOption) - client := filer_pb.NewSeaweedFilerClient(grpcConnection) + if err == nil { + return nil + } + return err - return fn(client) } func (wfs *WFS) AcquireHandle(file *File, uid, gid uint32) (fileHandle *FileHandle) { - 
wfs.pathToHandleLock.Lock() - defer wfs.pathToHandleLock.Unlock() fullpath := file.fullpath() + glog.V(4).Infof("%s AcquireHandle uid=%d gid=%d", fullpath, uid, gid) + + wfs.handlesLock.Lock() + defer wfs.handlesLock.Unlock() index, found := wfs.pathToHandleIndex[fullpath] if found && wfs.handles[index] != nil { - glog.V(4).Infoln(fullpath, "found fileHandle id", index) + glog.V(2).Infoln(fullpath, "found fileHandle id", index) return wfs.handles[index] } fileHandle = newFileHandle(file, uid, gid) - - if found && wfs.handles[index] != nil { - glog.V(4).Infoln(fullpath, "reuse previous fileHandle id", index) - wfs.handles[index] = fileHandle - fileHandle.handle = uint64(index) - return - } - for i, h := range wfs.handles { if h == nil { wfs.handles[i] = fileHandle fileHandle.handle = uint64(i) wfs.pathToHandleIndex[fullpath] = i - glog.V(4).Infoln(fullpath, "reuse fileHandle id", fileHandle.handle) + glog.V(4).Infof("%s reuse fh %d", fullpath, fileHandle.handle) return } } wfs.handles = append(wfs.handles, fileHandle) fileHandle.handle = uint64(len(wfs.handles) - 1) - glog.V(4).Infoln(fullpath, "new fileHandle id", fileHandle.handle) wfs.pathToHandleIndex[fullpath] = int(fileHandle.handle) + glog.V(4).Infof("%s new fh %d", fullpath, fileHandle.handle) return } -func (wfs *WFS) ReleaseHandle(handleId fuse.HandleID) { - wfs.pathToHandleLock.Lock() - defer wfs.pathToHandleLock.Unlock() +func (wfs *WFS) ReleaseHandle(fullpath filer2.FullPath, handleId fuse.HandleID) { + wfs.handlesLock.Lock() + defer wfs.handlesLock.Unlock() - glog.V(4).Infoln("releasing handle id", handleId, "current handles lengh", len(wfs.handles)) + glog.V(4).Infof("%s ReleaseHandle id %d current handles length %d", fullpath, handleId, len(wfs.handles)) + delete(wfs.pathToHandleIndex, fullpath) if int(handleId) < len(wfs.handles) { wfs.handles[int(handleId)] = nil } return } + +// Statfs is called to obtain file system metadata. 
Implements fuse.FSStatfser +func (wfs *WFS) Statfs(ctx context.Context, req *fuse.StatfsRequest, resp *fuse.StatfsResponse) error { + + glog.V(4).Infof("reading fs stats: %+v", req) + + if wfs.stats.lastChecked < time.Now().Unix()-20 { + + err := wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.StatisticsRequest{ + Collection: wfs.option.Collection, + Replication: wfs.option.Replication, + Ttl: fmt.Sprintf("%ds", wfs.option.TtlSec), + } + + glog.V(4).Infof("reading filer stats: %+v", request) + resp, err := client.Statistics(context.Background(), request) + if err != nil { + glog.V(0).Infof("reading filer stats %v: %v", request, err) + return err + } + glog.V(4).Infof("read filer stats: %+v", resp) + + wfs.stats.TotalSize = resp.TotalSize + wfs.stats.UsedSize = resp.UsedSize + wfs.stats.FileCount = resp.FileCount + wfs.stats.lastChecked = time.Now().Unix() + + return nil + }) + if err != nil { + glog.V(0).Infof("filer Statistics: %v", err) + return err + } + } + + totalDiskSize := wfs.stats.TotalSize + usedDiskSize := wfs.stats.UsedSize + actualFileCount := wfs.stats.FileCount + + // Compute the total number of available blocks + resp.Blocks = totalDiskSize / blockSize + + // Compute the number of used blocks + numBlocks := uint64(usedDiskSize / blockSize) + + // Report the number of free and available blocks for the block size + resp.Bfree = resp.Blocks - numBlocks + resp.Bavail = resp.Blocks - numBlocks + resp.Bsize = uint32(blockSize) + + // Report the total number of possible files in the file system (and those free) + resp.Files = math.MaxInt64 + resp.Ffree = math.MaxInt64 - actualFileCount + + // Report the maximum length of a name and the minimum fragment size + resp.Namelen = 1024 + resp.Frsize = uint32(blockSize) + + return nil +} + +func (wfs *WFS) cacheGet(path filer2.FullPath) *filer_pb.Entry { + item := wfs.listDirectoryEntriesCache.Get(string(path)) + if item != nil && !item.Expired() { + return item.Value().(*filer_pb.Entry) + } + return nil +} +func (wfs *WFS) cacheSet(path filer2.FullPath, entry *filer_pb.Entry, ttl time.Duration) { + if entry == nil { + wfs.listDirectoryEntriesCache.Delete(string(path)) + } else { + wfs.listDirectoryEntriesCache.Set(string(path), entry, ttl) + } +} +func (wfs *WFS) cacheDelete(path filer2.FullPath) { + wfs.listDirectoryEntriesCache.Delete(string(path)) +} + +func (wfs *WFS) getNode(fullpath filer2.FullPath, fn func() fs.Node) fs.Node { + wfs.nodesLock.Lock() + defer wfs.nodesLock.Unlock() + + node, found := wfs.nodes[fullpath.AsInode()] + if found { + return node + } + node = fn() + if node != nil { + wfs.nodes[fullpath.AsInode()] = node + } + return node +} + +func (wfs *WFS) forgetNode(fullpath filer2.FullPath) { + wfs.nodesLock.Lock() + defer wfs.nodesLock.Unlock() + + delete(wfs.nodes, fullpath.AsInode()) +} + +func (wfs *WFS) AdjustedUrl(hostAndPort string) string { + if !wfs.option.OutsideContainerClusterMode { + return hostAndPort + } + commaIndex := strings.Index(hostAndPort, ":") + if commaIndex < 0 { + return hostAndPort + } + filerCommaIndex := strings.Index(wfs.option.FilerGrpcAddress, ":") + return fmt.Sprintf("%s:%s", wfs.option.FilerGrpcAddress[:filerCommaIndex], hostAndPort[commaIndex+1:]) + +} diff --git a/weed/filesys/wfs_deletion.go b/weed/filesys/wfs_deletion.go new file mode 100644 index 000000000..bf21b1808 --- /dev/null +++ b/weed/filesys/wfs_deletion.go @@ -0,0 +1,73 @@ +package filesys + +import ( + "context" + + "google.golang.org/grpc" + + 
"github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func (wfs *WFS) deleteFileChunks(chunks []*filer_pb.FileChunk) { + if len(chunks) == 0 { + return + } + + var fileIds []string + for _, chunk := range chunks { + fileIds = append(fileIds, chunk.GetFileIdString()) + } + + wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + wfs.deleteFileIds(wfs.option.GrpcDialOption, client, fileIds) + return nil + }) +} + +func (wfs *WFS) deleteFileIds(grpcDialOption grpc.DialOption, client filer_pb.SeaweedFilerClient, fileIds []string) error { + + var vids []string + for _, fileId := range fileIds { + vids = append(vids, filer2.VolumeId(fileId)) + } + + lookupFunc := func(vids []string) (map[string]operation.LookupResult, error) { + + m := make(map[string]operation.LookupResult) + + glog.V(4).Infof("remove file lookup volume id locations: %v", vids) + resp, err := client.LookupVolume(context.Background(), &filer_pb.LookupVolumeRequest{ + VolumeIds: vids, + }) + if err != nil { + return m, err + } + + for _, vid := range vids { + lr := operation.LookupResult{ + VolumeId: vid, + Locations: nil, + } + locations, found := resp.LocationsMap[vid] + if !found { + continue + } + for _, loc := range locations.Locations { + lr.Locations = append(lr.Locations, operation.Location{ + Url: wfs.AdjustedUrl(loc.Url), + PublicUrl: loc.PublicUrl, + }) + } + m[vid] = lr + } + + return m, err + } + + _, err := operation.DeleteFilesWithLookupVolumeId(grpcDialOption, fileIds, lookupFunc) + + return err +} diff --git a/weed/filesys/xattr.go b/weed/filesys/xattr.go new file mode 100644 index 000000000..af154a7ee --- /dev/null +++ b/weed/filesys/xattr.go @@ -0,0 +1,141 @@ +package filesys + +import ( + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/fuse" +) + +func getxattr(entry *filer_pb.Entry, req *fuse.GetxattrRequest, resp *fuse.GetxattrResponse) error { + + if entry == nil { + return fuse.ErrNoXattr + } + if entry.Extended == nil { + return fuse.ErrNoXattr + } + data, found := entry.Extended[req.Name] + if !found { + return fuse.ErrNoXattr + } + if req.Position < uint32(len(data)) { + size := req.Size + if req.Position+size >= uint32(len(data)) { + size = uint32(len(data)) - req.Position + } + if size == 0 { + resp.Xattr = data[req.Position:] + } else { + resp.Xattr = data[req.Position : req.Position+size] + } + } + + return nil + +} + +func setxattr(entry *filer_pb.Entry, req *fuse.SetxattrRequest) error { + + if entry == nil { + return fuse.EIO + } + + if entry.Extended == nil { + entry.Extended = make(map[string][]byte) + } + data, _ := entry.Extended[req.Name] + + newData := make([]byte, int(req.Position)+len(req.Xattr)) + + copy(newData, data) + + copy(newData[int(req.Position):], req.Xattr) + + entry.Extended[req.Name] = newData + + return nil + +} + +func removexattr(entry *filer_pb.Entry, req *fuse.RemovexattrRequest) error { + + if entry == nil { + return fuse.ErrNoXattr + } + + if entry.Extended == nil { + return fuse.ErrNoXattr + } + + _, found := entry.Extended[req.Name] + + if !found { + return fuse.ErrNoXattr + } + + delete(entry.Extended, req.Name) + + return nil + +} + +func listxattr(entry *filer_pb.Entry, req *fuse.ListxattrRequest, resp *fuse.ListxattrResponse) error { + + if entry == nil { + return fuse.EIO + } + + for k := 
range entry.Extended { + resp.Append(k) + } + + size := req.Size + if req.Position+size >= uint32(len(resp.Xattr)) { + size = uint32(len(resp.Xattr)) - req.Position + } + + if size == 0 { + resp.Xattr = resp.Xattr[req.Position:] + } else { + resp.Xattr = resp.Xattr[req.Position : req.Position+size] + } + + return nil + +} + +func (wfs *WFS) maybeLoadEntry(dir, name string) (entry *filer_pb.Entry, err error) { + + fullpath := filer2.NewFullPath(dir, name) + entry = wfs.cacheGet(fullpath) + if entry != nil { + return + } + // glog.V(3).Infof("read entry cache miss %s", fullpath) + + err = wfs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.LookupDirectoryEntryRequest{ + Name: name, + Directory: dir, + } + + resp, err := filer_pb.LookupEntry(client, request) + if err != nil { + if err == filer_pb.ErrNotFound { + glog.V(3).Infof("file attr read not found file %v: %v", request, err) + return fuse.ENOENT + } + glog.V(3).Infof("attr read %v: %v", request, err) + return fuse.EIO + } + + entry = resp.Entry + wfs.cacheSet(fullpath, entry, wfs.option.EntryCacheTtl) + + return nil + }) + + return +} diff --git a/weed/glide.lock b/weed/glide.lock deleted file mode 100644 index 7c5205744..000000000 --- a/weed/glide.lock +++ /dev/null @@ -1,190 +0,0 @@ -hash: 2e3a065472829938d25e879451b6d1aa43e55270e1166a9c044803ef8a3b9eb1 -updated: 2018-06-28T22:01:35.910567-07:00 -imports: -- name: bazil.org/fuse - version: 65cc252bf6691cb3c7014bcb2c8dc29de91e3a7e - subpackages: - - fs - - fuseutil -- name: github.com/boltdb/bolt - version: 2f1ce7a837dcb8da3ec595b1dac9d0632f0f99e8 -- name: github.com/chrislusf/raft - version: 5f7ddd8f479583daf05879d3d3b174aa202c8fb7 - subpackages: - - protobuf -- name: github.com/dgrijalva/jwt-go - version: 06ea1031745cb8b3dab3f6a236daf2b0aa468b7e -- name: github.com/disintegration/imaging - version: bbcee2f5c9d5e94ca42c8b50ec847fec64a6c134 -- name: github.com/fsnotify/fsnotify - version: c2828203cd70a50dcccfb2761f8b1f8ceef9a8e9 -- name: github.com/go-redis/redis - version: 83fb42932f6145ce52df09860384a4653d2d332a - subpackages: - - internal - - internal/consistenthash - - internal/hashtag - - internal/pool - - internal/proto - - internal/singleflight - - internal/util -- name: github.com/go-sql-driver/mysql - version: d523deb1b23d913de5bdada721a6071e71283618 -- name: github.com/gocql/gocql - version: e06f8c1bcd787e6bf0608288b314522f08cc7848 - subpackages: - - internal/lru - - internal/murmur - - internal/streams -- name: github.com/gogo/protobuf - version: 30cf7ac33676b5786e78c746683f0d4cd64fa75b - subpackages: - - proto -- name: github.com/golang/protobuf - version: b4deda0973fb4c70b50d226b1af49f3da59f5265 - subpackages: - - proto - - protoc-gen-go/descriptor - - ptypes - - ptypes/any - - ptypes/duration - - ptypes/timestamp -- name: github.com/golang/snappy - version: 2e65f85255dbc3072edf28d6b5b8efc472979f5a -- name: github.com/google/btree - version: e89373fe6b4a7413d7acd6da1725b83ef713e6e4 -- name: github.com/gorilla/context - version: 08b5f424b9271eedf6f9f0ce86cb9396ed337a42 -- name: github.com/gorilla/mux - version: e3702bed27f0d39777b0b37b664b6280e8ef8fbf -- name: github.com/hailocab/go-hostpool - version: e80d13ce29ede4452c43dea11e79b9bc8a15b478 -- name: github.com/hashicorp/hcl - version: ef8a98b0bbce4a65b5aa4c368430a80ddc533168 - subpackages: - - hcl/ast - - hcl/parser - - hcl/printer - - hcl/scanner - - hcl/strconv - - hcl/token - - json/parser - - json/scanner - - json/token -- name: github.com/karlseguin/ccache - version: 
b425c9ca005a2050ebe723f6a0cddcb907354ab7 -- name: github.com/klauspost/crc32 - version: cb6bfca970f6908083f26f39a79009d608efd5cd -- name: github.com/lib/pq - version: 90697d60dd844d5ef6ff15135d0203f65d2f53b8 - subpackages: - - oid -- name: github.com/magiconair/properties - version: c2353362d570a7bfa228149c62842019201cfb71 -- name: github.com/mitchellh/mapstructure - version: bb74f1db0675b241733089d5a1faa5dd8b0ef57b -- name: github.com/pelletier/go-toml - version: c01d1270ff3e442a8a57cddc1c92dc1138598194 -- name: github.com/rwcarlsen/goexif - version: 8d986c03457a2057c7b0fb0a48113f7dd48f9619 - subpackages: - - exif - - tiff -- name: github.com/soheilhy/cmux - version: e09e9389d85d8492d313d73d1469c029e710623f -- name: github.com/spf13/afero - version: 787d034dfe70e44075ccc060d346146ef53270ad - subpackages: - - mem -- name: github.com/spf13/cast - version: 8965335b8c7107321228e3e3702cab9832751bac -- name: github.com/spf13/jwalterweatherman - version: 7c0cea34c8ece3fbeb2b27ab9b59511d360fb394 -- name: github.com/spf13/pflag - version: 3ebe029320b2676d667ae88da602a5f854788a8a -- name: github.com/spf13/viper - version: 15738813a09db5c8e5b60a19d67d3f9bd38da3a4 -- name: github.com/syndtr/goleveldb - version: 0d5a0ceb10cf9ab89fdd744cc8c50a83134f6697 - subpackages: - - leveldb - - leveldb/cache - - leveldb/comparer - - leveldb/errors - - leveldb/filter - - leveldb/iterator - - leveldb/journal - - leveldb/memdb - - leveldb/opt - - leveldb/storage - - leveldb/table - - leveldb/util -- name: golang.org/x/image - version: cc896f830cedae125428bc9fe1b0362aa91b3fb1 - subpackages: - - bmp - - tiff - - tiff/lzw -- name: golang.org/x/net - version: 4cb1c02c05b0e749b0365f61ae859a8e0cfceed9 - subpackages: - - context - - http/httpguts - - http2 - - http2/hpack - - idna - - internal/timeseries - - trace -- name: golang.org/x/sys - version: 7138fd3d9dc8335c567ca206f4333fb75eb05d56 - subpackages: - - unix -- name: golang.org/x/text - version: 5cec4b58c438bd98288aeb248bab2c1840713d21 - subpackages: - - secure/bidirule - - transform - - unicode/bidi - - unicode/norm -- name: google.golang.org/appengine - version: b1f26356af11148e710935ed1ac8a7f5702c7612 - subpackages: - - cloudsql -- name: google.golang.org/genproto - version: ff3583edef7de132f219f0efc00e097cabcc0ec0 - subpackages: - - googleapis/rpc/status -- name: google.golang.org/grpc - version: 168a6198bcb0ef175f7dacec0b8691fc141dc9b8 - subpackages: - - balancer - - balancer/base - - balancer/roundrobin - - codes - - connectivity - - credentials - - encoding - - encoding/proto - - grpclog - - internal - - internal/backoff - - internal/channelz - - internal/grpcrand - - keepalive - - metadata - - naming - - peer - - reflection - - reflection/grpc_reflection_v1alpha - - resolver - - resolver/dns - - resolver/passthrough - - stats - - status - - tap - - transport -- name: gopkg.in/inf.v0 - version: d2d2541c53f18d2a059457998ce2876cc8e67cbf -- name: gopkg.in/yaml.v2 - version: 5420a8b6744d3b0345ab293f6fcba19c978f1183 -testImports: [] diff --git a/weed/glide.yaml b/weed/glide.yaml deleted file mode 100644 index 7903a6728..000000000 --- a/weed/glide.yaml +++ /dev/null @@ -1,44 +0,0 @@ -package: github.com/chrislusf/seaweedfs/weed -import: -- package: bazil.org/fuse - subpackages: - - fs -- package: github.com/boltdb/bolt - version: ^1.3.1 -- package: github.com/chrislusf/raft -- package: github.com/dgrijalva/jwt-go - version: ^3.2.0 -- package: github.com/disintegration/imaging - version: ^1.4.1 -- package: github.com/go-redis/redis - version: ^6.10.2 -- package: 
github.com/go-sql-driver/mysql - version: ^1.3.0 -- package: github.com/gocql/gocql -- package: github.com/golang/protobuf - version: ^1.0.0 - subpackages: - - proto -- package: github.com/google/btree -- package: github.com/gorilla/mux - version: ^1.6.1 -- package: github.com/klauspost/crc32 - version: ^1.1.0 -- package: github.com/lib/pq -- package: github.com/rwcarlsen/goexif - subpackages: - - exif -- package: github.com/soheilhy/cmux - version: ^0.1.4 -- package: github.com/syndtr/goleveldb - subpackages: - - leveldb - - leveldb/util -- package: golang.org/x/net - subpackages: - - context -- package: google.golang.org/grpc - version: ^1.11.3 - subpackages: - - peer - - reflection diff --git a/weed/glog/glog.go b/weed/glog/glog.go index abd5678d4..f46632f1c 100644 --- a/weed/glog/glog.go +++ b/weed/glog/glog.go @@ -46,7 +46,7 @@ // -stderrthreshold=ERROR // Log events at or above this severity are logged to standard // error as well as to files. -// -log_dir="" +// -logdir="" // Log files will be written to this directory instead of the // default temporary directory. // diff --git a/weed/glog/glog_file.go b/weed/glog/glog_file.go index 65075d281..bb8e6902f 100644 --- a/weed/glog/glog_file.go +++ b/weed/glog/glog_file.go @@ -38,7 +38,7 @@ var logDirs []string // If non-empty, overrides the choice of directory in which to write logs. // See createLogDirs for the full list of possible destinations. -var logDir = flag.String("log_dir", "", "If non-empty, write log files in this directory") +var logDir = flag.String("logdir", "", "If non-empty, write log files in this directory") func createLogDirs() { if *logDir != "" { diff --git a/weed/notification/aws_sqs/aws_sqs_pub.go b/weed/notification/aws_sqs/aws_sqs_pub.go new file mode 100644 index 000000000..d881049dd --- /dev/null +++ b/weed/notification/aws_sqs/aws_sqs_pub.go @@ -0,0 +1,92 @@ +package aws_sqs + +import ( + "fmt" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/sqs" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/notification" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/proto" +) + +func init() { + notification.MessageQueues = append(notification.MessageQueues, &AwsSqsPub{}) +} + +type AwsSqsPub struct { + svc *sqs.SQS + queueUrl string +} + +func (k *AwsSqsPub) GetName() string { + return "aws_sqs" +} + +func (k *AwsSqsPub) Initialize(configuration util.Configuration, prefix string) (err error) { + glog.V(0).Infof("filer.notification.aws_sqs.region: %v", configuration.GetString(prefix+"region")) + glog.V(0).Infof("filer.notification.aws_sqs.sqs_queue_name: %v", configuration.GetString(prefix+"sqs_queue_name")) + return k.initialize( + configuration.GetString(prefix+"aws_access_key_id"), + configuration.GetString(prefix+"aws_secret_access_key"), + configuration.GetString(prefix+"region"), + configuration.GetString(prefix+"sqs_queue_name"), + ) +} + +func (k *AwsSqsPub) initialize(awsAccessKeyId, awsSecretAccessKey, region, queueName string) (err error) { + + config := &aws.Config{ + Region: aws.String(region), + } + if awsAccessKeyId != "" && awsSecretAccessKey != "" { + config.Credentials = credentials.NewStaticCredentials(awsAccessKeyId, awsSecretAccessKey, "") + } + + sess, err := session.NewSession(config) + if err != nil { + return fmt.Errorf("create aws session: %v", err) + } + k.svc = sqs.New(sess) + + 
result, err := k.svc.GetQueueUrl(&sqs.GetQueueUrlInput{
+		QueueName: aws.String(queueName),
+	})
+	if err != nil {
+		if aerr, ok := err.(awserr.Error); ok && aerr.Code() == sqs.ErrCodeQueueDoesNotExist {
+			return fmt.Errorf("unable to find queue %s", queueName)
+		}
+		return fmt.Errorf("get queue %s url: %v", queueName, err)
+	}
+
+	k.queueUrl = *result.QueueUrl
+
+	return nil
+}
+
+func (k *AwsSqsPub) SendMessage(key string, message proto.Message) (err error) {
+
+	text := proto.MarshalTextString(message)
+
+	_, err = k.svc.SendMessage(&sqs.SendMessageInput{
+		DelaySeconds: aws.Int64(10),
+		MessageAttributes: map[string]*sqs.MessageAttributeValue{
+			"key": &sqs.MessageAttributeValue{
+				DataType:    aws.String("String"),
+				StringValue: aws.String(key),
+			},
+		},
+		MessageBody: aws.String(text),
+		QueueUrl:    &k.queueUrl,
+	})
+
+	if err != nil {
+		return fmt.Errorf("send message to sqs %s: %v", k.queueUrl, err)
+	}
+
+	return nil
+}
diff --git a/weed/notification/configuration.go b/weed/notification/configuration.go
index e0ba61d58..36211692c 100644
--- a/weed/notification/configuration.go
+++ b/weed/notification/configuration.go
@@ -11,7 +11,7 @@ type MessageQueue interface {
 	// GetName gets the name to locate the configuration in filer.toml file
 	GetName() string
 	// Initialize initializes the file store
-	Initialize(configuration util.Configuration) error
+	Initialize(configuration util.Configuration, prefix string) error
 	SendMessage(key string, message proto.Message) error
 }
 
@@ -21,23 +21,37 @@ var (
 	Queue MessageQueue
 )
 
-func LoadConfiguration(config *viper.Viper) {
+func LoadConfiguration(config *viper.Viper, prefix string) {
 
 	if config == nil {
 		return
 	}
 
-	for _, store := range MessageQueues {
-		if config.GetBool(store.GetName() + ".enabled") {
-			viperSub := config.Sub(store.GetName())
-			if err := store.Initialize(viperSub); err != nil {
-				glog.Fatalf("Failed to initialize store for %s: %+v",
-					store.GetName(), err)
+	validateOneEnabledQueue(config, prefix)
+
+	for _, queue := range MessageQueues {
+		if config.GetBool(prefix + queue.GetName() + ".enabled") {
+			if err := queue.Initialize(config, prefix+queue.GetName()+"."); err != nil {
+				glog.Fatalf("Failed to initialize notification for %s: %+v",
+					queue.GetName(), err)
 			}
-			Queue = store
-			glog.V(0).Infof("Configure message queue for %s", store.GetName())
+			Queue = queue
+			glog.V(0).Infof("Configure notification message queue for %s", queue.GetName())
 			return
 		}
 	}
}
+
+func validateOneEnabledQueue(config *viper.Viper, prefix string) {
+	enabledQueue := ""
+	for _, queue := range MessageQueues {
+		if config.GetBool(prefix + queue.GetName() + ".enabled") {
+			if enabledQueue == "" {
+				enabledQueue = queue.GetName()
+			} else {
+				glog.Fatalf("Notification message queue is enabled for both %s and %s", enabledQueue, queue.GetName())
+			}
+		}
+	}
+}
diff --git a/weed/notification/gocdk_pub_sub/gocdk_pub_sub.go b/weed/notification/gocdk_pub_sub/gocdk_pub_sub.go
new file mode 100644
index 000000000..1ae102509
--- /dev/null
+++ b/weed/notification/gocdk_pub_sub/gocdk_pub_sub.go
@@ -0,0 +1,71 @@
+// Package gocdk_pub_sub supports the Go CDK (Cloud Development Kit) PubSub API,
+// which in turn supports many providers, including Amazon SNS/SQS, Azure Service Bus,
+// Google Cloud PubSub, and RabbitMQ.
+//
+// In the config, select a provider and topic using a URL. See
+// https://godoc.org/gocloud.dev/pubsub and its sub-packages for details.
+//
+// The Go CDK PubSub API does not support administrative operations like topic
+// creation. 
Create the topic using a UI, CLI or provider-specific API before running +// weed. +// +// The Go CDK obtains credentials via environment variables and other +// provider-specific default mechanisms. See the provider's documentation for +// details. +package gocdk_pub_sub + +import ( + "context" + "fmt" + + "github.com/golang/protobuf/proto" + "gocloud.dev/pubsub" + _ "gocloud.dev/pubsub/awssnssqs" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/notification" + "github.com/chrislusf/seaweedfs/weed/util" + // _ "gocloud.dev/pubsub/azuresb" + _ "gocloud.dev/pubsub/gcppubsub" + _ "gocloud.dev/pubsub/natspubsub" + _ "gocloud.dev/pubsub/rabbitpubsub" +) + +func init() { + notification.MessageQueues = append(notification.MessageQueues, &GoCDKPubSub{}) +} + +type GoCDKPubSub struct { + topicURL string + topic *pubsub.Topic +} + +func (k *GoCDKPubSub) GetName() string { + return "gocdk_pub_sub" +} + +func (k *GoCDKPubSub) Initialize(configuration util.Configuration, prefix string) error { + k.topicURL = configuration.GetString(prefix + "topic_url") + glog.V(0).Infof("notification.gocdk_pub_sub.topic_url: %v", k.topicURL) + topic, err := pubsub.OpenTopic(context.Background(), k.topicURL) + if err != nil { + glog.Fatalf("Failed to open topic: %v", err) + } + k.topic = topic + return nil +} + +func (k *GoCDKPubSub) SendMessage(key string, message proto.Message) error { + bytes, err := proto.Marshal(message) + if err != nil { + return err + } + err = k.topic.Send(context.Background(), &pubsub.Message{ + Body: bytes, + Metadata: map[string]string{"key": key}, + }) + if err != nil { + return fmt.Errorf("send message via Go CDK pubsub %s: %v", k.topicURL, err) + } + return nil +} diff --git a/weed/notification/google_pub_sub/google_pub_sub.go b/weed/notification/google_pub_sub/google_pub_sub.go new file mode 100644 index 000000000..363a86eb6 --- /dev/null +++ b/weed/notification/google_pub_sub/google_pub_sub.go @@ -0,0 +1,89 @@ +package google_pub_sub + +import ( + "context" + "fmt" + "os" + + "cloud.google.com/go/pubsub" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/notification" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/proto" + "google.golang.org/api/option" +) + +func init() { + notification.MessageQueues = append(notification.MessageQueues, &GooglePubSub{}) +} + +type GooglePubSub struct { + topic *pubsub.Topic +} + +func (k *GooglePubSub) GetName() string { + return "google_pub_sub" +} + +func (k *GooglePubSub) Initialize(configuration util.Configuration, prefix string) (err error) { + glog.V(0).Infof("notification.google_pub_sub.project_id: %v", configuration.GetString(prefix+"project_id")) + glog.V(0).Infof("notification.google_pub_sub.topic: %v", configuration.GetString(prefix+"topic")) + return k.initialize( + configuration.GetString(prefix+"google_application_credentials"), + configuration.GetString(prefix+"project_id"), + configuration.GetString(prefix+"topic"), + ) +} + +func (k *GooglePubSub) initialize(google_application_credentials, projectId, topicName string) (err error) { + + ctx := context.Background() + // Creates a client. 
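+	// The keys read by Initialize above come from filer.toml under the
+	// "notification." prefix that LoadConfiguration passes in, so a minimal,
+	// illustrative configuration (values here are placeholders) would be:
+	//
+	//   [notification.google_pub_sub]
+	//   enabled = true
+	//   google_application_credentials = "/path/to/credentials.json" # optional, see the fallback below
+	//   project_id = "my-gcp-project"
+	//   topic = "seaweedfs_filer"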
+	if google_application_credentials == "" {
+		var found bool
+		google_application_credentials, found = os.LookupEnv("GOOGLE_APPLICATION_CREDENTIALS")
+		if !found {
+			glog.Fatalf("need to specify GOOGLE_APPLICATION_CREDENTIALS env variable or google_application_credentials in filer.toml")
+		}
+	}
+
+	client, err := pubsub.NewClient(ctx, projectId, option.WithCredentialsFile(google_application_credentials))
+	if err != nil {
+		glog.Fatalf("Failed to create client: %v", err)
+	}
+
+	k.topic = client.Topic(topicName)
+	if exists, err := k.topic.Exists(ctx); err == nil {
+		if !exists {
+			k.topic, err = client.CreateTopic(ctx, topicName)
+			if err != nil {
+				glog.Fatalf("Failed to create topic %s: %v", topicName, err)
+			}
+		}
+	} else {
+		glog.Fatalf("Failed to check topic %s: %v", topicName, err)
+	}
+
+	return nil
+}
+
+func (k *GooglePubSub) SendMessage(key string, message proto.Message) (err error) {
+
+	bytes, err := proto.Marshal(message)
+	if err != nil {
+		return
+	}
+
+	ctx := context.Background()
+	result := k.topic.Publish(ctx, &pubsub.Message{
+		Data:       bytes,
+		Attributes: map[string]string{"key": key},
+	})
+
+	_, err = result.Get(ctx)
+	if err != nil {
+		return fmt.Errorf("send message to google pub sub %s: %v", k.topic.String(), err)
+	}
+
+	return nil
+}
diff --git a/weed/notification/kafka/kafka_queue.go b/weed/notification/kafka/kafka_queue.go
index 830709a51..8d83b5892 100644
--- a/weed/notification/kafka/kafka_queue.go
+++ b/weed/notification/kafka/kafka_queue.go
@@ -21,12 +21,12 @@ func (k *KafkaQueue) GetName() string {
 	return "kafka"
 }
 
-func (k *KafkaQueue) Initialize(configuration util.Configuration) (err error) {
-	glog.V(0).Infof("filer.notification.kafka.hosts: %v\n", configuration.GetStringSlice("hosts"))
-	glog.V(0).Infof("filer.notification.kafka.topic: %v\n", configuration.GetString("topic"))
+func (k *KafkaQueue) Initialize(configuration util.Configuration, prefix string) (err error) {
+	glog.V(0).Infof("filer.notification.kafka.hosts: %v\n", configuration.GetStringSlice(prefix+"hosts"))
+	glog.V(0).Infof("filer.notification.kafka.topic: %v\n", configuration.GetString(prefix+"topic"))
 	return k.initialize(
-		configuration.GetStringSlice("hosts"),
-		configuration.GetString("topic"),
+		configuration.GetStringSlice(prefix+"hosts"),
+		configuration.GetString(prefix+"topic"),
 	)
 }
 
@@ -76,7 +76,7 @@ func (k *KafkaQueue) handleError() {
 	for {
 		err := <-k.producer.Errors()
 		if err != nil {
-			glog.Errorf("producer message error, partition:%d offset:%d key:%v valus:%s error(%v) topic:%s", err.Msg.Partition, err.Msg.Offset, err.Msg.Key, err.Msg.Value, err.Err, k.topic)
+			glog.Errorf("producer message error, partition:%d offset:%d key:%v value:%s error(%v) topic:%s", err.Msg.Partition, err.Msg.Offset, err.Msg.Key, err.Msg.Value, err.Err, k.topic)
 		}
 	}
 }
diff --git a/weed/notification/log/log_queue.go b/weed/notification/log/log_queue.go
index dcc038dfc..1ca4786a1 100644
--- a/weed/notification/log/log_queue.go
+++ b/weed/notification/log/log_queue.go
@@ -18,7 +18,7 @@ func (k *LogQueue) GetName() string {
 	return "log"
 }
 
-func (k *LogQueue) Initialize(configuration util.Configuration) (err error) {
+func (k *LogQueue) Initialize(configuration util.Configuration, prefix string) (err error) {
 	return nil
 }
 
diff --git a/weed/operation/assign_file_id.go b/weed/operation/assign_file_id.go
index 169fd664d..893bf516c 100644
--- a/weed/operation/assign_file_id.go
+++ b/weed/operation/assign_file_id.go
@@ -1,80 +1,103 @@
 package operation
 
 import (
-	"encoding/json"
+	"context"
 	"fmt"
-	
"net/url" - "strconv" + "strings" - "github.com/chrislusf/seaweedfs/weed/glog" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/util" ) type VolumeAssignRequest struct { - Count uint64 - Replication string - Collection string - Ttl string - DataCenter string - Rack string - DataNode string + Count uint64 + Replication string + Collection string + Ttl string + DataCenter string + Rack string + DataNode string + WritableVolumeCount uint32 } type AssignResult struct { - Fid string `json:"fid,omitempty"` - Url string `json:"url,omitempty"` - PublicUrl string `json:"publicUrl,omitempty"` - Count uint64 `json:"count,omitempty"` - Error string `json:"error,omitempty"` + Fid string `json:"fid,omitempty"` + Url string `json:"url,omitempty"` + PublicUrl string `json:"publicUrl,omitempty"` + Count uint64 `json:"count,omitempty"` + Error string `json:"error,omitempty"` + Auth security.EncodedJwt `json:"auth,omitempty"` } -func Assign(server string, primaryRequest *VolumeAssignRequest, alternativeRequests ...*VolumeAssignRequest) (*AssignResult, error) { +func Assign(server string, grpcDialOption grpc.DialOption, primaryRequest *VolumeAssignRequest, alternativeRequests ...*VolumeAssignRequest) (*AssignResult, error) { + var requests []*VolumeAssignRequest requests = append(requests, primaryRequest) requests = append(requests, alternativeRequests...) var lastError error + ret := &AssignResult{} + for i, request := range requests { if request == nil { continue } - values := make(url.Values) - values.Add("count", strconv.FormatUint(request.Count, 10)) - if request.Replication != "" { - values.Add("replication", request.Replication) - } - if request.Collection != "" { - values.Add("collection", request.Collection) - } - if request.Ttl != "" { - values.Add("ttl", request.Ttl) - } - if request.DataCenter != "" { - values.Add("dataCenter", request.DataCenter) - } - if request.Rack != "" { - values.Add("rack", request.Rack) - } - if request.DataNode != "" { - values.Add("dataNode", request.DataNode) - } - postUrl := fmt.Sprintf("http://%s/dir/assign", server) - jsonBlob, err := util.Post(postUrl, values) - glog.V(2).Infof("assign %d result from %s %+v : %s", i, postUrl, values, string(jsonBlob)) - if err != nil { - return nil, err - } - var ret AssignResult - err = json.Unmarshal(jsonBlob, &ret) - if err != nil { - return nil, fmt.Errorf("/dir/assign result JSON unmarshal error:%v, json:%s", err, string(jsonBlob)) + lastError = WithMasterServerClient(server, grpcDialOption, func(masterClient master_pb.SeaweedClient) error { + + req := &master_pb.AssignRequest{ + Count: primaryRequest.Count, + Replication: primaryRequest.Replication, + Collection: primaryRequest.Collection, + Ttl: primaryRequest.Ttl, + DataCenter: primaryRequest.DataCenter, + Rack: primaryRequest.Rack, + DataNode: primaryRequest.DataNode, + WritableVolumeCount: primaryRequest.WritableVolumeCount, + } + resp, grpcErr := masterClient.Assign(context.Background(), req) + if grpcErr != nil { + return grpcErr + } + + ret.Count = resp.Count + ret.Fid = resp.Fid + ret.Url = resp.Url + ret.PublicUrl = resp.PublicUrl + ret.Error = resp.Error + ret.Auth = security.EncodedJwt(resp.Auth) + + return nil + + }) + + if lastError != nil { + continue } + if ret.Count <= 0 { lastError = fmt.Errorf("assign failure %d: %v", i+1, ret.Error) continue } - return &ret, nil + } - return nil, lastError + + return ret, lastError +} + +func 
LookupJwt(master string, fileId string) security.EncodedJwt { + + tokenStr := "" + + if h, e := util.Head(fmt.Sprintf("http://%s/dir/lookup?fileId=%s", master, fileId)); e == nil { + bearer := h.Get("Authorization") + if len(bearer) > 7 && strings.ToUpper(bearer[0:6]) == "BEARER" { + tokenStr = bearer[7:] + } + } + + return security.EncodedJwt(tokenStr) } diff --git a/weed/operation/chunked_file.go b/weed/operation/chunked_file.go index 9d8267dee..295204dd8 100644 --- a/weed/operation/chunked_file.go +++ b/weed/operation/chunked_file.go @@ -5,9 +5,12 @@ import ( "errors" "fmt" "io" + "io/ioutil" "net/http" "sort" + "google.golang.org/grpc" + "sync" "github.com/chrislusf/seaweedfs/weed/glog" @@ -53,7 +56,7 @@ func (s ChunkList) Swap(i, j int) { s[i], s[j] = s[j], s[i] } func LoadChunkManifest(buffer []byte, isGzipped bool) (*ChunkManifest, error) { if isGzipped { var err error - if buffer, err = UnGzipData(buffer); err != nil { + if buffer, err = util.UnGzipData(buffer); err != nil { return nil, err } } @@ -69,12 +72,12 @@ func (cm *ChunkManifest) Marshal() ([]byte, error) { return json.Marshal(cm) } -func (cm *ChunkManifest) DeleteChunks(master string) error { +func (cm *ChunkManifest) DeleteChunks(master string, grpcDialOption grpc.DialOption) error { var fileIds []string for _, ci := range cm.Chunks { fileIds = append(fileIds, ci.Fid) } - results, err := DeleteFiles(master, fileIds) + results, err := DeleteFiles(master, grpcDialOption, fileIds) if err != nil { glog.V(0).Infof("delete %+v: %v", fileIds, err) return fmt.Errorf("chunk delete: %v", err) @@ -102,7 +105,10 @@ func readChunkNeedle(fileUrl string, w io.Writer, offset int64) (written int64, if err != nil { return written, err } - defer resp.Body.Close() + defer func() { + io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + }() switch resp.StatusCode { case http.StatusRequestedRangeNotSatisfiable: diff --git a/weed/operation/compress.go b/weed/operation/compress.go deleted file mode 100644 index de62e5bf7..000000000 --- a/weed/operation/compress.go +++ /dev/null @@ -1,59 +0,0 @@ -package operation - -import ( - "bytes" - "compress/flate" - "compress/gzip" - "io/ioutil" - "strings" - - "github.com/chrislusf/seaweedfs/weed/glog" -) - -/* -* Default more not to gzip since gzip can be done on client side. 
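
The compression helpers deleted here move into the weed/util package; other hunks in this change already call util.GzipData, util.UnGzipData, and util.IsGzippableFileType. A minimal round-trip sketch, assuming those util functions keep the signatures used by the upload path later in this diff:

	package main

	import (
		"fmt"
		"path/filepath"

		"github.com/chrislusf/seaweedfs/weed/util"
	)

	func main() {
		data := []byte(`{"hello": "world"}`)
		filename := "/tmp/hello.json"

		// same decision call that doUploadData makes later in this diff:
		// compress only when the file type is known to compress well
		if shouldGzip, confident := util.IsGzippableFileType(filepath.Base(filename), "application/json"); confident && shouldGzip {
			zipped, err := util.GzipData(data)
			if err != nil {
				panic(err)
			}
			unzipped, _ := util.UnGzipData(zipped)
			fmt.Printf("%d -> %d -> %d bytes\n", len(data), len(zipped), len(unzipped))
		}
	}
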
- */ -func IsGzippable(ext, mtype string) bool { - if strings.HasPrefix(mtype, "text/") { - return true - } - switch ext { - case ".zip", ".rar", ".gz", ".bz2", ".xz": - return false - case ".pdf", ".txt", ".html", ".htm", ".css", ".js", ".json": - return true - } - if strings.HasPrefix(mtype, "application/") { - if strings.HasSuffix(mtype, "xml") { - return true - } - if strings.HasSuffix(mtype, "script") { - return true - } - } - return false -} - -func GzipData(input []byte) ([]byte, error) { - buf := new(bytes.Buffer) - w, _ := gzip.NewWriterLevel(buf, flate.BestCompression) - if _, err := w.Write(input); err != nil { - glog.V(2).Infoln("error compressing data:", err) - return nil, err - } - if err := w.Close(); err != nil { - glog.V(2).Infoln("error closing compressed data:", err) - return nil, err - } - return buf.Bytes(), nil -} -func UnGzipData(input []byte) ([]byte, error) { - buf := bytes.NewBuffer(input) - r, _ := gzip.NewReader(buf) - defer r.Close() - output, err := ioutil.ReadAll(r) - if err != nil { - glog.V(2).Infoln("error uncompressing data:", err) - } - return output, err -} diff --git a/weed/operation/data_struts.go b/weed/operation/data_struts.go index bfc53aa50..4980f9913 100644 --- a/weed/operation/data_struts.go +++ b/weed/operation/data_struts.go @@ -2,6 +2,5 @@ package operation type JoinResult struct { VolumeSizeLimit uint64 `json:"VolumeSizeLimit,omitempty"` - SecretKey string `json:"secretKey,omitempty"` Error string `json:"error,omitempty"` } diff --git a/weed/operation/delete_content.go b/weed/operation/delete_content.go index 72041f126..361c09e7e 100644 --- a/weed/operation/delete_content.go +++ b/weed/operation/delete_content.go @@ -8,6 +8,8 @@ import ( "strings" "sync" + "google.golang.org/grpc" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" ) @@ -27,7 +29,17 @@ func ParseFileId(fid string) (vid string, key_cookie string, err error) { } // DeleteFiles batch deletes a list of fileIds -func DeleteFiles(master string, fileIds []string) ([]*volume_server_pb.DeleteResult, error) { +func DeleteFiles(master string, grpcDialOption grpc.DialOption, fileIds []string) ([]*volume_server_pb.DeleteResult, error) { + + lookupFunc := func(vids []string) (map[string]LookupResult, error) { + return LookupVolumeIds(master, grpcDialOption, vids) + } + + return DeleteFilesWithLookupVolumeId(grpcDialOption, fileIds, lookupFunc) + +} + +func DeleteFilesWithLookupVolumeId(grpcDialOption grpc.DialOption, fileIds []string, lookupFunc func(vid []string) (map[string]LookupResult, error)) ([]*volume_server_pb.DeleteResult, error) { var ret []*volume_server_pb.DeleteResult @@ -37,7 +49,7 @@ func DeleteFiles(master string, fileIds []string) ([]*volume_server_pb.DeleteRes vid, _, err := ParseFileId(fileId) if err != nil { ret = append(ret, &volume_server_pb.DeleteResult{ - FileId: vid, + FileId: fileId, Status: http.StatusBadRequest, Error: err.Error()}, ) @@ -50,7 +62,7 @@ func DeleteFiles(master string, fileIds []string) ([]*volume_server_pb.DeleteRes vid_to_fileIds[vid] = append(vid_to_fileIds[vid], fileId) } - lookupResults, err := LookupVolumeIds(master, vids) + lookupResults, err := lookupFunc(vids) if err != nil { return ret, err } @@ -74,30 +86,35 @@ func DeleteFiles(master string, fileIds []string) ([]*volume_server_pb.DeleteRes } } + resultChan := make(chan []*volume_server_pb.DeleteResult, len(server_to_fileIds)) var wg sync.WaitGroup - for server, fidList := range server_to_fileIds { wg.Add(1) go func(server string, fidList []string) { defer wg.Done() - if 
deleteResults, deleteErr := DeleteFilesAtOneVolumeServer(server, fidList); deleteErr != nil { + if deleteResults, deleteErr := DeleteFilesAtOneVolumeServer(server, grpcDialOption, fidList); deleteErr != nil { err = deleteErr - } else { - ret = append(ret, deleteResults...) + } else if deleteResults != nil { + resultChan <- deleteResults } }(server, fidList) } wg.Wait() + close(resultChan) + + for result := range resultChan { + ret = append(ret, result...) + } return ret, err } // DeleteFilesAtOneVolumeServer deletes a list of files that is on one volume server via gRpc -func DeleteFilesAtOneVolumeServer(volumeServer string, fileIds []string) (ret []*volume_server_pb.DeleteResult, err error) { +func DeleteFilesAtOneVolumeServer(volumeServer string, grpcDialOption grpc.DialOption, fileIds []string) (ret []*volume_server_pb.DeleteResult, err error) { - err = WithVolumeServerClient(volumeServer, func(volumeServerClient volume_server_pb.VolumeServerClient) error { + err = WithVolumeServerClient(volumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { req := &volume_server_pb.BatchDeleteRequest{ FileIds: fileIds, @@ -105,7 +122,7 @@ func DeleteFilesAtOneVolumeServer(volumeServer string, fileIds []string) (ret [] resp, err := volumeServerClient.BatchDelete(context.Background(), req) - fmt.Printf("deleted %v %v: %v\n", fileIds, err, resp) + // fmt.Printf("deleted %v %v: %v\n", fileIds, err, resp) if err != nil { return err @@ -121,7 +138,7 @@ func DeleteFilesAtOneVolumeServer(volumeServer string, fileIds []string) (ret [] } for _, result := range ret { - if result.Error != "" { + if result.Error != "" && result.Error != "not found" { return nil, fmt.Errorf("delete fileId %s: %v", result.FileId, result.Error) } } diff --git a/weed/operation/grpc_client.go b/weed/operation/grpc_client.go index 5e6c23709..dccf85da4 100644 --- a/weed/operation/grpc_client.go +++ b/weed/operation/grpc_client.go @@ -5,28 +5,26 @@ import ( "strconv" "strings" + "google.golang.org/grpc" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/master_pb" "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - "github.com/chrislusf/seaweedfs/weed/util" ) -func WithVolumeServerClient(volumeServer string, fn func(volume_server_pb.VolumeServerClient) error) error { +func WithVolumeServerClient(volumeServer string, grpcDialOption grpc.DialOption, fn func(volume_server_pb.VolumeServerClient) error) error { grpcAddress, err := toVolumeServerGrpcAddress(volumeServer) if err != nil { return err } - grpcConnection, err := util.GrpcDial(grpcAddress) - if err != nil { - return fmt.Errorf("fail to dial %s: %v", grpcAddress, err) - } - defer grpcConnection.Close() + return pb.WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error { + client := volume_server_pb.NewVolumeServerClient(grpcConnection) + return fn(client) + }, grpcAddress, grpcDialOption) - client := volume_server_pb.NewVolumeServerClient(grpcConnection) - - return fn(client) } func toVolumeServerGrpcAddress(volumeServer string) (grpcAddress string, err error) { @@ -39,15 +37,16 @@ func toVolumeServerGrpcAddress(volumeServer string) (grpcAddress string, err err return fmt.Sprintf("%s:%d", volumeServer[0:sepIndex], port+10000), nil } -func withMasterServerClient(masterServer string, fn func(masterClient master_pb.SeaweedClient) error) error { +func WithMasterServerClient(masterServer string, grpcDialOption grpc.DialOption, fn func(masterClient 
master_pb.SeaweedClient) error) error { - grpcConnection, err := util.GrpcDial(masterServer) - if err != nil { - return fmt.Errorf("fail to dial %s: %v", masterServer, err) + masterGrpcAddress, parseErr := pb.ParseServerToGrpcAddress(masterServer) + if parseErr != nil { + return fmt.Errorf("failed to parse master grpc %v: %v", masterServer, parseErr) } - defer grpcConnection.Close() - client := master_pb.NewSeaweedClient(grpcConnection) + return pb.WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error { + client := master_pb.NewSeaweedClient(grpcConnection) + return fn(client) + }, masterGrpcAddress, grpcDialOption) - return fn(client) } diff --git a/weed/operation/list_masters.go b/weed/operation/list_masters.go deleted file mode 100644 index 75838de4d..000000000 --- a/weed/operation/list_masters.go +++ /dev/null @@ -1,32 +0,0 @@ -package operation - -import ( - "encoding/json" - - "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/util" -) - -type ClusterStatusResult struct { - IsLeader bool `json:"IsLeader,omitempty"` - Leader string `json:"Leader,omitempty"` - Peers []string `json:"Peers,omitempty"` -} - -func ListMasters(server string) (leader string, peers []string, err error) { - jsonBlob, err := util.Get("http://" + server + "/cluster/status") - glog.V(2).Info("list masters result :", string(jsonBlob)) - if err != nil { - return "", nil, err - } - var ret ClusterStatusResult - err = json.Unmarshal(jsonBlob, &ret) - if err != nil { - return "", nil, err - } - peers = ret.Peers - if ret.IsLeader { - peers = append(peers, ret.Leader) - } - return ret.Leader, peers, nil -} diff --git a/weed/operation/lookup.go b/weed/operation/lookup.go index c495af1af..d0773e7fd 100644 --- a/weed/operation/lookup.go +++ b/weed/operation/lookup.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "google.golang.org/grpc" "math/rand" "net/url" "strings" @@ -78,7 +79,7 @@ func LookupFileId(server string, fileId string) (fullUrl string, err error) { } // LookupVolumeIds find volume locations by cache and actual lookup -func LookupVolumeIds(server string, vids []string) (map[string]LookupResult, error) { +func LookupVolumeIds(server string, grpcDialOption grpc.DialOption, vids []string) (map[string]LookupResult, error) { ret := make(map[string]LookupResult) var unknown_vids []string @@ -98,7 +99,8 @@ func LookupVolumeIds(server string, vids []string) (map[string]LookupResult, err //only query unknown_vids - err := withMasterServerClient(server, func(masterClient master_pb.SeaweedClient) error { + err := WithMasterServerClient(server, grpcDialOption, func(masterClient master_pb.SeaweedClient) error { + req := &master_pb.LookupVolumeRequest{ VolumeIds: unknown_vids, } diff --git a/weed/operation/lookup_vid_cache.go b/weed/operation/lookup_vid_cache.go index d4980a1f5..ccc1f2beb 100644 --- a/weed/operation/lookup_vid_cache.go +++ b/weed/operation/lookup_vid_cache.go @@ -9,6 +9,8 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" ) +var ErrorNotFound = errors.New("not found") + type VidInfo struct { Locations []Location NextRefreshTime time.Time @@ -28,14 +30,14 @@ func (vc *VidCache) Get(vid string) ([]Location, error) { defer vc.RUnlock() if 0 < id && id <= len(vc.cache) { if vc.cache[id-1].Locations == nil { - return nil, errors.New("Not Set") + return nil, errors.New("not set") } if vc.cache[id-1].NextRefreshTime.Before(time.Now()) { - return nil, errors.New("Expired") + return nil, errors.New("expired") } return vc.cache[id-1].Locations, nil } - return 
nil, errors.New("Not Found") + return nil, ErrorNotFound } func (vc *VidCache) Set(vid string, locations []Location, duration time.Duration) { id, err := strconv.Atoi(vid) diff --git a/weed/operation/stats.go b/weed/operation/stats.go new file mode 100644 index 000000000..b69a33750 --- /dev/null +++ b/weed/operation/stats.go @@ -0,0 +1,26 @@ +package operation + +import ( + "context" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" +) + +func Statistics(server string, grpcDialOption grpc.DialOption, req *master_pb.StatisticsRequest) (resp *master_pb.StatisticsResponse, err error) { + + err = WithMasterServerClient(server, grpcDialOption, func(masterClient master_pb.SeaweedClient) error { + + grpcResponse, grpcErr := masterClient.Statistics(context.Background(), req) + if grpcErr != nil { + return grpcErr + } + + resp = grpcResponse + + return nil + + }) + + return +} diff --git a/weed/operation/submit.go b/weed/operation/submit.go index b81f64593..5e4dc4374 100644 --- a/weed/operation/submit.go +++ b/weed/operation/submit.go @@ -1,7 +1,6 @@ package operation import ( - "bytes" "io" "mime" "net/url" @@ -10,6 +9,8 @@ import ( "strconv" "strings" + "google.golang.org/grpc" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/security" ) @@ -18,7 +19,6 @@ type FilePart struct { Reader io.Reader FileName string FileSize int64 - IsGzipped bool MimeType string ModTime int64 //in seconds Replication string @@ -37,10 +37,8 @@ type SubmitResult struct { Error string `json:"error,omitempty"` } -func SubmitFiles(master string, files []FilePart, - replication string, collection string, dataCenter string, ttl string, maxMB int, - secret security.Secret, -) ([]SubmitResult, error) { +func SubmitFiles(master string, grpcDialOption grpc.DialOption, files []FilePart, + replication string, collection string, dataCenter string, ttl string, maxMB int) ([]SubmitResult, error) { results := make([]SubmitResult, len(files)) for index, file := range files { results[index].FileName = file.FileName @@ -52,9 +50,9 @@ func SubmitFiles(master string, files []FilePart, DataCenter: dataCenter, Ttl: ttl, } - ret, err := Assign(master, ar) + ret, err := Assign(master, grpcDialOption, ar) if err != nil { - for index, _ := range files { + for index := range files { results[index].Error = err.Error() } return results, err @@ -68,7 +66,7 @@ func SubmitFiles(master string, files []FilePart, file.Replication = replication file.Collection = collection file.DataCenter = dataCenter - results[index].Size, err = file.Upload(maxMB, master, secret) + results[index].Size, err = file.Upload(maxMB, master, ret.Auth, grpcDialOption) if err != nil { results[index].Error = err.Error() } @@ -103,7 +101,6 @@ func newFilePart(fullPathFilename string) (ret FilePart, err error) { ret.ModTime = fi.ModTime().UTC().Unix() ret.FileSize = fi.Size() ext := strings.ToLower(path.Ext(fullPathFilename)) - ret.IsGzipped = ext == ".gz" ret.FileName = fi.Name() if ext != "" { ret.MimeType = mime.TypeByExtension(ext) @@ -112,8 +109,7 @@ func newFilePart(fullPathFilename string) (ret FilePart, err error) { return ret, nil } -func (fi FilePart) Upload(maxMB int, master string, secret security.Secret) (retSize uint32, err error) { - jwt := security.GenJwt(secret, fi.Fid) +func (fi FilePart) Upload(maxMB int, master string, jwt security.EncodedJwt, grpcDialOption grpc.DialOption) (retSize uint32, err error) { fileUrl := "http://" + fi.Server + "/" + fi.Fid if fi.ModTime != 0 { fileUrl += "?ts=" + 
strconv.Itoa(int(fi.ModTime)) @@ -141,7 +137,7 @@ func (fi FilePart) Upload(maxMB int, master string, secret security.Secret) (ret Collection: fi.Collection, Ttl: fi.Ttl, } - ret, err = Assign(master, ar) + ret, err = Assign(master, grpcDialOption, ar) if err != nil { return } @@ -154,10 +150,10 @@ func (fi FilePart) Upload(maxMB int, master string, secret security.Secret) (ret Collection: fi.Collection, Ttl: fi.Ttl, } - ret, err = Assign(master, ar) + ret, err = Assign(master, grpcDialOption, ar) if err != nil { // delete all uploaded chunks - cm.DeleteChunks(master) + cm.DeleteChunks(master, grpcDialOption) return } id = ret.Fid @@ -172,10 +168,10 @@ func (fi FilePart) Upload(maxMB int, master string, secret security.Secret) (ret baseName+"-"+strconv.FormatInt(i+1, 10), io.LimitReader(fi.Reader, chunkSize), master, fileUrl, - jwt) + ret.Auth) if e != nil { // delete all uploaded chunks - cm.DeleteChunks(master) + cm.DeleteChunks(master, grpcDialOption) return 0, e } cm.Chunks = append(cm.Chunks, @@ -190,10 +186,10 @@ func (fi FilePart) Upload(maxMB int, master string, secret security.Secret) (ret err = upload_chunked_file_manifest(fileUrl, &cm, jwt) if err != nil { // delete all uploaded chunks - cm.DeleteChunks(master) + cm.DeleteChunks(master, grpcDialOption) } } else { - ret, e := Upload(fileUrl, baseName, fi.Reader, fi.IsGzipped, fi.MimeType, nil, jwt) + ret, e := Upload(fileUrl, baseName, false, fi.Reader, false, fi.MimeType, nil, jwt) if e != nil { return 0, e } @@ -206,8 +202,7 @@ func upload_one_chunk(filename string, reader io.Reader, master, fileUrl string, jwt security.EncodedJwt, ) (size uint32, e error) { glog.V(4).Info("Uploading part ", filename, " to ", fileUrl, "...") - uploadResult, uploadError := Upload(fileUrl, filename, reader, false, - "application/octet-stream", nil, jwt) + uploadResult, uploadError := Upload(fileUrl, filename, false, reader, false, "", nil, jwt) if uploadError != nil { return 0, uploadError } @@ -219,12 +214,11 @@ func upload_chunked_file_manifest(fileUrl string, manifest *ChunkManifest, jwt s if e != nil { return e } - bufReader := bytes.NewReader(buf) glog.V(4).Info("Uploading chunks manifest ", manifest.Name, " to ", fileUrl, "...") u, _ := url.Parse(fileUrl) q := u.Query() q.Set("cm", "true") u.RawQuery = q.Encode() - _, e = Upload(u.String(), manifest.Name, bufReader, false, "application/json", nil, jwt) + _, e = UploadData(u.String(), manifest.Name, false, buf, false, "application/json", nil, jwt) return e } diff --git a/weed/operation/sync_volume.go b/weed/operation/sync_volume.go index 662184656..5562f12ab 100644 --- a/weed/operation/sync_volume.go +++ b/weed/operation/sync_volume.go @@ -2,44 +2,19 @@ package operation import ( "context" - "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - . 
"github.com/chrislusf/seaweedfs/weed/storage/types" - "github.com/chrislusf/seaweedfs/weed/util" + "google.golang.org/grpc" ) -func GetVolumeSyncStatus(server string, vid uint32) (resp *volume_server_pb.VolumeSyncStatusResponse, err error) { +func GetVolumeSyncStatus(server string, grpcDialOption grpc.DialOption, vid uint32) (resp *volume_server_pb.VolumeSyncStatusResponse, err error) { + + WithVolumeServerClient(server, grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { - WithVolumeServerClient(server, func(client volume_server_pb.VolumeServerClient) error { resp, err = client.VolumeSyncStatus(context.Background(), &volume_server_pb.VolumeSyncStatusRequest{ - VolumdId: vid, + VolumeId: vid, }) return nil }) return } - -func GetVolumeIdxEntries(server string, vid uint32, eachEntryFn func(key NeedleId, offset Offset, size uint32)) error { - - return WithVolumeServerClient(server, func(client volume_server_pb.VolumeServerClient) error { - resp, err := client.VolumeSyncIndex(context.Background(), &volume_server_pb.VolumeSyncIndexRequest{ - VolumdId: vid, - }) - if err != nil { - return err - } - - dataSize := len(resp.IndexFileContent) - - for idx := 0; idx+NeedleEntrySize <= dataSize; idx += NeedleEntrySize { - line := resp.IndexFileContent[idx : idx+NeedleEntrySize] - key := BytesToNeedleId(line[:NeedleIdSize]) - offset := BytesToOffset(line[NeedleIdSize : NeedleIdSize+OffsetSize]) - size := util.BytesToUint32(line[NeedleIdSize+OffsetSize : NeedleIdSize+OffsetSize+SizeSize]) - eachEntryFn(key, offset, size) - } - - return nil - }) -} diff --git a/weed/operation/tail_volume.go b/weed/operation/tail_volume.go new file mode 100644 index 000000000..3cd66b5da --- /dev/null +++ b/weed/operation/tail_volume.go @@ -0,0 +1,83 @@ +package operation + +import ( + "context" + "fmt" + "io" + + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +func TailVolume(master string, grpcDialOption grpc.DialOption, vid needle.VolumeId, sinceNs uint64, timeoutSeconds int, fn func(n *needle.Needle) error) error { + // find volume location, replication, ttl info + lookup, err := Lookup(master, vid.String()) + if err != nil { + return fmt.Errorf("look up volume %d: %v", vid, err) + } + if len(lookup.Locations) == 0 { + return fmt.Errorf("unable to locate volume %d", vid) + } + + volumeServer := lookup.Locations[0].Url + + return TailVolumeFromSource(volumeServer, grpcDialOption, vid, sinceNs, timeoutSeconds, fn) +} + +func TailVolumeFromSource(volumeServer string, grpcDialOption grpc.DialOption, vid needle.VolumeId, sinceNs uint64, idleTimeoutSeconds int, fn func(n *needle.Needle) error) error { + return WithVolumeServerClient(volumeServer, grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + + stream, err := client.VolumeTailSender(context.Background(), &volume_server_pb.VolumeTailSenderRequest{ + VolumeId: uint32(vid), + SinceNs: sinceNs, + IdleTimeoutSeconds: uint32(idleTimeoutSeconds), + }) + if err != nil { + return err + } + + for { + resp, recvErr := stream.Recv() + if recvErr != nil { + if recvErr == io.EOF { + break + } else { + return recvErr + } + } + + needleHeader := resp.NeedleHeader + needleBody := resp.NeedleBody + + if len(needleHeader) == 0 { + continue + } + + for !resp.IsLastChunk { + resp, recvErr = stream.Recv() + if recvErr != nil { + if recvErr == io.EOF { + break + } else { + return recvErr + } + } + needleBody = append(needleBody, resp.NeedleBody...) 
+ } + + n := new(needle.Needle) + n.ParseNeedleHeader(needleHeader) + n.ReadNeedleBodyBytes(needleBody, needle.CurrentVersion) + + err = fn(n) + + if err != nil { + return err + } + + } + return nil + }) +} diff --git a/weed/operation/upload_content.go b/weed/operation/upload_content.go index 1fd0ed2b9..52f8f9e2b 100644 --- a/weed/operation/upload_content.go +++ b/weed/operation/upload_content.go @@ -2,6 +2,8 @@ package operation import ( "bytes" + "compress/flate" + "crypto/md5" "encoding/json" "errors" "fmt" @@ -16,13 +18,18 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/util" ) type UploadResult struct { - Name string `json:"name,omitempty"` - Size uint32 `json:"size,omitempty"` - Error string `json:"error,omitempty"` - ETag string `json:"error,omitempty"` + Name string `json:"name,omitempty"` + Size uint32 `json:"size,omitempty"` + Error string `json:"error,omitempty"` + ETag string `json:"eTag,omitempty"` + CipherKey []byte `json:"cipherKey,omitempty"` + Mime string `json:"mime,omitempty"` + Gzip uint32 `json:"gzip,omitempty"` + Md5 string `json:"md5,omitempty"` } var ( @@ -37,12 +44,162 @@ func init() { var fileNameEscaper = strings.NewReplacer("\\", "\\\\", "\"", "\\\"") -func Upload(uploadUrl string, filename string, reader io.Reader, isGzipped bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (*UploadResult, error) { - return upload_content(uploadUrl, func(w io.Writer) (err error) { - _, err = io.Copy(w, reader) +// Upload sends a POST request to a volume server to upload the content with adjustable compression level +func UploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputGzipped bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) { + hash := md5.New() + hash.Write(data) + uploadResult, err = doUploadData(uploadUrl, filename, cipher, data, isInputGzipped, mtype, pairMap, jwt) + if uploadResult != nil { + uploadResult.Md5 = fmt.Sprintf("%x", hash.Sum(nil)) + } + return +} + +// Upload sends a POST request to a volume server to upload the content with fast compression +func Upload(uploadUrl string, filename string, cipher bool, reader io.Reader, isInputGzipped bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) { + hash := md5.New() + reader = io.TeeReader(reader, hash) + uploadResult, err = doUpload(uploadUrl, filename, cipher, reader, isInputGzipped, mtype, pairMap, flate.BestSpeed, jwt) + if uploadResult != nil { + uploadResult.Md5 = fmt.Sprintf("%x", hash.Sum(nil)) + } + return +} + +func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, isInputGzipped bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) { + contentIsGzipped := isInputGzipped + shouldGzipNow := false + if !isInputGzipped { + if shouldBeZipped, iAmSure := util.IsGzippableFileType(filepath.Base(filename), mtype); mtype == "" || iAmSure && shouldBeZipped { + shouldGzipNow = true + contentIsGzipped = true + } + } + + var clearDataLen int + + // gzip if possible + // this could be double copying + clearDataLen = len(data) + if shouldGzipNow { + data, err = util.GzipData(data) + } else if isInputGzipped { + // just to get the clear data length + clearData, err := util.UnGzipData(data) + if err == nil { + clearDataLen = len(clearData) + } + } + + if cipher { + // 
encrypt(gzip(data))
+
+		// encrypt
+		cipherKey := util.GenCipherKey()
+		encryptedData, encryptionErr := util.Encrypt(data, cipherKey)
+		if encryptionErr != nil {
+			err = fmt.Errorf("encrypt input: %v", encryptionErr)
+			return
+		}
+
+		// upload data
+		uploadResult, err = upload_content(uploadUrl, func(w io.Writer) (err error) {
+			_, err = w.Write(encryptedData)
+			return
+		}, "", false, "", nil, jwt)
+		if uploadResult != nil {
+			uploadResult.Name = filename
+			uploadResult.Mime = mtype
+			uploadResult.CipherKey = cipherKey
+		}
+	} else {
+		// upload data
+		uploadResult, err = upload_content(uploadUrl, func(w io.Writer) (err error) {
+			_, err = w.Write(data)
+			return
+		}, filename, contentIsGzipped, mtype, pairMap, jwt)
+	}
+
+	if uploadResult == nil {
+		return
+	}
+
+	uploadResult.Size = uint32(clearDataLen)
+	if contentIsGzipped {
+		uploadResult.Gzip = 1
+	}
+
+	return uploadResult, err
+}
+
+func doUpload(uploadUrl string, filename string, cipher bool, reader io.Reader, isInputGzipped bool, mtype string, pairMap map[string]string, compression int, jwt security.EncodedJwt) (uploadResult *UploadResult, err error) {
+	contentIsGzipped := isInputGzipped
+	shouldGzipNow := false
+	if !isInputGzipped {
+		if shouldBeZipped, iAmSure := util.IsGzippableFileType(filepath.Base(filename), mtype); mtype == "" || iAmSure && shouldBeZipped {
+			shouldGzipNow = true
+			contentIsGzipped = true
+		}
+	}
+
+	var clearDataLen int
+
+	// gzip if possible
+	// this could be double copying
+	data, readErr := ioutil.ReadAll(reader)
+	if readErr != nil {
+		err = fmt.Errorf("read input: %v", readErr)
+		return
+	}
+	clearDataLen = len(data)
+	if shouldGzipNow {
+		data, err = util.GzipData(data)
+	} else if isInputGzipped {
+		// just to get the clear data length
+		clearData, err := util.UnGzipData(data)
+		if err == nil {
+			clearDataLen = len(clearData)
+		}
+	}
+
+	if cipher {
+		// encrypt(gzip(data))
+
+		// encrypt
+		cipherKey := util.GenCipherKey()
+		encryptedData, encryptionErr := util.Encrypt(data, cipherKey)
+		if encryptionErr != nil {
+			err = fmt.Errorf("encrypt input: %v", encryptionErr)
+			return
+		}
+
+		// upload data
+		uploadResult, err = upload_content(uploadUrl, func(w io.Writer) (err error) {
+			_, err = w.Write(encryptedData)
+			return
+		}, "", false, "", nil, jwt)
+		if uploadResult != nil {
+			uploadResult.Name = filename
+			uploadResult.Mime = mtype
+			uploadResult.CipherKey = cipherKey
+			uploadResult.Size = uint32(clearDataLen)
+		}
+	} else {
+		// upload data
+		uploadResult, err = upload_content(uploadUrl, func(w io.Writer) (err error) {
+			_, err = w.Write(data)
+			return
+		}, filename, contentIsGzipped, mtype, pairMap, jwt)
+	}
+
+	if uploadResult == nil {
+		return
-	}, filename, isGzipped, mtype, pairMap, jwt)
+	}
+
+	uploadResult.Size = uint32(clearDataLen)
+	if contentIsGzipped {
+		uploadResult.Gzip = 1
+	}
+
+	return uploadResult, err
 }
+
 func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error, filename string, isGzipped bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (*UploadResult, error) {
 	body_buf := bytes.NewBufferString("")
 	body_writer := multipart.NewWriter(body_buf)
@@ -57,9 +214,6 @@ func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error
 	if isGzipped {
 		h.Set("Content-Encoding", "gzip")
 	}
-	if jwt != "" {
-		h.Set("Authorization", "BEARER "+string(jwt))
-	}
 
 	file_writer, cp_err := body_writer.CreatePart(h)
 	if cp_err != nil {
@@ -85,6 +239,9 @@ func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error
 	for k, v := range pairMap {
 		req.Header.Set(k, v)
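+		// each pairMap entry is passed through verbatim as a request header;
+		// note the JWT is likewise attached to the request (just below) rather
+		// than to the multipart part header it was set on before this change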
} + if jwt != "" { + req.Header.Set("Authorization", "BEARER "+string(jwt)) + } resp, post_err := client.Do(req) if post_err != nil { glog.V(0).Infoln("failing to upload to", uploadUrl, post_err.Error()) diff --git a/weed/pb/Makefile b/weed/pb/Makefile index 2e4d33718..6680b7ca2 100644 --- a/weed/pb/Makefile +++ b/weed/pb/Makefile @@ -6,3 +6,7 @@ gen: protoc master.proto --go_out=plugins=grpc:./master_pb protoc volume_server.proto --go_out=plugins=grpc:./volume_server_pb protoc filer.proto --go_out=plugins=grpc:./filer_pb + protoc iam.proto --go_out=plugins=grpc:./iam_pb + protoc queue.proto --go_out=plugins=grpc:./queue_pb + # protoc filer.proto --java_out=../../other/java/client/src/main/java + cp filer.proto ../../other/java/client/src/main/proto diff --git a/weed/pb/filer.proto b/weed/pb/filer.proto index 3f7c39c89..8df46e917 100644 --- a/weed/pb/filer.proto +++ b/weed/pb/filer.proto @@ -2,6 +2,9 @@ syntax = "proto3"; package filer_pb; +option java_package = "seaweedfs.client"; +option java_outer_classname = "FilerProto"; + ////////////////////////////////////////////////// service SeaweedFiler { @@ -9,7 +12,7 @@ service SeaweedFiler { rpc LookupDirectoryEntry (LookupDirectoryEntryRequest) returns (LookupDirectoryEntryResponse) { } - rpc ListEntries (ListEntriesRequest) returns (ListEntriesResponse) { + rpc ListEntries (ListEntriesRequest) returns (stream ListEntriesResponse) { } rpc CreateEntry (CreateEntryRequest) returns (CreateEntryResponse) { @@ -21,6 +24,12 @@ service SeaweedFiler { rpc DeleteEntry (DeleteEntryRequest) returns (DeleteEntryResponse) { } + rpc StreamDeleteEntries (stream DeleteEntryRequest) returns (stream DeleteEntryResponse) { + } + + rpc AtomicRenameEntry (AtomicRenameEntryRequest) returns (AtomicRenameEntryResponse) { + } + rpc AssignVolume (AssignVolumeRequest) returns (AssignVolumeResponse) { } @@ -30,6 +39,12 @@ service SeaweedFiler { rpc DeleteCollection (DeleteCollectionRequest) returns (DeleteCollectionResponse) { } + rpc Statistics (StatisticsRequest) returns (StatisticsResponse) { + } + + rpc GetFilerConfiguration (GetFilerConfigurationRequest) returns (GetFilerConfigurationResponse) { + } + } ////////////////////////////////////////////////// @@ -52,7 +67,7 @@ message ListEntriesRequest { } message ListEntriesResponse { - repeated Entry entries = 1; + Entry entry = 1; } message Entry { @@ -63,40 +78,61 @@ message Entry { map<string, bytes> extended = 5; } +message FullEntry { + string dir = 1; + Entry entry = 2; +} + message EventNotification { Entry old_entry = 1; Entry new_entry = 2; bool delete_chunks = 3; + string new_parent_path = 4; } message FileChunk { - string file_id = 1; + string file_id = 1; // to be deprecated int64 offset = 2; uint64 size = 3; int64 mtime = 4; string e_tag = 5; - string source_file_id = 6; + string source_file_id = 6; // to be deprecated + FileId fid = 7; + FileId source_fid = 8; + bytes cipher_key = 9; + bool is_gzipped = 10; +} + +message FileId { + uint32 volume_id = 1; + uint64 file_key = 2; + fixed32 cookie = 3; } message FuseAttributes { uint64 file_size = 1; - int64 mtime = 2; + int64 mtime = 2; // unix time in seconds uint32 file_mode = 3; uint32 uid = 4; uint32 gid = 5; - int64 crtime = 6; + int64 crtime = 6; // unix time in seconds string mime = 7; string replication = 8; string collection = 9; int32 ttl_sec = 10; + string user_name = 11; // for hdfs + repeated string group_name = 12; // for hdfs + string symlink_target = 13; } message CreateEntryRequest { string directory = 1; Entry entry = 2; + bool o_excl = 
3; } message CreateEntryResponse { + string error = 1; } message UpdateEntryRequest { @@ -109,12 +145,24 @@ message UpdateEntryResponse { message DeleteEntryRequest { string directory = 1; string name = 2; - bool is_directory = 3; + // bool is_directory = 3; bool is_delete_data = 4; bool is_recursive = 5; + bool ignore_recursive_error = 6; } message DeleteEntryResponse { + string error = 1; +} + +message AtomicRenameEntryRequest { + string old_directory = 1; + string old_name = 2; + string new_directory = 3; + string new_name = 4; +} + +message AtomicRenameEntryResponse { } message AssignVolumeRequest { @@ -123,6 +171,7 @@ message AssignVolumeRequest { string replication = 3; int32 ttl_sec = 4; string data_center = 5; + string parent_path = 6; } message AssignVolumeResponse { @@ -130,6 +179,10 @@ message AssignVolumeResponse { string url = 2; string public_url = 3; int32 count = 4; + string auth = 5; + string collection = 6; + string replication = 7; + string error = 8; } message LookupVolumeRequest { @@ -154,3 +207,29 @@ message DeleteCollectionRequest { message DeleteCollectionResponse { } + +message StatisticsRequest { + string replication = 1; + string collection = 2; + string ttl = 3; +} +message StatisticsResponse { + string replication = 1; + string collection = 2; + string ttl = 3; + uint64 total_size = 4; + uint64 used_size = 5; + uint64 file_count = 6; +} + +message GetFilerConfigurationRequest { +} +message GetFilerConfigurationResponse { + repeated string masters = 1; + string replication = 2; + string collection = 3; + uint32 max_mb = 4; + string dir_buckets = 5; + string dir_queues = 6; + bool cipher = 7; +} diff --git a/weed/pb/filer_pb/filer.pb.go b/weed/pb/filer_pb/filer.pb.go index f4b5355ea..9cf659ece 100644 --- a/weed/pb/filer_pb/filer.pb.go +++ b/weed/pb/filer_pb/filer.pb.go @@ -14,8 +14,10 @@ It has these top-level messages: ListEntriesRequest ListEntriesResponse Entry + FullEntry EventNotification FileChunk + FileId FuseAttributes CreateEntryRequest CreateEntryResponse @@ -23,6 +25,8 @@ It has these top-level messages: UpdateEntryResponse DeleteEntryRequest DeleteEntryResponse + AtomicRenameEntryRequest + AtomicRenameEntryResponse AssignVolumeRequest AssignVolumeResponse LookupVolumeRequest @@ -31,6 +35,10 @@ It has these top-level messages: LookupVolumeResponse DeleteCollectionRequest DeleteCollectionResponse + StatisticsRequest + StatisticsResponse + GetFilerConfigurationRequest + GetFilerConfigurationResponse */ package filer_pb @@ -143,7 +151,7 @@ func (m *ListEntriesRequest) GetLimit() uint32 { } type ListEntriesResponse struct { - Entries []*Entry `protobuf:"bytes,1,rep,name=entries" json:"entries,omitempty"` + Entry *Entry `protobuf:"bytes,1,opt,name=entry" json:"entry,omitempty"` } func (m *ListEntriesResponse) Reset() { *m = ListEntriesResponse{} } @@ -151,9 +159,9 @@ func (m *ListEntriesResponse) String() string { return proto.CompactT func (*ListEntriesResponse) ProtoMessage() {} func (*ListEntriesResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} } -func (m *ListEntriesResponse) GetEntries() []*Entry { +func (m *ListEntriesResponse) GetEntry() *Entry { if m != nil { - return m.Entries + return m.Entry } return nil } @@ -206,16 +214,41 @@ func (m *Entry) GetExtended() map[string][]byte { return nil } +type FullEntry struct { + Dir string `protobuf:"bytes,1,opt,name=dir" json:"dir,omitempty"` + Entry *Entry `protobuf:"bytes,2,opt,name=entry" json:"entry,omitempty"` +} + +func (m *FullEntry) Reset() { *m = FullEntry{} } +func (m 
*FullEntry) String() string { return proto.CompactTextString(m) } +func (*FullEntry) ProtoMessage() {} +func (*FullEntry) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } + +func (m *FullEntry) GetDir() string { + if m != nil { + return m.Dir + } + return "" +} + +func (m *FullEntry) GetEntry() *Entry { + if m != nil { + return m.Entry + } + return nil +} + type EventNotification struct { - OldEntry *Entry `protobuf:"bytes,1,opt,name=old_entry,json=oldEntry" json:"old_entry,omitempty"` - NewEntry *Entry `protobuf:"bytes,2,opt,name=new_entry,json=newEntry" json:"new_entry,omitempty"` - DeleteChunks bool `protobuf:"varint,3,opt,name=delete_chunks,json=deleteChunks" json:"delete_chunks,omitempty"` + OldEntry *Entry `protobuf:"bytes,1,opt,name=old_entry,json=oldEntry" json:"old_entry,omitempty"` + NewEntry *Entry `protobuf:"bytes,2,opt,name=new_entry,json=newEntry" json:"new_entry,omitempty"` + DeleteChunks bool `protobuf:"varint,3,opt,name=delete_chunks,json=deleteChunks" json:"delete_chunks,omitempty"` + NewParentPath string `protobuf:"bytes,4,opt,name=new_parent_path,json=newParentPath" json:"new_parent_path,omitempty"` } func (m *EventNotification) Reset() { *m = EventNotification{} } func (m *EventNotification) String() string { return proto.CompactTextString(m) } func (*EventNotification) ProtoMessage() {} -func (*EventNotification) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } +func (*EventNotification) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } func (m *EventNotification) GetOldEntry() *Entry { if m != nil { @@ -238,19 +271,30 @@ func (m *EventNotification) GetDeleteChunks() bool { return false } +func (m *EventNotification) GetNewParentPath() string { + if m != nil { + return m.NewParentPath + } + return "" +} + type FileChunk struct { - FileId string `protobuf:"bytes,1,opt,name=file_id,json=fileId" json:"file_id,omitempty"` - Offset int64 `protobuf:"varint,2,opt,name=offset" json:"offset,omitempty"` - Size uint64 `protobuf:"varint,3,opt,name=size" json:"size,omitempty"` - Mtime int64 `protobuf:"varint,4,opt,name=mtime" json:"mtime,omitempty"` - ETag string `protobuf:"bytes,5,opt,name=e_tag,json=eTag" json:"e_tag,omitempty"` - SourceFileId string `protobuf:"bytes,6,opt,name=source_file_id,json=sourceFileId" json:"source_file_id,omitempty"` + FileId string `protobuf:"bytes,1,opt,name=file_id,json=fileId" json:"file_id,omitempty"` + Offset int64 `protobuf:"varint,2,opt,name=offset" json:"offset,omitempty"` + Size uint64 `protobuf:"varint,3,opt,name=size" json:"size,omitempty"` + Mtime int64 `protobuf:"varint,4,opt,name=mtime" json:"mtime,omitempty"` + ETag string `protobuf:"bytes,5,opt,name=e_tag,json=eTag" json:"e_tag,omitempty"` + SourceFileId string `protobuf:"bytes,6,opt,name=source_file_id,json=sourceFileId" json:"source_file_id,omitempty"` + Fid *FileId `protobuf:"bytes,7,opt,name=fid" json:"fid,omitempty"` + SourceFid *FileId `protobuf:"bytes,8,opt,name=source_fid,json=sourceFid" json:"source_fid,omitempty"` + CipherKey []byte `protobuf:"bytes,9,opt,name=cipher_key,json=cipherKey,proto3" json:"cipher_key,omitempty"` + IsGzipped bool `protobuf:"varint,10,opt,name=is_gzipped,json=isGzipped" json:"is_gzipped,omitempty"` } func (m *FileChunk) Reset() { *m = FileChunk{} } func (m *FileChunk) String() string { return proto.CompactTextString(m) } func (*FileChunk) ProtoMessage() {} -func (*FileChunk) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } +func (*FileChunk) Descriptor() ([]byte, []int) { return 
fileDescriptor0, []int{7} } func (m *FileChunk) GetFileId() string { if m != nil { @@ -294,23 +338,86 @@ func (m *FileChunk) GetSourceFileId() string { return "" } +func (m *FileChunk) GetFid() *FileId { + if m != nil { + return m.Fid + } + return nil +} + +func (m *FileChunk) GetSourceFid() *FileId { + if m != nil { + return m.SourceFid + } + return nil +} + +func (m *FileChunk) GetCipherKey() []byte { + if m != nil { + return m.CipherKey + } + return nil +} + +func (m *FileChunk) GetIsGzipped() bool { + if m != nil { + return m.IsGzipped + } + return false +} + +type FileId struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + FileKey uint64 `protobuf:"varint,2,opt,name=file_key,json=fileKey" json:"file_key,omitempty"` + Cookie uint32 `protobuf:"fixed32,3,opt,name=cookie" json:"cookie,omitempty"` +} + +func (m *FileId) Reset() { *m = FileId{} } +func (m *FileId) String() string { return proto.CompactTextString(m) } +func (*FileId) ProtoMessage() {} +func (*FileId) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} } + +func (m *FileId) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *FileId) GetFileKey() uint64 { + if m != nil { + return m.FileKey + } + return 0 +} + +func (m *FileId) GetCookie() uint32 { + if m != nil { + return m.Cookie + } + return 0 +} + type FuseAttributes struct { - FileSize uint64 `protobuf:"varint,1,opt,name=file_size,json=fileSize" json:"file_size,omitempty"` - Mtime int64 `protobuf:"varint,2,opt,name=mtime" json:"mtime,omitempty"` - FileMode uint32 `protobuf:"varint,3,opt,name=file_mode,json=fileMode" json:"file_mode,omitempty"` - Uid uint32 `protobuf:"varint,4,opt,name=uid" json:"uid,omitempty"` - Gid uint32 `protobuf:"varint,5,opt,name=gid" json:"gid,omitempty"` - Crtime int64 `protobuf:"varint,6,opt,name=crtime" json:"crtime,omitempty"` - Mime string `protobuf:"bytes,7,opt,name=mime" json:"mime,omitempty"` - Replication string `protobuf:"bytes,8,opt,name=replication" json:"replication,omitempty"` - Collection string `protobuf:"bytes,9,opt,name=collection" json:"collection,omitempty"` - TtlSec int32 `protobuf:"varint,10,opt,name=ttl_sec,json=ttlSec" json:"ttl_sec,omitempty"` + FileSize uint64 `protobuf:"varint,1,opt,name=file_size,json=fileSize" json:"file_size,omitempty"` + Mtime int64 `protobuf:"varint,2,opt,name=mtime" json:"mtime,omitempty"` + FileMode uint32 `protobuf:"varint,3,opt,name=file_mode,json=fileMode" json:"file_mode,omitempty"` + Uid uint32 `protobuf:"varint,4,opt,name=uid" json:"uid,omitempty"` + Gid uint32 `protobuf:"varint,5,opt,name=gid" json:"gid,omitempty"` + Crtime int64 `protobuf:"varint,6,opt,name=crtime" json:"crtime,omitempty"` + Mime string `protobuf:"bytes,7,opt,name=mime" json:"mime,omitempty"` + Replication string `protobuf:"bytes,8,opt,name=replication" json:"replication,omitempty"` + Collection string `protobuf:"bytes,9,opt,name=collection" json:"collection,omitempty"` + TtlSec int32 `protobuf:"varint,10,opt,name=ttl_sec,json=ttlSec" json:"ttl_sec,omitempty"` + UserName string `protobuf:"bytes,11,opt,name=user_name,json=userName" json:"user_name,omitempty"` + GroupName []string `protobuf:"bytes,12,rep,name=group_name,json=groupName" json:"group_name,omitempty"` + SymlinkTarget string `protobuf:"bytes,13,opt,name=symlink_target,json=symlinkTarget" json:"symlink_target,omitempty"` } func (m *FuseAttributes) Reset() { *m = FuseAttributes{} } func (m *FuseAttributes) String() string { return proto.CompactTextString(m) } 
func (*FuseAttributes) ProtoMessage() {} -func (*FuseAttributes) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} } +func (*FuseAttributes) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} } func (m *FuseAttributes) GetFileSize() uint64 { if m != nil { @@ -382,15 +489,37 @@ func (m *FuseAttributes) GetTtlSec() int32 { return 0 } +func (m *FuseAttributes) GetUserName() string { + if m != nil { + return m.UserName + } + return "" +} + +func (m *FuseAttributes) GetGroupName() []string { + if m != nil { + return m.GroupName + } + return nil +} + +func (m *FuseAttributes) GetSymlinkTarget() string { + if m != nil { + return m.SymlinkTarget + } + return "" +} + type CreateEntryRequest struct { Directory string `protobuf:"bytes,1,opt,name=directory" json:"directory,omitempty"` Entry *Entry `protobuf:"bytes,2,opt,name=entry" json:"entry,omitempty"` + OExcl bool `protobuf:"varint,3,opt,name=o_excl,json=oExcl" json:"o_excl,omitempty"` } func (m *CreateEntryRequest) Reset() { *m = CreateEntryRequest{} } func (m *CreateEntryRequest) String() string { return proto.CompactTextString(m) } func (*CreateEntryRequest) ProtoMessage() {} -func (*CreateEntryRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} } +func (*CreateEntryRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} } func (m *CreateEntryRequest) GetDirectory() string { if m != nil { @@ -406,13 +535,28 @@ func (m *CreateEntryRequest) GetEntry() *Entry { return nil } +func (m *CreateEntryRequest) GetOExcl() bool { + if m != nil { + return m.OExcl + } + return false +} + type CreateEntryResponse struct { + Error string `protobuf:"bytes,1,opt,name=error" json:"error,omitempty"` } func (m *CreateEntryResponse) Reset() { *m = CreateEntryResponse{} } func (m *CreateEntryResponse) String() string { return proto.CompactTextString(m) } func (*CreateEntryResponse) ProtoMessage() {} -func (*CreateEntryResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} } +func (*CreateEntryResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} } + +func (m *CreateEntryResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} type UpdateEntryRequest struct { Directory string `protobuf:"bytes,1,opt,name=directory" json:"directory,omitempty"` @@ -422,7 +566,7 @@ type UpdateEntryRequest struct { func (m *UpdateEntryRequest) Reset() { *m = UpdateEntryRequest{} } func (m *UpdateEntryRequest) String() string { return proto.CompactTextString(m) } func (*UpdateEntryRequest) ProtoMessage() {} -func (*UpdateEntryRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} } +func (*UpdateEntryRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} } func (m *UpdateEntryRequest) GetDirectory() string { if m != nil { @@ -444,20 +588,21 @@ type UpdateEntryResponse struct { func (m *UpdateEntryResponse) Reset() { *m = UpdateEntryResponse{} } func (m *UpdateEntryResponse) String() string { return proto.CompactTextString(m) } func (*UpdateEntryResponse) ProtoMessage() {} -func (*UpdateEntryResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} } +func (*UpdateEntryResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} } type DeleteEntryRequest struct { - Directory string `protobuf:"bytes,1,opt,name=directory" json:"directory,omitempty"` - Name string `protobuf:"bytes,2,opt,name=name" json:"name,omitempty"` - IsDirectory bool `protobuf:"varint,3,opt,name=is_directory,json=isDirectory" 
json:"is_directory,omitempty"` - IsDeleteData bool `protobuf:"varint,4,opt,name=is_delete_data,json=isDeleteData" json:"is_delete_data,omitempty"` - IsRecursive bool `protobuf:"varint,5,opt,name=is_recursive,json=isRecursive" json:"is_recursive,omitempty"` + Directory string `protobuf:"bytes,1,opt,name=directory" json:"directory,omitempty"` + Name string `protobuf:"bytes,2,opt,name=name" json:"name,omitempty"` + // bool is_directory = 3; + IsDeleteData bool `protobuf:"varint,4,opt,name=is_delete_data,json=isDeleteData" json:"is_delete_data,omitempty"` + IsRecursive bool `protobuf:"varint,5,opt,name=is_recursive,json=isRecursive" json:"is_recursive,omitempty"` + IgnoreRecursiveError bool `protobuf:"varint,6,opt,name=ignore_recursive_error,json=ignoreRecursiveError" json:"ignore_recursive_error,omitempty"` } func (m *DeleteEntryRequest) Reset() { *m = DeleteEntryRequest{} } func (m *DeleteEntryRequest) String() string { return proto.CompactTextString(m) } func (*DeleteEntryRequest) ProtoMessage() {} -func (*DeleteEntryRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} } +func (*DeleteEntryRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} } func (m *DeleteEntryRequest) GetDirectory() string { if m != nil { @@ -473,34 +618,90 @@ func (m *DeleteEntryRequest) GetName() string { return "" } -func (m *DeleteEntryRequest) GetIsDirectory() bool { +func (m *DeleteEntryRequest) GetIsDeleteData() bool { if m != nil { - return m.IsDirectory + return m.IsDeleteData } return false } -func (m *DeleteEntryRequest) GetIsDeleteData() bool { +func (m *DeleteEntryRequest) GetIsRecursive() bool { if m != nil { - return m.IsDeleteData + return m.IsRecursive } return false } -func (m *DeleteEntryRequest) GetIsRecursive() bool { +func (m *DeleteEntryRequest) GetIgnoreRecursiveError() bool { if m != nil { - return m.IsRecursive + return m.IgnoreRecursiveError } return false } type DeleteEntryResponse struct { + Error string `protobuf:"bytes,1,opt,name=error" json:"error,omitempty"` } func (m *DeleteEntryResponse) Reset() { *m = DeleteEntryResponse{} } func (m *DeleteEntryResponse) String() string { return proto.CompactTextString(m) } func (*DeleteEntryResponse) ProtoMessage() {} -func (*DeleteEntryResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} } +func (*DeleteEntryResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{15} } + +func (m *DeleteEntryResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +type AtomicRenameEntryRequest struct { + OldDirectory string `protobuf:"bytes,1,opt,name=old_directory,json=oldDirectory" json:"old_directory,omitempty"` + OldName string `protobuf:"bytes,2,opt,name=old_name,json=oldName" json:"old_name,omitempty"` + NewDirectory string `protobuf:"bytes,3,opt,name=new_directory,json=newDirectory" json:"new_directory,omitempty"` + NewName string `protobuf:"bytes,4,opt,name=new_name,json=newName" json:"new_name,omitempty"` +} + +func (m *AtomicRenameEntryRequest) Reset() { *m = AtomicRenameEntryRequest{} } +func (m *AtomicRenameEntryRequest) String() string { return proto.CompactTextString(m) } +func (*AtomicRenameEntryRequest) ProtoMessage() {} +func (*AtomicRenameEntryRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{16} } + +func (m *AtomicRenameEntryRequest) GetOldDirectory() string { + if m != nil { + return m.OldDirectory + } + return "" +} + +func (m *AtomicRenameEntryRequest) GetOldName() string { + if m != nil { + return m.OldName + } + 
return "" +} + +func (m *AtomicRenameEntryRequest) GetNewDirectory() string { + if m != nil { + return m.NewDirectory + } + return "" +} + +func (m *AtomicRenameEntryRequest) GetNewName() string { + if m != nil { + return m.NewName + } + return "" +} + +type AtomicRenameEntryResponse struct { +} + +func (m *AtomicRenameEntryResponse) Reset() { *m = AtomicRenameEntryResponse{} } +func (m *AtomicRenameEntryResponse) String() string { return proto.CompactTextString(m) } +func (*AtomicRenameEntryResponse) ProtoMessage() {} +func (*AtomicRenameEntryResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{17} } type AssignVolumeRequest struct { Count int32 `protobuf:"varint,1,opt,name=count" json:"count,omitempty"` @@ -508,12 +709,13 @@ type AssignVolumeRequest struct { Replication string `protobuf:"bytes,3,opt,name=replication" json:"replication,omitempty"` TtlSec int32 `protobuf:"varint,4,opt,name=ttl_sec,json=ttlSec" json:"ttl_sec,omitempty"` DataCenter string `protobuf:"bytes,5,opt,name=data_center,json=dataCenter" json:"data_center,omitempty"` + ParentPath string `protobuf:"bytes,6,opt,name=parent_path,json=parentPath" json:"parent_path,omitempty"` } func (m *AssignVolumeRequest) Reset() { *m = AssignVolumeRequest{} } func (m *AssignVolumeRequest) String() string { return proto.CompactTextString(m) } func (*AssignVolumeRequest) ProtoMessage() {} -func (*AssignVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} } +func (*AssignVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{18} } func (m *AssignVolumeRequest) GetCount() int32 { if m != nil { @@ -550,17 +752,28 @@ func (m *AssignVolumeRequest) GetDataCenter() string { return "" } +func (m *AssignVolumeRequest) GetParentPath() string { + if m != nil { + return m.ParentPath + } + return "" +} + type AssignVolumeResponse struct { - FileId string `protobuf:"bytes,1,opt,name=file_id,json=fileId" json:"file_id,omitempty"` - Url string `protobuf:"bytes,2,opt,name=url" json:"url,omitempty"` - PublicUrl string `protobuf:"bytes,3,opt,name=public_url,json=publicUrl" json:"public_url,omitempty"` - Count int32 `protobuf:"varint,4,opt,name=count" json:"count,omitempty"` + FileId string `protobuf:"bytes,1,opt,name=file_id,json=fileId" json:"file_id,omitempty"` + Url string `protobuf:"bytes,2,opt,name=url" json:"url,omitempty"` + PublicUrl string `protobuf:"bytes,3,opt,name=public_url,json=publicUrl" json:"public_url,omitempty"` + Count int32 `protobuf:"varint,4,opt,name=count" json:"count,omitempty"` + Auth string `protobuf:"bytes,5,opt,name=auth" json:"auth,omitempty"` + Collection string `protobuf:"bytes,6,opt,name=collection" json:"collection,omitempty"` + Replication string `protobuf:"bytes,7,opt,name=replication" json:"replication,omitempty"` + Error string `protobuf:"bytes,8,opt,name=error" json:"error,omitempty"` } func (m *AssignVolumeResponse) Reset() { *m = AssignVolumeResponse{} } func (m *AssignVolumeResponse) String() string { return proto.CompactTextString(m) } func (*AssignVolumeResponse) ProtoMessage() {} -func (*AssignVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{15} } +func (*AssignVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{19} } func (m *AssignVolumeResponse) GetFileId() string { if m != nil { @@ -590,6 +803,34 @@ func (m *AssignVolumeResponse) GetCount() int32 { return 0 } +func (m *AssignVolumeResponse) GetAuth() string { + if m != nil { + return m.Auth + } + return "" +} + +func (m 
*AssignVolumeResponse) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *AssignVolumeResponse) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *AssignVolumeResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + type LookupVolumeRequest struct { VolumeIds []string `protobuf:"bytes,1,rep,name=volume_ids,json=volumeIds" json:"volume_ids,omitempty"` } @@ -597,7 +838,7 @@ type LookupVolumeRequest struct { func (m *LookupVolumeRequest) Reset() { *m = LookupVolumeRequest{} } func (m *LookupVolumeRequest) String() string { return proto.CompactTextString(m) } func (*LookupVolumeRequest) ProtoMessage() {} -func (*LookupVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{16} } +func (*LookupVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{20} } func (m *LookupVolumeRequest) GetVolumeIds() []string { if m != nil { @@ -613,7 +854,7 @@ type Locations struct { func (m *Locations) Reset() { *m = Locations{} } func (m *Locations) String() string { return proto.CompactTextString(m) } func (*Locations) ProtoMessage() {} -func (*Locations) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{17} } +func (*Locations) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{21} } func (m *Locations) GetLocations() []*Location { if m != nil { @@ -630,7 +871,7 @@ type Location struct { func (m *Location) Reset() { *m = Location{} } func (m *Location) String() string { return proto.CompactTextString(m) } func (*Location) ProtoMessage() {} -func (*Location) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{18} } +func (*Location) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{22} } func (m *Location) GetUrl() string { if m != nil { @@ -653,7 +894,7 @@ type LookupVolumeResponse struct { func (m *LookupVolumeResponse) Reset() { *m = LookupVolumeResponse{} } func (m *LookupVolumeResponse) String() string { return proto.CompactTextString(m) } func (*LookupVolumeResponse) ProtoMessage() {} -func (*LookupVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{19} } +func (*LookupVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{23} } func (m *LookupVolumeResponse) GetLocationsMap() map[string]*Locations { if m != nil { @@ -669,7 +910,7 @@ type DeleteCollectionRequest struct { func (m *DeleteCollectionRequest) Reset() { *m = DeleteCollectionRequest{} } func (m *DeleteCollectionRequest) String() string { return proto.CompactTextString(m) } func (*DeleteCollectionRequest) ProtoMessage() {} -func (*DeleteCollectionRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{20} } +func (*DeleteCollectionRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{24} } func (m *DeleteCollectionRequest) GetCollection() string { if m != nil { @@ -684,7 +925,167 @@ type DeleteCollectionResponse struct { func (m *DeleteCollectionResponse) Reset() { *m = DeleteCollectionResponse{} } func (m *DeleteCollectionResponse) String() string { return proto.CompactTextString(m) } func (*DeleteCollectionResponse) ProtoMessage() {} -func (*DeleteCollectionResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{21} } +func (*DeleteCollectionResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{25} } + +type StatisticsRequest struct { + Replication string `protobuf:"bytes,1,opt,name=replication" json:"replication,omitempty"` + Collection 
string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + Ttl string `protobuf:"bytes,3,opt,name=ttl" json:"ttl,omitempty"` +} + +func (m *StatisticsRequest) Reset() { *m = StatisticsRequest{} } +func (m *StatisticsRequest) String() string { return proto.CompactTextString(m) } +func (*StatisticsRequest) ProtoMessage() {} +func (*StatisticsRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{26} } + +func (m *StatisticsRequest) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *StatisticsRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *StatisticsRequest) GetTtl() string { + if m != nil { + return m.Ttl + } + return "" +} + +type StatisticsResponse struct { + Replication string `protobuf:"bytes,1,opt,name=replication" json:"replication,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + Ttl string `protobuf:"bytes,3,opt,name=ttl" json:"ttl,omitempty"` + TotalSize uint64 `protobuf:"varint,4,opt,name=total_size,json=totalSize" json:"total_size,omitempty"` + UsedSize uint64 `protobuf:"varint,5,opt,name=used_size,json=usedSize" json:"used_size,omitempty"` + FileCount uint64 `protobuf:"varint,6,opt,name=file_count,json=fileCount" json:"file_count,omitempty"` +} + +func (m *StatisticsResponse) Reset() { *m = StatisticsResponse{} } +func (m *StatisticsResponse) String() string { return proto.CompactTextString(m) } +func (*StatisticsResponse) ProtoMessage() {} +func (*StatisticsResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{27} } + +func (m *StatisticsResponse) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *StatisticsResponse) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *StatisticsResponse) GetTtl() string { + if m != nil { + return m.Ttl + } + return "" +} + +func (m *StatisticsResponse) GetTotalSize() uint64 { + if m != nil { + return m.TotalSize + } + return 0 +} + +func (m *StatisticsResponse) GetUsedSize() uint64 { + if m != nil { + return m.UsedSize + } + return 0 +} + +func (m *StatisticsResponse) GetFileCount() uint64 { + if m != nil { + return m.FileCount + } + return 0 +} + +type GetFilerConfigurationRequest struct { +} + +func (m *GetFilerConfigurationRequest) Reset() { *m = GetFilerConfigurationRequest{} } +func (m *GetFilerConfigurationRequest) String() string { return proto.CompactTextString(m) } +func (*GetFilerConfigurationRequest) ProtoMessage() {} +func (*GetFilerConfigurationRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{28} } + +type GetFilerConfigurationResponse struct { + Masters []string `protobuf:"bytes,1,rep,name=masters" json:"masters,omitempty"` + Replication string `protobuf:"bytes,2,opt,name=replication" json:"replication,omitempty"` + Collection string `protobuf:"bytes,3,opt,name=collection" json:"collection,omitempty"` + MaxMb uint32 `protobuf:"varint,4,opt,name=max_mb,json=maxMb" json:"max_mb,omitempty"` + DirBuckets string `protobuf:"bytes,5,opt,name=dir_buckets,json=dirBuckets" json:"dir_buckets,omitempty"` + DirQueues string `protobuf:"bytes,6,opt,name=dir_queues,json=dirQueues" json:"dir_queues,omitempty"` + Cipher bool `protobuf:"varint,7,opt,name=cipher" json:"cipher,omitempty"` +} + +func (m *GetFilerConfigurationResponse) Reset() { *m = GetFilerConfigurationResponse{} } +func (m *GetFilerConfigurationResponse) String() string { return 
proto.CompactTextString(m) } +func (*GetFilerConfigurationResponse) ProtoMessage() {} +func (*GetFilerConfigurationResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{29} } + +func (m *GetFilerConfigurationResponse) GetMasters() []string { + if m != nil { + return m.Masters + } + return nil +} + +func (m *GetFilerConfigurationResponse) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *GetFilerConfigurationResponse) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *GetFilerConfigurationResponse) GetMaxMb() uint32 { + if m != nil { + return m.MaxMb + } + return 0 +} + +func (m *GetFilerConfigurationResponse) GetDirBuckets() string { + if m != nil { + return m.DirBuckets + } + return "" +} + +func (m *GetFilerConfigurationResponse) GetDirQueues() string { + if m != nil { + return m.DirQueues + } + return "" +} + +func (m *GetFilerConfigurationResponse) GetCipher() bool { + if m != nil { + return m.Cipher + } + return false +} func init() { proto.RegisterType((*LookupDirectoryEntryRequest)(nil), "filer_pb.LookupDirectoryEntryRequest") @@ -692,8 +1093,10 @@ func init() { proto.RegisterType((*ListEntriesRequest)(nil), "filer_pb.ListEntriesRequest") proto.RegisterType((*ListEntriesResponse)(nil), "filer_pb.ListEntriesResponse") proto.RegisterType((*Entry)(nil), "filer_pb.Entry") + proto.RegisterType((*FullEntry)(nil), "filer_pb.FullEntry") proto.RegisterType((*EventNotification)(nil), "filer_pb.EventNotification") proto.RegisterType((*FileChunk)(nil), "filer_pb.FileChunk") + proto.RegisterType((*FileId)(nil), "filer_pb.FileId") proto.RegisterType((*FuseAttributes)(nil), "filer_pb.FuseAttributes") proto.RegisterType((*CreateEntryRequest)(nil), "filer_pb.CreateEntryRequest") proto.RegisterType((*CreateEntryResponse)(nil), "filer_pb.CreateEntryResponse") @@ -701,6 +1104,8 @@ func init() { proto.RegisterType((*UpdateEntryResponse)(nil), "filer_pb.UpdateEntryResponse") proto.RegisterType((*DeleteEntryRequest)(nil), "filer_pb.DeleteEntryRequest") proto.RegisterType((*DeleteEntryResponse)(nil), "filer_pb.DeleteEntryResponse") + proto.RegisterType((*AtomicRenameEntryRequest)(nil), "filer_pb.AtomicRenameEntryRequest") + proto.RegisterType((*AtomicRenameEntryResponse)(nil), "filer_pb.AtomicRenameEntryResponse") proto.RegisterType((*AssignVolumeRequest)(nil), "filer_pb.AssignVolumeRequest") proto.RegisterType((*AssignVolumeResponse)(nil), "filer_pb.AssignVolumeResponse") proto.RegisterType((*LookupVolumeRequest)(nil), "filer_pb.LookupVolumeRequest") @@ -709,6 +1114,10 @@ func init() { proto.RegisterType((*LookupVolumeResponse)(nil), "filer_pb.LookupVolumeResponse") proto.RegisterType((*DeleteCollectionRequest)(nil), "filer_pb.DeleteCollectionRequest") proto.RegisterType((*DeleteCollectionResponse)(nil), "filer_pb.DeleteCollectionResponse") + proto.RegisterType((*StatisticsRequest)(nil), "filer_pb.StatisticsRequest") + proto.RegisterType((*StatisticsResponse)(nil), "filer_pb.StatisticsResponse") + proto.RegisterType((*GetFilerConfigurationRequest)(nil), "filer_pb.GetFilerConfigurationRequest") + proto.RegisterType((*GetFilerConfigurationResponse)(nil), "filer_pb.GetFilerConfigurationResponse") } // Reference imports to suppress errors if they are not otherwise used. 
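Because ListEntries is now a server-streaming RPC, each response carries a single Entry and callers must loop on Recv() until io.EOF instead of reading one repeated list. A minimal client-side sketch of consuming the new stream; the address, port, directory, and use of grpc.WithInsecure are illustrative assumptions, not part of this change:

package main

import (
	"context"
	"fmt"
	"io"
	"log"

	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
	"google.golang.org/grpc"
)

func main() {
	// Dial the filer's gRPC endpoint; localhost:18888 is only an example address.
	conn, err := grpc.Dial("localhost:18888", grpc.WithInsecure())
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := filer_pb.NewSeaweedFilerClient(conn)
	stream, err := client.ListEntries(context.Background(), &filer_pb.ListEntriesRequest{Directory: "/"})
	if err != nil {
		log.Fatal(err)
	}
	for {
		resp, err := stream.Recv()
		if err == io.EOF {
			break // the server closed the stream after the last entry
		}
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(resp.Entry.Name) // one Entry per streamed response
	}
}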
@@ -723,13 +1132,17 @@ const _ = grpc.SupportPackageIsVersion4 type SeaweedFilerClient interface { LookupDirectoryEntry(ctx context.Context, in *LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*LookupDirectoryEntryResponse, error) - ListEntries(ctx context.Context, in *ListEntriesRequest, opts ...grpc.CallOption) (*ListEntriesResponse, error) + ListEntries(ctx context.Context, in *ListEntriesRequest, opts ...grpc.CallOption) (SeaweedFiler_ListEntriesClient, error) CreateEntry(ctx context.Context, in *CreateEntryRequest, opts ...grpc.CallOption) (*CreateEntryResponse, error) UpdateEntry(ctx context.Context, in *UpdateEntryRequest, opts ...grpc.CallOption) (*UpdateEntryResponse, error) DeleteEntry(ctx context.Context, in *DeleteEntryRequest, opts ...grpc.CallOption) (*DeleteEntryResponse, error) + StreamDeleteEntries(ctx context.Context, opts ...grpc.CallOption) (SeaweedFiler_StreamDeleteEntriesClient, error) + AtomicRenameEntry(ctx context.Context, in *AtomicRenameEntryRequest, opts ...grpc.CallOption) (*AtomicRenameEntryResponse, error) AssignVolume(ctx context.Context, in *AssignVolumeRequest, opts ...grpc.CallOption) (*AssignVolumeResponse, error) LookupVolume(ctx context.Context, in *LookupVolumeRequest, opts ...grpc.CallOption) (*LookupVolumeResponse, error) DeleteCollection(ctx context.Context, in *DeleteCollectionRequest, opts ...grpc.CallOption) (*DeleteCollectionResponse, error) + Statistics(ctx context.Context, in *StatisticsRequest, opts ...grpc.CallOption) (*StatisticsResponse, error) + GetFilerConfiguration(ctx context.Context, in *GetFilerConfigurationRequest, opts ...grpc.CallOption) (*GetFilerConfigurationResponse, error) } type seaweedFilerClient struct { @@ -749,13 +1162,36 @@ func (c *seaweedFilerClient) LookupDirectoryEntry(ctx context.Context, in *Looku return out, nil } -func (c *seaweedFilerClient) ListEntries(ctx context.Context, in *ListEntriesRequest, opts ...grpc.CallOption) (*ListEntriesResponse, error) { - out := new(ListEntriesResponse) - err := grpc.Invoke(ctx, "/filer_pb.SeaweedFiler/ListEntries", in, out, c.cc, opts...) +func (c *seaweedFilerClient) ListEntries(ctx context.Context, in *ListEntriesRequest, opts ...grpc.CallOption) (SeaweedFiler_ListEntriesClient, error) { + stream, err := grpc.NewClientStream(ctx, &_SeaweedFiler_serviceDesc.Streams[0], c.cc, "/filer_pb.SeaweedFiler/ListEntries", opts...) 
if err != nil { return nil, err } - return out, nil + x := &seaweedFilerListEntriesClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type SeaweedFiler_ListEntriesClient interface { + Recv() (*ListEntriesResponse, error) + grpc.ClientStream +} + +type seaweedFilerListEntriesClient struct { + grpc.ClientStream +} + +func (x *seaweedFilerListEntriesClient) Recv() (*ListEntriesResponse, error) { + m := new(ListEntriesResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil } func (c *seaweedFilerClient) CreateEntry(ctx context.Context, in *CreateEntryRequest, opts ...grpc.CallOption) (*CreateEntryResponse, error) { @@ -785,6 +1221,46 @@ func (c *seaweedFilerClient) DeleteEntry(ctx context.Context, in *DeleteEntryReq return out, nil } +func (c *seaweedFilerClient) StreamDeleteEntries(ctx context.Context, opts ...grpc.CallOption) (SeaweedFiler_StreamDeleteEntriesClient, error) { + stream, err := grpc.NewClientStream(ctx, &_SeaweedFiler_serviceDesc.Streams[1], c.cc, "/filer_pb.SeaweedFiler/StreamDeleteEntries", opts...) + if err != nil { + return nil, err + } + x := &seaweedFilerStreamDeleteEntriesClient{stream} + return x, nil +} + +type SeaweedFiler_StreamDeleteEntriesClient interface { + Send(*DeleteEntryRequest) error + Recv() (*DeleteEntryResponse, error) + grpc.ClientStream +} + +type seaweedFilerStreamDeleteEntriesClient struct { + grpc.ClientStream +} + +func (x *seaweedFilerStreamDeleteEntriesClient) Send(m *DeleteEntryRequest) error { + return x.ClientStream.SendMsg(m) +} + +func (x *seaweedFilerStreamDeleteEntriesClient) Recv() (*DeleteEntryResponse, error) { + m := new(DeleteEntryResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *seaweedFilerClient) AtomicRenameEntry(ctx context.Context, in *AtomicRenameEntryRequest, opts ...grpc.CallOption) (*AtomicRenameEntryResponse, error) { + out := new(AtomicRenameEntryResponse) + err := grpc.Invoke(ctx, "/filer_pb.SeaweedFiler/AtomicRenameEntry", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *seaweedFilerClient) AssignVolume(ctx context.Context, in *AssignVolumeRequest, opts ...grpc.CallOption) (*AssignVolumeResponse, error) { out := new(AssignVolumeResponse) err := grpc.Invoke(ctx, "/filer_pb.SeaweedFiler/AssignVolume", in, out, c.cc, opts...) @@ -812,17 +1288,39 @@ func (c *seaweedFilerClient) DeleteCollection(ctx context.Context, in *DeleteCol return out, nil } +func (c *seaweedFilerClient) Statistics(ctx context.Context, in *StatisticsRequest, opts ...grpc.CallOption) (*StatisticsResponse, error) { + out := new(StatisticsResponse) + err := grpc.Invoke(ctx, "/filer_pb.SeaweedFiler/Statistics", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedFilerClient) GetFilerConfiguration(ctx context.Context, in *GetFilerConfigurationRequest, opts ...grpc.CallOption) (*GetFilerConfigurationResponse, error) { + out := new(GetFilerConfigurationResponse) + err := grpc.Invoke(ctx, "/filer_pb.SeaweedFiler/GetFilerConfiguration", in, out, c.cc, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + // Server API for SeaweedFiler service type SeaweedFilerServer interface { LookupDirectoryEntry(context.Context, *LookupDirectoryEntryRequest) (*LookupDirectoryEntryResponse, error) - ListEntries(context.Context, *ListEntriesRequest) (*ListEntriesResponse, error) + ListEntries(*ListEntriesRequest, SeaweedFiler_ListEntriesServer) error CreateEntry(context.Context, *CreateEntryRequest) (*CreateEntryResponse, error) UpdateEntry(context.Context, *UpdateEntryRequest) (*UpdateEntryResponse, error) DeleteEntry(context.Context, *DeleteEntryRequest) (*DeleteEntryResponse, error) + StreamDeleteEntries(SeaweedFiler_StreamDeleteEntriesServer) error + AtomicRenameEntry(context.Context, *AtomicRenameEntryRequest) (*AtomicRenameEntryResponse, error) AssignVolume(context.Context, *AssignVolumeRequest) (*AssignVolumeResponse, error) LookupVolume(context.Context, *LookupVolumeRequest) (*LookupVolumeResponse, error) DeleteCollection(context.Context, *DeleteCollectionRequest) (*DeleteCollectionResponse, error) + Statistics(context.Context, *StatisticsRequest) (*StatisticsResponse, error) + GetFilerConfiguration(context.Context, *GetFilerConfigurationRequest) (*GetFilerConfigurationResponse, error) } func RegisterSeaweedFilerServer(s *grpc.Server, srv SeaweedFilerServer) { @@ -847,22 +1345,25 @@ func _SeaweedFiler_LookupDirectoryEntry_Handler(srv interface{}, ctx context.Con return interceptor(ctx, in, info, handler) } -func _SeaweedFiler_ListEntries_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(ListEntriesRequest) - if err := dec(in); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(SeaweedFilerServer).ListEntries(ctx, in) +func _SeaweedFiler_ListEntries_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(ListEntriesRequest) + if err := stream.RecvMsg(m); err != nil { + return err } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: "/filer_pb.SeaweedFiler/ListEntries", - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(SeaweedFilerServer).ListEntries(ctx, req.(*ListEntriesRequest)) - } - return interceptor(ctx, in, info, handler) + return srv.(SeaweedFilerServer).ListEntries(m, &seaweedFilerListEntriesServer{stream}) +} + +type SeaweedFiler_ListEntriesServer interface { + Send(*ListEntriesResponse) error + grpc.ServerStream +} + +type seaweedFilerListEntriesServer struct { + grpc.ServerStream +} + +func (x *seaweedFilerListEntriesServer) Send(m *ListEntriesResponse) error { + return x.ServerStream.SendMsg(m) } func _SeaweedFiler_CreateEntry_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { @@ -919,6 +1420,50 @@ func _SeaweedFiler_DeleteEntry_Handler(srv interface{}, ctx context.Context, dec return interceptor(ctx, in, info, handler) } +func _SeaweedFiler_StreamDeleteEntries_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(SeaweedFilerServer).StreamDeleteEntries(&seaweedFilerStreamDeleteEntriesServer{stream}) +} + +type SeaweedFiler_StreamDeleteEntriesServer interface { + Send(*DeleteEntryResponse) error + Recv() (*DeleteEntryRequest, error) + grpc.ServerStream +} + +type seaweedFilerStreamDeleteEntriesServer struct { + grpc.ServerStream +} + +func (x *seaweedFilerStreamDeleteEntriesServer) Send(m *DeleteEntryResponse) error { + 
return x.ServerStream.SendMsg(m) +} + +func (x *seaweedFilerStreamDeleteEntriesServer) Recv() (*DeleteEntryRequest, error) { + m := new(DeleteEntryRequest) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func _SeaweedFiler_AtomicRenameEntry_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(AtomicRenameEntryRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedFilerServer).AtomicRenameEntry(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/filer_pb.SeaweedFiler/AtomicRenameEntry", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedFilerServer).AtomicRenameEntry(ctx, req.(*AtomicRenameEntryRequest)) + } + return interceptor(ctx, in, info, handler) +} + func _SeaweedFiler_AssignVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(AssignVolumeRequest) if err := dec(in); err != nil { @@ -973,6 +1518,42 @@ func _SeaweedFiler_DeleteCollection_Handler(srv interface{}, ctx context.Context return interceptor(ctx, in, info, handler) } +func _SeaweedFiler_Statistics_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StatisticsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedFilerServer).Statistics(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/filer_pb.SeaweedFiler/Statistics", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedFilerServer).Statistics(ctx, req.(*StatisticsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _SeaweedFiler_GetFilerConfiguration_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetFilerConfigurationRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedFilerServer).GetFilerConfiguration(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/filer_pb.SeaweedFiler/GetFilerConfiguration", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedFilerServer).GetFilerConfiguration(ctx, req.(*GetFilerConfigurationRequest)) + } + return interceptor(ctx, in, info, handler) +} + var _SeaweedFiler_serviceDesc = grpc.ServiceDesc{ ServiceName: "filer_pb.SeaweedFiler", HandlerType: (*SeaweedFilerServer)(nil), @@ -982,10 +1563,6 @@ var _SeaweedFiler_serviceDesc = grpc.ServiceDesc{ Handler: _SeaweedFiler_LookupDirectoryEntry_Handler, }, { - MethodName: "ListEntries", - Handler: _SeaweedFiler_ListEntries_Handler, - }, - { MethodName: "CreateEntry", Handler: _SeaweedFiler_CreateEntry_Handler, }, @@ -998,6 +1575,10 @@ var _SeaweedFiler_serviceDesc = grpc.ServiceDesc{ Handler: _SeaweedFiler_DeleteEntry_Handler, }, { + MethodName: "AtomicRenameEntry", + Handler: _SeaweedFiler_AtomicRenameEntry_Handler, + }, + { MethodName: "AssignVolume", Handler: _SeaweedFiler_AssignVolume_Handler, }, @@ -1009,84 +1590,143 @@ var _SeaweedFiler_serviceDesc = grpc.ServiceDesc{ MethodName: "DeleteCollection", Handler: 
_SeaweedFiler_DeleteCollection_Handler,
		},
+		{
+			MethodName: "Statistics",
+			Handler:    _SeaweedFiler_Statistics_Handler,
+		},
+		{
+			MethodName: "GetFilerConfiguration",
+			Handler:    _SeaweedFiler_GetFilerConfiguration_Handler,
+		},
+	},
+	Streams: []grpc.StreamDesc{
+		{
+			StreamName:    "ListEntries",
+			Handler:       _SeaweedFiler_ListEntries_Handler,
+			ServerStreams: true,
+		},
+		{
+			StreamName:    "StreamDeleteEntries",
+			Handler:       _SeaweedFiler_StreamDeleteEntries_Handler,
+			ServerStreams: true,
+			ClientStreams: true,
+		},
	},
-	Streams:  []grpc.StreamDesc{},
	Metadata: "filer.proto",
}

func init() { proto.RegisterFile("filer.proto", fileDescriptor0) }

var fileDescriptor0 = []byte{
-	// 1129 bytes of a gzipped FileDescriptorProto (generated byte values elided)
+	// 1759 bytes of a gzipped FileDescriptorProto (generated byte values elided)
0x7f, 0x98, 0xdc, + 0xbb, 0x8c, 0x6d, 0x1b, 0xb4, 0xfb, 0xbb, 0x6d, 0x70, 0xc9, 0x84, 0xb3, 0x0d, 0x2e, 0x1b, 0x0b, + 0xde, 0x0d, 0xf2, 0x7b, 0x58, 0x5b, 0xec, 0xb3, 0xe4, 0xa3, 0x45, 0xe8, 0x1a, 0xed, 0x7b, 0xe2, + 0x5d, 0x25, 0x52, 0x19, 0x7f, 0x01, 0x30, 0x6f, 0x9f, 0xe4, 0xee, 0x5c, 0xa7, 0xd1, 0xbe, 0x27, + 0x9b, 0xcb, 0x99, 0x95, 0xa9, 0x3f, 0xc2, 0xad, 0xa5, 0x3d, 0x8a, 0x58, 0x05, 0x78, 0x55, 0x97, + 0x9b, 0xfc, 0xe4, 0x5a, 0xb9, 0xf2, 0xae, 0xa7, 0xf7, 0x60, 0x4d, 0x9a, 0x16, 0x71, 0x2c, 0x77, + 0xa3, 0x84, 0xd1, 0x54, 0x3d, 0x05, 0xd4, 0x78, 0x23, 0xb8, 0xe2, 0x47, 0x5d, 0xfc, 0x37, 0xee, + 0xe7, 0xff, 0x0b, 0x00, 0x00, 0xff, 0xff, 0x35, 0x0b, 0x9e, 0x2e, 0x9c, 0x13, 0x00, 0x00, } diff --git a/weed/pb/filer_pb/filer_pb_helper.go b/weed/pb/filer_pb/filer_pb_helper.go new file mode 100644 index 000000000..96ab2154f --- /dev/null +++ b/weed/pb/filer_pb/filer_pb_helper.go @@ -0,0 +1,105 @@ +package filer_pb + +import ( + "context" + "errors" + "fmt" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +func toFileIdObject(fileIdStr string) (*FileId, error) { + t, err := needle.ParseFileIdFromString(fileIdStr) + if err != nil { + return nil, err + } + return &FileId{ + VolumeId: uint32(t.VolumeId), + Cookie: uint32(t.Cookie), + FileKey: uint64(t.Key), + }, nil + +} + +func (fid *FileId) toFileIdString() string { + return needle.NewFileId(needle.VolumeId(fid.VolumeId), fid.FileKey, fid.Cookie).String() +} + +func (c *FileChunk) GetFileIdString() string { + if c.FileId != "" { + return c.FileId + } + if c.Fid != nil { + c.FileId = c.Fid.toFileIdString() + return c.FileId + } + return "" +} + +func BeforeEntrySerialization(chunks []*FileChunk) { + + for _, chunk := range chunks { + + if chunk.FileId != "" { + if fid, err := toFileIdObject(chunk.FileId); err == nil { + chunk.Fid = fid + chunk.FileId = "" + } + } + + if chunk.SourceFileId != "" { + if fid, err := toFileIdObject(chunk.SourceFileId); err == nil { + chunk.SourceFid = fid + chunk.SourceFileId = "" + } + } + + } +} + +func AfterEntryDeserialization(chunks []*FileChunk) { + + for _, chunk := range chunks { + + if chunk.Fid != nil && chunk.FileId == "" { + chunk.FileId = chunk.Fid.toFileIdString() + } + + if chunk.SourceFid != nil && chunk.SourceFileId == "" { + chunk.SourceFileId = chunk.SourceFid.toFileIdString() + } + + } +} + +func CreateEntry(client SeaweedFilerClient, request *CreateEntryRequest) error { + resp, err := client.CreateEntry(context.Background(), request) + if err != nil { + glog.V(1).Infof("create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, err) + return fmt.Errorf("CreateEntry: %v", err) + } + if resp.Error != "" { + glog.V(1).Infof("create entry %s/%s %v: %v", request.Directory, request.Entry.Name, request.OExcl, resp.Error) + return fmt.Errorf("CreateEntry: %v", resp.Error) + } + return nil +} + +func LookupEntry(client SeaweedFilerClient, request *LookupDirectoryEntryRequest) (*LookupDirectoryEntryResponse, error) { + resp, err := client.LookupDirectoryEntry(context.Background(), request) + if err != nil { + if err == ErrNotFound || strings.Contains(err.Error(), ErrNotFound.Error()) { + return nil, ErrNotFound + } + glog.V(3).Infof("read %s/%v: %v", request.Directory, request.Name, err) + return nil, fmt.Errorf("LookupEntry1: %v", err) + } + if resp.Entry == nil { + return nil, ErrNotFound + } + return resp, nil +} + +var ErrNotFound = errors.New("filer: no entry is found in filer store") diff --git
a/weed/pb/filer_pb/filer_pb_helper_test.go b/weed/pb/filer_pb/filer_pb_helper_test.go new file mode 100644 index 000000000..d4468c011 --- /dev/null +++ b/weed/pb/filer_pb/filer_pb_helper_test.go @@ -0,0 +1,17 @@ +package filer_pb + +import ( + "testing" + + "github.com/golang/protobuf/proto" +) + +func TestFileIdSize(t *testing.T) { + fileIdStr := "11745,0293434534cbb9892b" + + fid, _ := toFileIdObject(fileIdStr) + bytes, _ := proto.Marshal(fid) + + println(len(fileIdStr)) + println(len(bytes)) +} diff --git a/weed/pb/grpc_client_server.go b/weed/pb/grpc_client_server.go new file mode 100644 index 000000000..4b5f9eff3 --- /dev/null +++ b/weed/pb/grpc_client_server.go @@ -0,0 +1,182 @@ +package pb + +import ( + "context" + "fmt" + "net/http" + "strconv" + "strings" + "sync" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/keepalive" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" +) + +var ( + // cache grpc connections + grpcClients = make(map[string]*grpc.ClientConn) + grpcClientsLock sync.Mutex +) + +func init() { + http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = 1024 +} + +func NewGrpcServer(opts ...grpc.ServerOption) *grpc.Server { + var options []grpc.ServerOption + options = append(options, grpc.KeepaliveParams(keepalive.ServerParameters{ + Time: 10 * time.Second, // wait time before ping if no activity + Timeout: 20 * time.Second, // ping timeout + }), grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ + MinTime: 60 * time.Second, // min time a client should wait before sending a ping + PermitWithoutStream: true, + })) + for _, opt := range opts { + if opt != nil { + options = append(options, opt) + } + } + return grpc.NewServer(options...) +} + +func GrpcDial(ctx context.Context, address string, opts ...grpc.DialOption) (*grpc.ClientConn, error) { + // opts = append(opts, grpc.WithBlock()) + // opts = append(opts, grpc.WithTimeout(time.Duration(5*time.Second))) + var options []grpc.DialOption + options = append(options, + // grpc.WithInsecure(), + grpc.WithKeepaliveParams(keepalive.ClientParameters{ + Time: 30 * time.Second, // client ping server if no activity for this long + Timeout: 20 * time.Second, + PermitWithoutStream: true, + })) + for _, opt := range opts { + if opt != nil { + options = append(options, opt) + } + } + return grpc.DialContext(ctx, address, options...) +} + +func WithCachedGrpcClient(fn func(*grpc.ClientConn) error, address string, opts ...grpc.DialOption) error { + + grpcClientsLock.Lock() + + existingConnection, found := grpcClients[address] + if found { + grpcClientsLock.Unlock() + err := fn(existingConnection) + if err != nil { + grpcClientsLock.Lock() + delete(grpcClients, address) + grpcClientsLock.Unlock() + existingConnection.Close() + } + return err + } + + grpcConnection, err := GrpcDial(context.Background(), address, opts...) 
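+ // note: grpcClientsLock is deliberately still held here on the cache-miss
+ // path, so concurrent callers cannot race to dial duplicate connections
+ // for the same address; if fn fails further below, the connection is
+ // evicted from the cache and closed so the next caller re-dials.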
+ if err != nil { + grpcClientsLock.Unlock() + return fmt.Errorf("fail to dial %s: %v", address, err) + } + + grpcClients[address] = grpcConnection + grpcClientsLock.Unlock() + + err = fn(grpcConnection) + if err != nil { + grpcClientsLock.Lock() + delete(grpcClients, address) + grpcClientsLock.Unlock() + grpcConnection.Close() + } + + return err +} + +func ParseServerToGrpcAddress(server string) (serverGrpcAddress string, err error) { + colonIndex := strings.LastIndex(server, ":") + if colonIndex < 0 { + return "", fmt.Errorf("server should have hostname:port format: %v", server) + } + + port, parseErr := strconv.ParseUint(server[colonIndex+1:], 10, 64) + if parseErr != nil { + return "", fmt.Errorf("server port parse error: %v", parseErr) + } + + grpcPort := int(port) + 10000 + + return fmt.Sprintf("%s:%d", server[:colonIndex], grpcPort), nil +} + +func ServerToGrpcAddress(server string) (serverGrpcAddress string) { + hostnameAndPort := strings.Split(server, ":") + if len(hostnameAndPort) != 2 { + return fmt.Sprintf("unexpected server address: %s", server) + } + + port, parseErr := strconv.ParseUint(hostnameAndPort[1], 10, 64) + if parseErr != nil { + return fmt.Sprintf("failed to parse port for %s:%s", hostnameAndPort[0], hostnameAndPort[1]) + } + + grpcPort := int(port) + 10000 + + return fmt.Sprintf("%s:%d", hostnameAndPort[0], grpcPort) +} + +func WithMasterClient(master string, grpcDialOption grpc.DialOption, fn func(client master_pb.SeaweedClient) error) error { + + masterGrpcAddress, parseErr := ParseServerToGrpcAddress(master) + if parseErr != nil { + return fmt.Errorf("failed to parse master grpc %v: %v", master, parseErr) + } + + return WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error { + client := master_pb.NewSeaweedClient(grpcConnection) + return fn(client) + }, masterGrpcAddress, grpcDialOption) + +} + +func WithFilerClient(filer string, grpcDialOption grpc.DialOption, fn func(client filer_pb.SeaweedFilerClient) error) error { + + filerGrpcAddress, parseErr := ParseServerToGrpcAddress(filer) + if parseErr != nil { + return fmt.Errorf("failed to parse filer grpc %v: %v", filer, parseErr) + } + + return WithGrpcFilerClient(filerGrpcAddress, grpcDialOption, fn) + +} + +func WithGrpcFilerClient(filerGrpcAddress string, grpcDialOption grpc.DialOption, fn func(client filer_pb.SeaweedFilerClient) error) error { + + return WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error { + client := filer_pb.NewSeaweedFilerClient(grpcConnection) + return fn(client) + }, filerGrpcAddress, grpcDialOption) + +} + +func ParseFilerGrpcAddress(filer string) (filerGrpcAddress string, err error) { + hostnameAndPort := strings.Split(filer, ":") + if len(hostnameAndPort) != 2 { + return "", fmt.Errorf("filer should have hostname:port format: %v", hostnameAndPort) + } + + filerPort, parseErr := strconv.ParseUint(hostnameAndPort[1], 10, 64) + if parseErr != nil { + return "", fmt.Errorf("filer port parse error: %v", parseErr) + } + + filerGrpcPort := int(filerPort) + 10000 + + return fmt.Sprintf("%s:%d", hostnameAndPort[0], filerGrpcPort), nil +} diff --git a/weed/pb/iam.proto b/weed/pb/iam.proto new file mode 100644 index 000000000..2eef22dd9 --- /dev/null +++ b/weed/pb/iam.proto @@ -0,0 +1,50 @@ +syntax = "proto3"; + +package iam_pb; + +option java_package = "seaweedfs.client"; +option java_outer_classname = "IamProto"; + +////////////////////////////////////////////////// + +service SeaweedIdentityAccessManagement { + +} + +////////////////////////////////////////////////// 
+ +message S3ApiConfiguration { + repeated Identity identities = 1; +} + +message Identity { + string name = 1; + repeated Credential credentials = 2; + repeated string actions = 3; +} + +message Credential { + string access_key = 1; + string secret_key = 2; + // uint64 expiration = 3; + // bool is_disabled = 4; +} + +/* +message Policy { + repeated Statement statements = 1; +} + +message Statement { + repeated Action action = 1; + repeated Resource resource = 2; +} + +message Action { + string action = 1; +} +message Resource { + string bucket = 1; + // string path = 2; +} +*/
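As a quick illustration of how the generated iam_pb types fit together, here is a minimal sketch (not part of this change) that builds an S3ApiConfiguration and serializes it with the protobuf runtime; the identity name, keys, and action strings are made-up placeholder values:

package main

import (
	"fmt"

	"github.com/golang/protobuf/proto"

	"github.com/chrislusf/seaweedfs/weed/pb/iam_pb"
)

func main() {
	// One identity holding one access/secret key pair; all values are placeholders.
	conf := &iam_pb.S3ApiConfiguration{
		Identities: []*iam_pb.Identity{
			{
				Name: "example_user",
				Credentials: []*iam_pb.Credential{
					{AccessKey: "example_access_key", SecretKey: "example_secret_key"},
				},
				Actions: []string{"Read", "Write"}, // hypothetical action names
			},
		},
	}

	// Serialize to the wire format; the same bytes round-trip via proto.Unmarshal.
	data, err := proto.Marshal(conf)
	if err != nil {
		panic(err)
	}
	fmt.Printf("S3ApiConfiguration serialized to %d bytes\n", len(data))
}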
\ No newline at end of file diff --git a/weed/pb/iam_pb/iam.pb.go b/weed/pb/iam_pb/iam.pb.go new file mode 100644 index 000000000..b7d7b038b --- /dev/null +++ b/weed/pb/iam_pb/iam.pb.go @@ -0,0 +1,174 @@ +// Code generated by protoc-gen-go. +// source: iam.proto +// DO NOT EDIT! + +/* +Package iam_pb is a generated protocol buffer package. + +It is generated from these files: + iam.proto + +It has these top-level messages: + S3ApiConfiguration + Identity + Credential +*/ +package iam_pb + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" + +import ( + context "golang.org/x/net/context" + grpc "google.golang.org/grpc" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type S3ApiConfiguration struct { + Identities []*Identity `protobuf:"bytes,1,rep,name=identities" json:"identities,omitempty"` +} + +func (m *S3ApiConfiguration) Reset() { *m = S3ApiConfiguration{} } +func (m *S3ApiConfiguration) String() string { return proto.CompactTextString(m) } +func (*S3ApiConfiguration) ProtoMessage() {} +func (*S3ApiConfiguration) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } + +func (m *S3ApiConfiguration) GetIdentities() []*Identity { + if m != nil { + return m.Identities + } + return nil +} + +type Identity struct { + Name string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + Credentials []*Credential `protobuf:"bytes,2,rep,name=credentials" json:"credentials,omitempty"` + Actions []string `protobuf:"bytes,3,rep,name=actions" json:"actions,omitempty"` +} + +func (m *Identity) Reset() { *m = Identity{} } +func (m *Identity) String() string { return proto.CompactTextString(m) } +func (*Identity) ProtoMessage() {} +func (*Identity) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } + +func (m *Identity) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +func (m *Identity) GetCredentials() []*Credential { + if m != nil { + return m.Credentials + } + return nil +} + +func (m *Identity) GetActions() []string { + if m != nil { + return m.Actions + } + return nil +} + +type Credential struct { + AccessKey string `protobuf:"bytes,1,opt,name=access_key,json=accessKey" json:"access_key,omitempty"` + SecretKey string `protobuf:"bytes,2,opt,name=secret_key,json=secretKey" json:"secret_key,omitempty"` +} + +func (m *Credential) Reset() { *m = Credential{} } +func (m *Credential) String() string { return proto.CompactTextString(m) } +func (*Credential) ProtoMessage() {} +func (*Credential) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} } + +func (m *Credential) GetAccessKey() string { + if m != nil { + return m.AccessKey + } + return "" +} + +func (m *Credential) GetSecretKey() string { + if m != nil { + return m.SecretKey + } + return "" +} + +func init() { + proto.RegisterType((*S3ApiConfiguration)(nil), "iam_pb.S3ApiConfiguration") + proto.RegisterType((*Identity)(nil), "iam_pb.Identity") + proto.RegisterType((*Credential)(nil), "iam_pb.Credential") +} + +// Reference imports to suppress errors if they are not otherwise used. 
+var _ context.Context +var _ grpc.ClientConn + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +const _ = grpc.SupportPackageIsVersion4 + +// Client API for SeaweedIdentityAccessManagement service + +type SeaweedIdentityAccessManagementClient interface { +} + +type seaweedIdentityAccessManagementClient struct { + cc *grpc.ClientConn +} + +func NewSeaweedIdentityAccessManagementClient(cc *grpc.ClientConn) SeaweedIdentityAccessManagementClient { + return &seaweedIdentityAccessManagementClient{cc} +} + +// Server API for SeaweedIdentityAccessManagement service + +type SeaweedIdentityAccessManagementServer interface { +} + +func RegisterSeaweedIdentityAccessManagementServer(s *grpc.Server, srv SeaweedIdentityAccessManagementServer) { + s.RegisterService(&_SeaweedIdentityAccessManagement_serviceDesc, srv) +} + +var _SeaweedIdentityAccessManagement_serviceDesc = grpc.ServiceDesc{ + ServiceName: "iam_pb.SeaweedIdentityAccessManagement", + HandlerType: (*SeaweedIdentityAccessManagementServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{}, + Metadata: "iam.proto", +} + +func init() { proto.RegisterFile("iam.proto", fileDescriptor0) } + +var fileDescriptor0 = []byte{ + // 250 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x4c, 0x90, 0x41, 0x4b, 0xc3, 0x40, + 0x10, 0x85, 0x69, 0x23, 0xb5, 0x99, 0x5e, 0xca, 0x9c, 0xf6, 0xa0, 0x18, 0x73, 0xca, 0x29, 0x48, + 0xeb, 0x1f, 0xa8, 0x05, 0xa1, 0x16, 0x41, 0xd2, 0x1f, 0x50, 0xa6, 0xdb, 0x69, 0x19, 0xec, 0x6e, + 0x42, 0x76, 0x45, 0xf2, 0xef, 0x25, 0xbb, 0x46, 0x7b, 0xdb, 0x7d, 0xdf, 0x7b, 0xb3, 0x3b, 0x0f, + 0x52, 0x21, 0x53, 0x36, 0x6d, 0xed, 0x6b, 0x9c, 0x08, 0x99, 0x7d, 0x73, 0xc8, 0x5f, 0x01, 0x77, + 0xcb, 0x55, 0x23, 0xeb, 0xda, 0x9e, 0xe4, 0xfc, 0xd5, 0x92, 0x97, 0xda, 0xe2, 0x13, 0x80, 0x1c, + 0xd9, 0x7a, 0xf1, 0xc2, 0x4e, 0x8d, 0xb2, 0xa4, 0x98, 0x2d, 0xe6, 0x65, 0x8c, 0x94, 0x9b, 0x48, + 0xba, 0xea, 0xca, 0x93, 0x5b, 0x98, 0x0e, 0x3a, 0x22, 0xdc, 0x58, 0x32, 0xac, 0x46, 0xd9, 0xa8, + 0x48, 0xab, 0x70, 0xc6, 0x67, 0x98, 0xe9, 0x96, 0x83, 0x83, 0x2e, 0x4e, 0x8d, 0xc3, 0x48, 0x1c, + 0x46, 0xae, 0xff, 0x50, 0x75, 0x6d, 0x43, 0x05, 0xb7, 0xa4, 0xfb, 0x1f, 0x39, 0x95, 0x64, 0x49, + 0x91, 0x56, 0xc3, 0x35, 0x7f, 0x03, 0xf8, 0x0f, 0xe1, 0x3d, 0x00, 0x69, 0xcd, 0xce, 0xed, 0x3f, + 0xb9, 0xfb, 0x7d, 0x37, 0x8d, 0xca, 0x96, 0xbb, 0x1e, 0x3b, 0xd6, 0x2d, 0xfb, 0x80, 0xc7, 0x11, + 0x47, 0x65, 0xcb, 0xdd, 0xe2, 0x11, 0x1e, 0x76, 0x4c, 0xdf, 0xcc, 0xc7, 0x61, 0x85, 0x55, 0x88, + 0xbe, 0x93, 0xa5, 0x33, 0x1b, 0xb6, 0xfe, 0xe5, 0x0e, 0xe6, 0x2e, 0x5a, 0x4e, 0xae, 0xd4, 0x17, + 0xe9, 0xb5, 0xe9, 0x86, 0xcc, 0x47, 0x5f, 0xe6, 0x61, 0x12, 0x3a, 0x5d, 0xfe, 0x04, 0x00, 0x00, + 0xff, 0xff, 0x83, 0x4f, 0x61, 0x03, 0x60, 0x01, 0x00, 0x00, +} diff --git a/weed/pb/master.proto b/weed/pb/master.proto index 3f348a4bd..4310b2602 100644 --- a/weed/pb/master.proto +++ b/weed/pb/master.proto @@ -7,10 +7,26 @@ package master_pb; service Seaweed { rpc SendHeartbeat (stream Heartbeat) returns (stream HeartbeatResponse) { } - rpc KeepConnected (stream ClientListenRequest) returns (stream VolumeLocation) { + rpc KeepConnected (stream KeepConnectedRequest) returns (stream VolumeLocation) { } rpc LookupVolume (LookupVolumeRequest) returns (LookupVolumeResponse) { } + rpc Assign (AssignRequest) returns (AssignResponse) { + } + rpc Statistics (StatisticsRequest) returns (StatisticsResponse) { + } + rpc CollectionList 
(CollectionListRequest) returns (CollectionListResponse) { + } + rpc CollectionDelete (CollectionDeleteRequest) returns (CollectionDeleteResponse) { + } + rpc VolumeList (VolumeListRequest) returns (VolumeListResponse) { + } + rpc LookupEcVolume (LookupEcVolumeRequest) returns (LookupEcVolumeResponse) { + } + rpc GetMasterConfiguration (GetMasterConfigurationRequest) returns (GetMasterConfigurationResponse) { + } + rpc ListMasterClients (ListMasterClientsRequest) returns (ListMasterClientsResponse) { + } } ////////////////////////////////////////////////// @@ -25,15 +41,26 @@ message Heartbeat { string rack = 7; uint32 admin_port = 8; repeated VolumeInformationMessage volumes = 9; - // delta volume ids - repeated uint32 new_vids = 10; - repeated uint32 deleted_vids = 11; + // delta volumes + repeated VolumeShortInformationMessage new_volumes = 10; + repeated VolumeShortInformationMessage deleted_volumes = 11; + bool has_no_volumes = 12; + + // erasure coding + repeated VolumeEcShardInformationMessage ec_shards = 16; + // delta erasure coding shards + repeated VolumeEcShardInformationMessage new_ec_shards = 17; + repeated VolumeEcShardInformationMessage deleted_ec_shards = 18; + bool has_no_ec_shards = 19; + } message HeartbeatResponse { - uint64 volumeSizeLimit = 1; - string secretKey = 2; - string leader = 3; + uint64 volume_size_limit = 1; + string leader = 2; + string metrics_address = 3; + uint32 metrics_interval_seconds = 4; + repeated StorageBackend storage_backends = 5; } message VolumeInformationMessage { @@ -47,6 +74,30 @@ message VolumeInformationMessage { uint32 replica_placement = 8; uint32 version = 9; uint32 ttl = 10; + uint32 compact_revision = 11; + int64 modified_at_second = 12; + string remote_storage_name = 13; + string remote_storage_key = 14; +} + +message VolumeShortInformationMessage { + uint32 id = 1; + string collection = 3; + uint32 replica_placement = 8; + uint32 version = 9; + uint32 ttl = 10; +} + +message VolumeEcShardInformationMessage { + uint32 id = 1; + string collection = 2; + uint32 ec_index_bits = 3; +} + +message StorageBackend { + string type = 1; + string id = 2; + map<string, string> properties = 3; } message Empty { @@ -61,8 +112,9 @@ message SuperBlockExtra { ErasureCoding erasure_coding = 1; } -message ClientListenRequest { +message KeepConnectedRequest { string name = 1; + uint32 grpc_port = 2; } message VolumeLocation { @@ -70,6 +122,7 @@ message VolumeLocation { string public_url = 2; repeated uint32 new_vids = 3; repeated uint32 deleted_vids = 4; + string leader = 5; // optional when leader is not itself } message LookupVolumeRequest { @@ -89,3 +142,135 @@ message Location { string url = 1; string public_url = 2; } + +message AssignRequest { + uint64 count = 1; + string replication = 2; + string collection = 3; + string ttl = 4; + string data_center = 5; + string rack = 6; + string data_node = 7; + uint32 memory_map_max_size_mb = 8; + uint32 Writable_volume_count = 9; +} +message AssignResponse { + string fid = 1; + string url = 2; + string public_url = 3; + uint64 count = 4; + string error = 5; + string auth = 6; +} + +message StatisticsRequest { + string replication = 1; + string collection = 2; + string ttl = 3; +} +message StatisticsResponse { + string replication = 1; + string collection = 2; + string ttl = 3; + uint64 total_size = 4; + uint64 used_size = 5; + uint64 file_count = 6; +} + +// +// collection related +// + +message StorageType { + string replication = 1; + string ttl = 2; +} +message Collection { + string name = 1; +} 
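The collection messages that follow pair with the CollectionList RPC declared at the top of this file; as a rough sketch (assuming the standard protoc-gen-go client method for the Seaweed service, a placeholder master address, and an insecure dial option), they can be driven through the WithMasterClient helper added in weed/pb/grpc_client_server.go:

package main

import (
	"context"
	"fmt"

	"google.golang.org/grpc"

	"github.com/chrislusf/seaweedfs/weed/pb"
	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
)

func listCollections() error {
	// "localhost:9333" is a placeholder; WithMasterClient adds 10000 to the
	// HTTP port to reach the master's gRPC endpoint.
	return pb.WithMasterClient("localhost:9333", grpc.WithInsecure(), func(client master_pb.SeaweedClient) error {
		resp, err := client.CollectionList(context.Background(), &master_pb.CollectionListRequest{
			IncludeNormalVolumes: true,
			IncludeEcVolumes:     true,
		})
		if err != nil {
			return err
		}
		for _, c := range resp.Collections {
			fmt.Println(c.Name)
		}
		return nil
	})
}

func main() {
	if err := listCollections(); err != nil {
		fmt.Println(err)
	}
}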
+message CollectionListRequest { + bool include_normal_volumes = 1; + bool include_ec_volumes = 2; +} +message CollectionListResponse { + repeated Collection collections = 1; +} + +message CollectionDeleteRequest { + string name = 1; +} +message CollectionDeleteResponse { +} + +// +// volume related +// +message DataNodeInfo { + string id = 1; + uint64 volume_count = 2; + uint64 max_volume_count = 3; + uint64 free_volume_count = 4; + uint64 active_volume_count = 5; + repeated VolumeInformationMessage volume_infos = 6; + repeated VolumeEcShardInformationMessage ec_shard_infos = 7; + uint64 remote_volume_count = 8; +} +message RackInfo { + string id = 1; + uint64 volume_count = 2; + uint64 max_volume_count = 3; + uint64 free_volume_count = 4; + uint64 active_volume_count = 5; + repeated DataNodeInfo data_node_infos = 6; + uint64 remote_volume_count = 7; +} +message DataCenterInfo { + string id = 1; + uint64 volume_count = 2; + uint64 max_volume_count = 3; + uint64 free_volume_count = 4; + uint64 active_volume_count = 5; + repeated RackInfo rack_infos = 6; + uint64 remote_volume_count = 7; +} +message TopologyInfo { + string id = 1; + uint64 volume_count = 2; + uint64 max_volume_count = 3; + uint64 free_volume_count = 4; + uint64 active_volume_count = 5; + repeated DataCenterInfo data_center_infos = 6; + uint64 remote_volume_count = 7; +} +message VolumeListRequest { +} +message VolumeListResponse { + TopologyInfo topology_info = 1; + uint64 volume_size_limit_mb = 2; +} + +message LookupEcVolumeRequest { + uint32 volume_id = 1; +} +message LookupEcVolumeResponse { + uint32 volume_id = 1; + message EcShardIdLocation { + uint32 shard_id = 1; + repeated Location locations = 2; + } + repeated EcShardIdLocation shard_id_locations = 2; +} + +message GetMasterConfigurationRequest { +} +message GetMasterConfigurationResponse { + string metrics_address = 1; + uint32 metrics_interval_seconds = 2; +} + +message ListMasterClientsRequest { + string client_type = 1; +} +message ListMasterClientsResponse { + repeated string grpc_addresses = 1; +} diff --git a/weed/pb/master_pb/master.pb.go b/weed/pb/master_pb/master.pb.go index d99c789d7..c33e2b768 100644 --- a/weed/pb/master_pb/master.pb.go +++ b/weed/pb/master_pb/master.pb.go @@ -12,13 +12,38 @@ It has these top-level messages: Heartbeat HeartbeatResponse VolumeInformationMessage + VolumeShortInformationMessage + VolumeEcShardInformationMessage + StorageBackend Empty SuperBlockExtra - ClientListenRequest + KeepConnectedRequest VolumeLocation LookupVolumeRequest LookupVolumeResponse Location + AssignRequest + AssignResponse + StatisticsRequest + StatisticsResponse + StorageType + Collection + CollectionListRequest + CollectionListResponse + CollectionDeleteRequest + CollectionDeleteResponse + DataNodeInfo + RackInfo + DataCenterInfo + TopologyInfo + VolumeListRequest + VolumeListResponse + LookupEcVolumeRequest + LookupEcVolumeResponse + GetMasterConfigurationRequest + GetMasterConfigurationResponse + ListMasterClientsRequest + ListMasterClientsResponse */ package master_pb @@ -52,9 +77,16 @@ type Heartbeat struct { Rack string `protobuf:"bytes,7,opt,name=rack" json:"rack,omitempty"` AdminPort uint32 `protobuf:"varint,8,opt,name=admin_port,json=adminPort" json:"admin_port,omitempty"` Volumes []*VolumeInformationMessage `protobuf:"bytes,9,rep,name=volumes" json:"volumes,omitempty"` - // delta volume ids - NewVids []uint32 `protobuf:"varint,10,rep,packed,name=new_vids,json=newVids" json:"new_vids,omitempty"` - DeletedVids []uint32 
`protobuf:"varint,11,rep,packed,name=deleted_vids,json=deletedVids" json:"deleted_vids,omitempty"` + // delta volumes + NewVolumes []*VolumeShortInformationMessage `protobuf:"bytes,10,rep,name=new_volumes,json=newVolumes" json:"new_volumes,omitempty"` + DeletedVolumes []*VolumeShortInformationMessage `protobuf:"bytes,11,rep,name=deleted_volumes,json=deletedVolumes" json:"deleted_volumes,omitempty"` + HasNoVolumes bool `protobuf:"varint,12,opt,name=has_no_volumes,json=hasNoVolumes" json:"has_no_volumes,omitempty"` + // erasure coding + EcShards []*VolumeEcShardInformationMessage `protobuf:"bytes,16,rep,name=ec_shards,json=ecShards" json:"ec_shards,omitempty"` + // delta erasure coding shards + NewEcShards []*VolumeEcShardInformationMessage `protobuf:"bytes,17,rep,name=new_ec_shards,json=newEcShards" json:"new_ec_shards,omitempty"` + DeletedEcShards []*VolumeEcShardInformationMessage `protobuf:"bytes,18,rep,name=deleted_ec_shards,json=deletedEcShards" json:"deleted_ec_shards,omitempty"` + HasNoEcShards bool `protobuf:"varint,19,opt,name=has_no_ec_shards,json=hasNoEcShards" json:"has_no_ec_shards,omitempty"` } func (m *Heartbeat) Reset() { *m = Heartbeat{} } @@ -125,24 +157,61 @@ func (m *Heartbeat) GetVolumes() []*VolumeInformationMessage { return nil } -func (m *Heartbeat) GetNewVids() []uint32 { +func (m *Heartbeat) GetNewVolumes() []*VolumeShortInformationMessage { if m != nil { - return m.NewVids + return m.NewVolumes } return nil } -func (m *Heartbeat) GetDeletedVids() []uint32 { +func (m *Heartbeat) GetDeletedVolumes() []*VolumeShortInformationMessage { if m != nil { - return m.DeletedVids + return m.DeletedVolumes } return nil } +func (m *Heartbeat) GetHasNoVolumes() bool { + if m != nil { + return m.HasNoVolumes + } + return false +} + +func (m *Heartbeat) GetEcShards() []*VolumeEcShardInformationMessage { + if m != nil { + return m.EcShards + } + return nil +} + +func (m *Heartbeat) GetNewEcShards() []*VolumeEcShardInformationMessage { + if m != nil { + return m.NewEcShards + } + return nil +} + +func (m *Heartbeat) GetDeletedEcShards() []*VolumeEcShardInformationMessage { + if m != nil { + return m.DeletedEcShards + } + return nil +} + +func (m *Heartbeat) GetHasNoEcShards() bool { + if m != nil { + return m.HasNoEcShards + } + return false +} + type HeartbeatResponse struct { - VolumeSizeLimit uint64 `protobuf:"varint,1,opt,name=volumeSizeLimit" json:"volumeSizeLimit,omitempty"` - SecretKey string `protobuf:"bytes,2,opt,name=secretKey" json:"secretKey,omitempty"` - Leader string `protobuf:"bytes,3,opt,name=leader" json:"leader,omitempty"` + VolumeSizeLimit uint64 `protobuf:"varint,1,opt,name=volume_size_limit,json=volumeSizeLimit" json:"volume_size_limit,omitempty"` + Leader string `protobuf:"bytes,2,opt,name=leader" json:"leader,omitempty"` + MetricsAddress string `protobuf:"bytes,3,opt,name=metrics_address,json=metricsAddress" json:"metrics_address,omitempty"` + MetricsIntervalSeconds uint32 `protobuf:"varint,4,opt,name=metrics_interval_seconds,json=metricsIntervalSeconds" json:"metrics_interval_seconds,omitempty"` + StorageBackends []*StorageBackend `protobuf:"bytes,5,rep,name=storage_backends,json=storageBackends" json:"storage_backends,omitempty"` } func (m *HeartbeatResponse) Reset() { *m = HeartbeatResponse{} } @@ -157,31 +226,49 @@ func (m *HeartbeatResponse) GetVolumeSizeLimit() uint64 { return 0 } -func (m *HeartbeatResponse) GetSecretKey() string { +func (m *HeartbeatResponse) GetLeader() string { if m != nil { - return m.SecretKey + return m.Leader } return "" } -func 
(m *HeartbeatResponse) GetLeader() string { +func (m *HeartbeatResponse) GetMetricsAddress() string { if m != nil { - return m.Leader + return m.MetricsAddress } return "" } +func (m *HeartbeatResponse) GetMetricsIntervalSeconds() uint32 { + if m != nil { + return m.MetricsIntervalSeconds + } + return 0 +} + +func (m *HeartbeatResponse) GetStorageBackends() []*StorageBackend { + if m != nil { + return m.StorageBackends + } + return nil +} + type VolumeInformationMessage struct { - Id uint32 `protobuf:"varint,1,opt,name=id" json:"id,omitempty"` - Size uint64 `protobuf:"varint,2,opt,name=size" json:"size,omitempty"` - Collection string `protobuf:"bytes,3,opt,name=collection" json:"collection,omitempty"` - FileCount uint64 `protobuf:"varint,4,opt,name=file_count,json=fileCount" json:"file_count,omitempty"` - DeleteCount uint64 `protobuf:"varint,5,opt,name=delete_count,json=deleteCount" json:"delete_count,omitempty"` - DeletedByteCount uint64 `protobuf:"varint,6,opt,name=deleted_byte_count,json=deletedByteCount" json:"deleted_byte_count,omitempty"` - ReadOnly bool `protobuf:"varint,7,opt,name=read_only,json=readOnly" json:"read_only,omitempty"` - ReplicaPlacement uint32 `protobuf:"varint,8,opt,name=replica_placement,json=replicaPlacement" json:"replica_placement,omitempty"` - Version uint32 `protobuf:"varint,9,opt,name=version" json:"version,omitempty"` - Ttl uint32 `protobuf:"varint,10,opt,name=ttl" json:"ttl,omitempty"` + Id uint32 `protobuf:"varint,1,opt,name=id" json:"id,omitempty"` + Size uint64 `protobuf:"varint,2,opt,name=size" json:"size,omitempty"` + Collection string `protobuf:"bytes,3,opt,name=collection" json:"collection,omitempty"` + FileCount uint64 `protobuf:"varint,4,opt,name=file_count,json=fileCount" json:"file_count,omitempty"` + DeleteCount uint64 `protobuf:"varint,5,opt,name=delete_count,json=deleteCount" json:"delete_count,omitempty"` + DeletedByteCount uint64 `protobuf:"varint,6,opt,name=deleted_byte_count,json=deletedByteCount" json:"deleted_byte_count,omitempty"` + ReadOnly bool `protobuf:"varint,7,opt,name=read_only,json=readOnly" json:"read_only,omitempty"` + ReplicaPlacement uint32 `protobuf:"varint,8,opt,name=replica_placement,json=replicaPlacement" json:"replica_placement,omitempty"` + Version uint32 `protobuf:"varint,9,opt,name=version" json:"version,omitempty"` + Ttl uint32 `protobuf:"varint,10,opt,name=ttl" json:"ttl,omitempty"` + CompactRevision uint32 `protobuf:"varint,11,opt,name=compact_revision,json=compactRevision" json:"compact_revision,omitempty"` + ModifiedAtSecond int64 `protobuf:"varint,12,opt,name=modified_at_second,json=modifiedAtSecond" json:"modified_at_second,omitempty"` + RemoteStorageName string `protobuf:"bytes,13,opt,name=remote_storage_name,json=remoteStorageName" json:"remote_storage_name,omitempty"` + RemoteStorageKey string `protobuf:"bytes,14,opt,name=remote_storage_key,json=remoteStorageKey" json:"remote_storage_key,omitempty"` } func (m *VolumeInformationMessage) Reset() { *m = VolumeInformationMessage{} } @@ -259,13 +346,153 @@ func (m *VolumeInformationMessage) GetTtl() uint32 { return 0 } +func (m *VolumeInformationMessage) GetCompactRevision() uint32 { + if m != nil { + return m.CompactRevision + } + return 0 +} + +func (m *VolumeInformationMessage) GetModifiedAtSecond() int64 { + if m != nil { + return m.ModifiedAtSecond + } + return 0 +} + +func (m *VolumeInformationMessage) GetRemoteStorageName() string { + if m != nil { + return m.RemoteStorageName + } + return "" +} + +func (m *VolumeInformationMessage) GetRemoteStorageKey() 
string { + if m != nil { + return m.RemoteStorageKey + } + return "" +} + +type VolumeShortInformationMessage struct { + Id uint32 `protobuf:"varint,1,opt,name=id" json:"id,omitempty"` + Collection string `protobuf:"bytes,3,opt,name=collection" json:"collection,omitempty"` + ReplicaPlacement uint32 `protobuf:"varint,8,opt,name=replica_placement,json=replicaPlacement" json:"replica_placement,omitempty"` + Version uint32 `protobuf:"varint,9,opt,name=version" json:"version,omitempty"` + Ttl uint32 `protobuf:"varint,10,opt,name=ttl" json:"ttl,omitempty"` +} + +func (m *VolumeShortInformationMessage) Reset() { *m = VolumeShortInformationMessage{} } +func (m *VolumeShortInformationMessage) String() string { return proto.CompactTextString(m) } +func (*VolumeShortInformationMessage) ProtoMessage() {} +func (*VolumeShortInformationMessage) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} } + +func (m *VolumeShortInformationMessage) GetId() uint32 { + if m != nil { + return m.Id + } + return 0 +} + +func (m *VolumeShortInformationMessage) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *VolumeShortInformationMessage) GetReplicaPlacement() uint32 { + if m != nil { + return m.ReplicaPlacement + } + return 0 +} + +func (m *VolumeShortInformationMessage) GetVersion() uint32 { + if m != nil { + return m.Version + } + return 0 +} + +func (m *VolumeShortInformationMessage) GetTtl() uint32 { + if m != nil { + return m.Ttl + } + return 0 +} + +type VolumeEcShardInformationMessage struct { + Id uint32 `protobuf:"varint,1,opt,name=id" json:"id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + EcIndexBits uint32 `protobuf:"varint,3,opt,name=ec_index_bits,json=ecIndexBits" json:"ec_index_bits,omitempty"` +} + +func (m *VolumeEcShardInformationMessage) Reset() { *m = VolumeEcShardInformationMessage{} } +func (m *VolumeEcShardInformationMessage) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardInformationMessage) ProtoMessage() {} +func (*VolumeEcShardInformationMessage) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} } + +func (m *VolumeEcShardInformationMessage) GetId() uint32 { + if m != nil { + return m.Id + } + return 0 +} + +func (m *VolumeEcShardInformationMessage) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *VolumeEcShardInformationMessage) GetEcIndexBits() uint32 { + if m != nil { + return m.EcIndexBits + } + return 0 +} + +type StorageBackend struct { + Type string `protobuf:"bytes,1,opt,name=type" json:"type,omitempty"` + Id string `protobuf:"bytes,2,opt,name=id" json:"id,omitempty"` + Properties map[string]string `protobuf:"bytes,3,rep,name=properties" json:"properties,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` +} + +func (m *StorageBackend) Reset() { *m = StorageBackend{} } +func (m *StorageBackend) String() string { return proto.CompactTextString(m) } +func (*StorageBackend) ProtoMessage() {} +func (*StorageBackend) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } + +func (m *StorageBackend) GetType() string { + if m != nil { + return m.Type + } + return "" +} + +func (m *StorageBackend) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *StorageBackend) GetProperties() map[string]string { + if m != nil { + return m.Properties + } + return nil +} + type Empty struct { } func (m *Empty) Reset() { *m = Empty{} } func 
(m *Empty) String() string { return proto.CompactTextString(m) } func (*Empty) ProtoMessage() {} -func (*Empty) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} } +func (*Empty) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } type SuperBlockExtra struct { ErasureCoding *SuperBlockExtra_ErasureCoding `protobuf:"bytes,1,opt,name=erasure_coding,json=erasureCoding" json:"erasure_coding,omitempty"` @@ -274,7 +501,7 @@ type SuperBlockExtra struct { func (m *SuperBlockExtra) Reset() { *m = SuperBlockExtra{} } func (m *SuperBlockExtra) String() string { return proto.CompactTextString(m) } func (*SuperBlockExtra) ProtoMessage() {} -func (*SuperBlockExtra) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} } +func (*SuperBlockExtra) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} } func (m *SuperBlockExtra) GetErasureCoding() *SuperBlockExtra_ErasureCoding { if m != nil { @@ -293,7 +520,7 @@ func (m *SuperBlockExtra_ErasureCoding) Reset() { *m = SuperBlockExtra_E func (m *SuperBlockExtra_ErasureCoding) String() string { return proto.CompactTextString(m) } func (*SuperBlockExtra_ErasureCoding) ProtoMessage() {} func (*SuperBlockExtra_ErasureCoding) Descriptor() ([]byte, []int) { - return fileDescriptor0, []int{4, 0} + return fileDescriptor0, []int{7, 0} } func (m *SuperBlockExtra_ErasureCoding) GetData() uint32 { @@ -317,33 +544,42 @@ func (m *SuperBlockExtra_ErasureCoding) GetVolumeIds() []uint32 { return nil } -type ClientListenRequest struct { - Name string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` +type KeepConnectedRequest struct { + Name string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` + GrpcPort uint32 `protobuf:"varint,2,opt,name=grpc_port,json=grpcPort" json:"grpc_port,omitempty"` } -func (m *ClientListenRequest) Reset() { *m = ClientListenRequest{} } -func (m *ClientListenRequest) String() string { return proto.CompactTextString(m) } -func (*ClientListenRequest) ProtoMessage() {} -func (*ClientListenRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } +func (m *KeepConnectedRequest) Reset() { *m = KeepConnectedRequest{} } +func (m *KeepConnectedRequest) String() string { return proto.CompactTextString(m) } +func (*KeepConnectedRequest) ProtoMessage() {} +func (*KeepConnectedRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} } -func (m *ClientListenRequest) GetName() string { +func (m *KeepConnectedRequest) GetName() string { if m != nil { return m.Name } return "" } +func (m *KeepConnectedRequest) GetGrpcPort() uint32 { + if m != nil { + return m.GrpcPort + } + return 0 +} + type VolumeLocation struct { Url string `protobuf:"bytes,1,opt,name=url" json:"url,omitempty"` PublicUrl string `protobuf:"bytes,2,opt,name=public_url,json=publicUrl" json:"public_url,omitempty"` NewVids []uint32 `protobuf:"varint,3,rep,packed,name=new_vids,json=newVids" json:"new_vids,omitempty"` DeletedVids []uint32 `protobuf:"varint,4,rep,packed,name=deleted_vids,json=deletedVids" json:"deleted_vids,omitempty"` + Leader string `protobuf:"bytes,5,opt,name=leader" json:"leader,omitempty"` } func (m *VolumeLocation) Reset() { *m = VolumeLocation{} } func (m *VolumeLocation) String() string { return proto.CompactTextString(m) } func (*VolumeLocation) ProtoMessage() {} -func (*VolumeLocation) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } +func (*VolumeLocation) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} } func (m *VolumeLocation) GetUrl() string { 
if m != nil { @@ -373,6 +609,13 @@ func (m *VolumeLocation) GetDeletedVids() []uint32 { return nil } +func (m *VolumeLocation) GetLeader() string { + if m != nil { + return m.Leader + } + return "" +} + type LookupVolumeRequest struct { VolumeIds []string `protobuf:"bytes,1,rep,name=volume_ids,json=volumeIds" json:"volume_ids,omitempty"` Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` @@ -381,7 +624,7 @@ type LookupVolumeRequest struct { func (m *LookupVolumeRequest) Reset() { *m = LookupVolumeRequest{} } func (m *LookupVolumeRequest) String() string { return proto.CompactTextString(m) } func (*LookupVolumeRequest) ProtoMessage() {} -func (*LookupVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} } +func (*LookupVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} } func (m *LookupVolumeRequest) GetVolumeIds() []string { if m != nil { @@ -404,7 +647,7 @@ type LookupVolumeResponse struct { func (m *LookupVolumeResponse) Reset() { *m = LookupVolumeResponse{} } func (m *LookupVolumeResponse) String() string { return proto.CompactTextString(m) } func (*LookupVolumeResponse) ProtoMessage() {} -func (*LookupVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} } +func (*LookupVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} } func (m *LookupVolumeResponse) GetVolumeIdLocations() []*LookupVolumeResponse_VolumeIdLocation { if m != nil { @@ -423,7 +666,7 @@ func (m *LookupVolumeResponse_VolumeIdLocation) Reset() { *m = LookupVol func (m *LookupVolumeResponse_VolumeIdLocation) String() string { return proto.CompactTextString(m) } func (*LookupVolumeResponse_VolumeIdLocation) ProtoMessage() {} func (*LookupVolumeResponse_VolumeIdLocation) Descriptor() ([]byte, []int) { - return fileDescriptor0, []int{8, 0} + return fileDescriptor0, []int{11, 0} } func (m *LookupVolumeResponse_VolumeIdLocation) GetVolumeId() string { @@ -455,7 +698,7 @@ type Location struct { func (m *Location) Reset() { *m = Location{} } func (m *Location) String() string { return proto.CompactTextString(m) } func (*Location) ProtoMessage() {} -func (*Location) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} } +func (*Location) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} } func (m *Location) GetUrl() string { if m != nil { @@ -471,19 +714,804 @@ func (m *Location) GetPublicUrl() string { return "" } +type AssignRequest struct { + Count uint64 `protobuf:"varint,1,opt,name=count" json:"count,omitempty"` + Replication string `protobuf:"bytes,2,opt,name=replication" json:"replication,omitempty"` + Collection string `protobuf:"bytes,3,opt,name=collection" json:"collection,omitempty"` + Ttl string `protobuf:"bytes,4,opt,name=ttl" json:"ttl,omitempty"` + DataCenter string `protobuf:"bytes,5,opt,name=data_center,json=dataCenter" json:"data_center,omitempty"` + Rack string `protobuf:"bytes,6,opt,name=rack" json:"rack,omitempty"` + DataNode string `protobuf:"bytes,7,opt,name=data_node,json=dataNode" json:"data_node,omitempty"` + MemoryMapMaxSizeMb uint32 `protobuf:"varint,8,opt,name=memory_map_max_size_mb,json=memoryMapMaxSizeMb" json:"memory_map_max_size_mb,omitempty"` + WritableVolumeCount uint32 `protobuf:"varint,9,opt,name=Writable_volume_count,json=WritableVolumeCount" json:"Writable_volume_count,omitempty"` +} + +func (m *AssignRequest) Reset() { *m = AssignRequest{} } +func (m *AssignRequest) String() string { return proto.CompactTextString(m) } +func 
(*AssignRequest) ProtoMessage() {} +func (*AssignRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} } + +func (m *AssignRequest) GetCount() uint64 { + if m != nil { + return m.Count + } + return 0 +} + +func (m *AssignRequest) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *AssignRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *AssignRequest) GetTtl() string { + if m != nil { + return m.Ttl + } + return "" +} + +func (m *AssignRequest) GetDataCenter() string { + if m != nil { + return m.DataCenter + } + return "" +} + +func (m *AssignRequest) GetRack() string { + if m != nil { + return m.Rack + } + return "" +} + +func (m *AssignRequest) GetDataNode() string { + if m != nil { + return m.DataNode + } + return "" +} + +func (m *AssignRequest) GetMemoryMapMaxSizeMb() uint32 { + if m != nil { + return m.MemoryMapMaxSizeMb + } + return 0 +} + +func (m *AssignRequest) GetWritableVolumeCount() uint32 { + if m != nil { + return m.WritableVolumeCount + } + return 0 +} + +type AssignResponse struct { + Fid string `protobuf:"bytes,1,opt,name=fid" json:"fid,omitempty"` + Url string `protobuf:"bytes,2,opt,name=url" json:"url,omitempty"` + PublicUrl string `protobuf:"bytes,3,opt,name=public_url,json=publicUrl" json:"public_url,omitempty"` + Count uint64 `protobuf:"varint,4,opt,name=count" json:"count,omitempty"` + Error string `protobuf:"bytes,5,opt,name=error" json:"error,omitempty"` + Auth string `protobuf:"bytes,6,opt,name=auth" json:"auth,omitempty"` +} + +func (m *AssignResponse) Reset() { *m = AssignResponse{} } +func (m *AssignResponse) String() string { return proto.CompactTextString(m) } +func (*AssignResponse) ProtoMessage() {} +func (*AssignResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} } + +func (m *AssignResponse) GetFid() string { + if m != nil { + return m.Fid + } + return "" +} + +func (m *AssignResponse) GetUrl() string { + if m != nil { + return m.Url + } + return "" +} + +func (m *AssignResponse) GetPublicUrl() string { + if m != nil { + return m.PublicUrl + } + return "" +} + +func (m *AssignResponse) GetCount() uint64 { + if m != nil { + return m.Count + } + return 0 +} + +func (m *AssignResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +func (m *AssignResponse) GetAuth() string { + if m != nil { + return m.Auth + } + return "" +} + +type StatisticsRequest struct { + Replication string `protobuf:"bytes,1,opt,name=replication" json:"replication,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + Ttl string `protobuf:"bytes,3,opt,name=ttl" json:"ttl,omitempty"` +} + +func (m *StatisticsRequest) Reset() { *m = StatisticsRequest{} } +func (m *StatisticsRequest) String() string { return proto.CompactTextString(m) } +func (*StatisticsRequest) ProtoMessage() {} +func (*StatisticsRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{15} } + +func (m *StatisticsRequest) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *StatisticsRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *StatisticsRequest) GetTtl() string { + if m != nil { + return m.Ttl + } + return "" +} + +type StatisticsResponse struct { + Replication string `protobuf:"bytes,1,opt,name=replication" json:"replication,omitempty"` + Collection string 
`protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + Ttl string `protobuf:"bytes,3,opt,name=ttl" json:"ttl,omitempty"` + TotalSize uint64 `protobuf:"varint,4,opt,name=total_size,json=totalSize" json:"total_size,omitempty"` + UsedSize uint64 `protobuf:"varint,5,opt,name=used_size,json=usedSize" json:"used_size,omitempty"` + FileCount uint64 `protobuf:"varint,6,opt,name=file_count,json=fileCount" json:"file_count,omitempty"` +} + +func (m *StatisticsResponse) Reset() { *m = StatisticsResponse{} } +func (m *StatisticsResponse) String() string { return proto.CompactTextString(m) } +func (*StatisticsResponse) ProtoMessage() {} +func (*StatisticsResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{16} } + +func (m *StatisticsResponse) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *StatisticsResponse) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *StatisticsResponse) GetTtl() string { + if m != nil { + return m.Ttl + } + return "" +} + +func (m *StatisticsResponse) GetTotalSize() uint64 { + if m != nil { + return m.TotalSize + } + return 0 +} + +func (m *StatisticsResponse) GetUsedSize() uint64 { + if m != nil { + return m.UsedSize + } + return 0 +} + +func (m *StatisticsResponse) GetFileCount() uint64 { + if m != nil { + return m.FileCount + } + return 0 +} + +type StorageType struct { + Replication string `protobuf:"bytes,1,opt,name=replication" json:"replication,omitempty"` + Ttl string `protobuf:"bytes,2,opt,name=ttl" json:"ttl,omitempty"` +} + +func (m *StorageType) Reset() { *m = StorageType{} } +func (m *StorageType) String() string { return proto.CompactTextString(m) } +func (*StorageType) ProtoMessage() {} +func (*StorageType) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{17} } + +func (m *StorageType) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *StorageType) GetTtl() string { + if m != nil { + return m.Ttl + } + return "" +} + +type Collection struct { + Name string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` +} + +func (m *Collection) Reset() { *m = Collection{} } +func (m *Collection) String() string { return proto.CompactTextString(m) } +func (*Collection) ProtoMessage() {} +func (*Collection) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{18} } + +func (m *Collection) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +type CollectionListRequest struct { + IncludeNormalVolumes bool `protobuf:"varint,1,opt,name=include_normal_volumes,json=includeNormalVolumes" json:"include_normal_volumes,omitempty"` + IncludeEcVolumes bool `protobuf:"varint,2,opt,name=include_ec_volumes,json=includeEcVolumes" json:"include_ec_volumes,omitempty"` +} + +func (m *CollectionListRequest) Reset() { *m = CollectionListRequest{} } +func (m *CollectionListRequest) String() string { return proto.CompactTextString(m) } +func (*CollectionListRequest) ProtoMessage() {} +func (*CollectionListRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{19} } + +func (m *CollectionListRequest) GetIncludeNormalVolumes() bool { + if m != nil { + return m.IncludeNormalVolumes + } + return false +} + +func (m *CollectionListRequest) GetIncludeEcVolumes() bool { + if m != nil { + return m.IncludeEcVolumes + } + return false +} + +type CollectionListResponse struct { + Collections []*Collection `protobuf:"bytes,1,rep,name=collections" json:"collections,omitempty"` +} + 
+func (m *CollectionListResponse) Reset() { *m = CollectionListResponse{} } +func (m *CollectionListResponse) String() string { return proto.CompactTextString(m) } +func (*CollectionListResponse) ProtoMessage() {} +func (*CollectionListResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{20} } + +func (m *CollectionListResponse) GetCollections() []*Collection { + if m != nil { + return m.Collections + } + return nil +} + +type CollectionDeleteRequest struct { + Name string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` +} + +func (m *CollectionDeleteRequest) Reset() { *m = CollectionDeleteRequest{} } +func (m *CollectionDeleteRequest) String() string { return proto.CompactTextString(m) } +func (*CollectionDeleteRequest) ProtoMessage() {} +func (*CollectionDeleteRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{21} } + +func (m *CollectionDeleteRequest) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +type CollectionDeleteResponse struct { +} + +func (m *CollectionDeleteResponse) Reset() { *m = CollectionDeleteResponse{} } +func (m *CollectionDeleteResponse) String() string { return proto.CompactTextString(m) } +func (*CollectionDeleteResponse) ProtoMessage() {} +func (*CollectionDeleteResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{22} } + +// +// volume related +// +type DataNodeInfo struct { + Id string `protobuf:"bytes,1,opt,name=id" json:"id,omitempty"` + VolumeCount uint64 `protobuf:"varint,2,opt,name=volume_count,json=volumeCount" json:"volume_count,omitempty"` + MaxVolumeCount uint64 `protobuf:"varint,3,opt,name=max_volume_count,json=maxVolumeCount" json:"max_volume_count,omitempty"` + FreeVolumeCount uint64 `protobuf:"varint,4,opt,name=free_volume_count,json=freeVolumeCount" json:"free_volume_count,omitempty"` + ActiveVolumeCount uint64 `protobuf:"varint,5,opt,name=active_volume_count,json=activeVolumeCount" json:"active_volume_count,omitempty"` + VolumeInfos []*VolumeInformationMessage `protobuf:"bytes,6,rep,name=volume_infos,json=volumeInfos" json:"volume_infos,omitempty"` + EcShardInfos []*VolumeEcShardInformationMessage `protobuf:"bytes,7,rep,name=ec_shard_infos,json=ecShardInfos" json:"ec_shard_infos,omitempty"` + RemoteVolumeCount uint64 `protobuf:"varint,8,opt,name=remote_volume_count,json=remoteVolumeCount" json:"remote_volume_count,omitempty"` +} + +func (m *DataNodeInfo) Reset() { *m = DataNodeInfo{} } +func (m *DataNodeInfo) String() string { return proto.CompactTextString(m) } +func (*DataNodeInfo) ProtoMessage() {} +func (*DataNodeInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{23} } + +func (m *DataNodeInfo) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *DataNodeInfo) GetVolumeCount() uint64 { + if m != nil { + return m.VolumeCount + } + return 0 +} + +func (m *DataNodeInfo) GetMaxVolumeCount() uint64 { + if m != nil { + return m.MaxVolumeCount + } + return 0 +} + +func (m *DataNodeInfo) GetFreeVolumeCount() uint64 { + if m != nil { + return m.FreeVolumeCount + } + return 0 +} + +func (m *DataNodeInfo) GetActiveVolumeCount() uint64 { + if m != nil { + return m.ActiveVolumeCount + } + return 0 +} + +func (m *DataNodeInfo) GetVolumeInfos() []*VolumeInformationMessage { + if m != nil { + return m.VolumeInfos + } + return nil +} + +func (m *DataNodeInfo) GetEcShardInfos() []*VolumeEcShardInformationMessage { + if m != nil { + return m.EcShardInfos + } + return nil +} + +func (m *DataNodeInfo) GetRemoteVolumeCount() uint64 { + if 
m != nil { + return m.RemoteVolumeCount + } + return 0 +} + +type RackInfo struct { + Id string `protobuf:"bytes,1,opt,name=id" json:"id,omitempty"` + VolumeCount uint64 `protobuf:"varint,2,opt,name=volume_count,json=volumeCount" json:"volume_count,omitempty"` + MaxVolumeCount uint64 `protobuf:"varint,3,opt,name=max_volume_count,json=maxVolumeCount" json:"max_volume_count,omitempty"` + FreeVolumeCount uint64 `protobuf:"varint,4,opt,name=free_volume_count,json=freeVolumeCount" json:"free_volume_count,omitempty"` + ActiveVolumeCount uint64 `protobuf:"varint,5,opt,name=active_volume_count,json=activeVolumeCount" json:"active_volume_count,omitempty"` + DataNodeInfos []*DataNodeInfo `protobuf:"bytes,6,rep,name=data_node_infos,json=dataNodeInfos" json:"data_node_infos,omitempty"` + RemoteVolumeCount uint64 `protobuf:"varint,7,opt,name=remote_volume_count,json=remoteVolumeCount" json:"remote_volume_count,omitempty"` +} + +func (m *RackInfo) Reset() { *m = RackInfo{} } +func (m *RackInfo) String() string { return proto.CompactTextString(m) } +func (*RackInfo) ProtoMessage() {} +func (*RackInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{24} } + +func (m *RackInfo) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *RackInfo) GetVolumeCount() uint64 { + if m != nil { + return m.VolumeCount + } + return 0 +} + +func (m *RackInfo) GetMaxVolumeCount() uint64 { + if m != nil { + return m.MaxVolumeCount + } + return 0 +} + +func (m *RackInfo) GetFreeVolumeCount() uint64 { + if m != nil { + return m.FreeVolumeCount + } + return 0 +} + +func (m *RackInfo) GetActiveVolumeCount() uint64 { + if m != nil { + return m.ActiveVolumeCount + } + return 0 +} + +func (m *RackInfo) GetDataNodeInfos() []*DataNodeInfo { + if m != nil { + return m.DataNodeInfos + } + return nil +} + +func (m *RackInfo) GetRemoteVolumeCount() uint64 { + if m != nil { + return m.RemoteVolumeCount + } + return 0 +} + +type DataCenterInfo struct { + Id string `protobuf:"bytes,1,opt,name=id" json:"id,omitempty"` + VolumeCount uint64 `protobuf:"varint,2,opt,name=volume_count,json=volumeCount" json:"volume_count,omitempty"` + MaxVolumeCount uint64 `protobuf:"varint,3,opt,name=max_volume_count,json=maxVolumeCount" json:"max_volume_count,omitempty"` + FreeVolumeCount uint64 `protobuf:"varint,4,opt,name=free_volume_count,json=freeVolumeCount" json:"free_volume_count,omitempty"` + ActiveVolumeCount uint64 `protobuf:"varint,5,opt,name=active_volume_count,json=activeVolumeCount" json:"active_volume_count,omitempty"` + RackInfos []*RackInfo `protobuf:"bytes,6,rep,name=rack_infos,json=rackInfos" json:"rack_infos,omitempty"` + RemoteVolumeCount uint64 `protobuf:"varint,7,opt,name=remote_volume_count,json=remoteVolumeCount" json:"remote_volume_count,omitempty"` +} + +func (m *DataCenterInfo) Reset() { *m = DataCenterInfo{} } +func (m *DataCenterInfo) String() string { return proto.CompactTextString(m) } +func (*DataCenterInfo) ProtoMessage() {} +func (*DataCenterInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{25} } + +func (m *DataCenterInfo) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *DataCenterInfo) GetVolumeCount() uint64 { + if m != nil { + return m.VolumeCount + } + return 0 +} + +func (m *DataCenterInfo) GetMaxVolumeCount() uint64 { + if m != nil { + return m.MaxVolumeCount + } + return 0 +} + +func (m *DataCenterInfo) GetFreeVolumeCount() uint64 { + if m != nil { + return m.FreeVolumeCount + } + return 0 +} + +func (m *DataCenterInfo) 
GetActiveVolumeCount() uint64 { + if m != nil { + return m.ActiveVolumeCount + } + return 0 +} + +func (m *DataCenterInfo) GetRackInfos() []*RackInfo { + if m != nil { + return m.RackInfos + } + return nil +} + +func (m *DataCenterInfo) GetRemoteVolumeCount() uint64 { + if m != nil { + return m.RemoteVolumeCount + } + return 0 +} + +type TopologyInfo struct { + Id string `protobuf:"bytes,1,opt,name=id" json:"id,omitempty"` + VolumeCount uint64 `protobuf:"varint,2,opt,name=volume_count,json=volumeCount" json:"volume_count,omitempty"` + MaxVolumeCount uint64 `protobuf:"varint,3,opt,name=max_volume_count,json=maxVolumeCount" json:"max_volume_count,omitempty"` + FreeVolumeCount uint64 `protobuf:"varint,4,opt,name=free_volume_count,json=freeVolumeCount" json:"free_volume_count,omitempty"` + ActiveVolumeCount uint64 `protobuf:"varint,5,opt,name=active_volume_count,json=activeVolumeCount" json:"active_volume_count,omitempty"` + DataCenterInfos []*DataCenterInfo `protobuf:"bytes,6,rep,name=data_center_infos,json=dataCenterInfos" json:"data_center_infos,omitempty"` + RemoteVolumeCount uint64 `protobuf:"varint,7,opt,name=remote_volume_count,json=remoteVolumeCount" json:"remote_volume_count,omitempty"` +} + +func (m *TopologyInfo) Reset() { *m = TopologyInfo{} } +func (m *TopologyInfo) String() string { return proto.CompactTextString(m) } +func (*TopologyInfo) ProtoMessage() {} +func (*TopologyInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{26} } + +func (m *TopologyInfo) GetId() string { + if m != nil { + return m.Id + } + return "" +} + +func (m *TopologyInfo) GetVolumeCount() uint64 { + if m != nil { + return m.VolumeCount + } + return 0 +} + +func (m *TopologyInfo) GetMaxVolumeCount() uint64 { + if m != nil { + return m.MaxVolumeCount + } + return 0 +} + +func (m *TopologyInfo) GetFreeVolumeCount() uint64 { + if m != nil { + return m.FreeVolumeCount + } + return 0 +} + +func (m *TopologyInfo) GetActiveVolumeCount() uint64 { + if m != nil { + return m.ActiveVolumeCount + } + return 0 +} + +func (m *TopologyInfo) GetDataCenterInfos() []*DataCenterInfo { + if m != nil { + return m.DataCenterInfos + } + return nil +} + +func (m *TopologyInfo) GetRemoteVolumeCount() uint64 { + if m != nil { + return m.RemoteVolumeCount + } + return 0 +} + +type VolumeListRequest struct { +} + +func (m *VolumeListRequest) Reset() { *m = VolumeListRequest{} } +func (m *VolumeListRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeListRequest) ProtoMessage() {} +func (*VolumeListRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{27} } + +type VolumeListResponse struct { + TopologyInfo *TopologyInfo `protobuf:"bytes,1,opt,name=topology_info,json=topologyInfo" json:"topology_info,omitempty"` + VolumeSizeLimitMb uint64 `protobuf:"varint,2,opt,name=volume_size_limit_mb,json=volumeSizeLimitMb" json:"volume_size_limit_mb,omitempty"` +} + +func (m *VolumeListResponse) Reset() { *m = VolumeListResponse{} } +func (m *VolumeListResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeListResponse) ProtoMessage() {} +func (*VolumeListResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{28} } + +func (m *VolumeListResponse) GetTopologyInfo() *TopologyInfo { + if m != nil { + return m.TopologyInfo + } + return nil +} + +func (m *VolumeListResponse) GetVolumeSizeLimitMb() uint64 { + if m != nil { + return m.VolumeSizeLimitMb + } + return 0 +} + +type LookupEcVolumeRequest struct { + VolumeId uint32 
`protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` +} + +func (m *LookupEcVolumeRequest) Reset() { *m = LookupEcVolumeRequest{} } +func (m *LookupEcVolumeRequest) String() string { return proto.CompactTextString(m) } +func (*LookupEcVolumeRequest) ProtoMessage() {} +func (*LookupEcVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{29} } + +func (m *LookupEcVolumeRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +type LookupEcVolumeResponse struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + ShardIdLocations []*LookupEcVolumeResponse_EcShardIdLocation `protobuf:"bytes,2,rep,name=shard_id_locations,json=shardIdLocations" json:"shard_id_locations,omitempty"` +} + +func (m *LookupEcVolumeResponse) Reset() { *m = LookupEcVolumeResponse{} } +func (m *LookupEcVolumeResponse) String() string { return proto.CompactTextString(m) } +func (*LookupEcVolumeResponse) ProtoMessage() {} +func (*LookupEcVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{30} } + +func (m *LookupEcVolumeResponse) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *LookupEcVolumeResponse) GetShardIdLocations() []*LookupEcVolumeResponse_EcShardIdLocation { + if m != nil { + return m.ShardIdLocations + } + return nil +} + +type LookupEcVolumeResponse_EcShardIdLocation struct { + ShardId uint32 `protobuf:"varint,1,opt,name=shard_id,json=shardId" json:"shard_id,omitempty"` + Locations []*Location `protobuf:"bytes,2,rep,name=locations" json:"locations,omitempty"` +} + +func (m *LookupEcVolumeResponse_EcShardIdLocation) Reset() { + *m = LookupEcVolumeResponse_EcShardIdLocation{} +} +func (m *LookupEcVolumeResponse_EcShardIdLocation) String() string { return proto.CompactTextString(m) } +func (*LookupEcVolumeResponse_EcShardIdLocation) ProtoMessage() {} +func (*LookupEcVolumeResponse_EcShardIdLocation) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{30, 0} +} + +func (m *LookupEcVolumeResponse_EcShardIdLocation) GetShardId() uint32 { + if m != nil { + return m.ShardId + } + return 0 +} + +func (m *LookupEcVolumeResponse_EcShardIdLocation) GetLocations() []*Location { + if m != nil { + return m.Locations + } + return nil +} + +type GetMasterConfigurationRequest struct { +} + +func (m *GetMasterConfigurationRequest) Reset() { *m = GetMasterConfigurationRequest{} } +func (m *GetMasterConfigurationRequest) String() string { return proto.CompactTextString(m) } +func (*GetMasterConfigurationRequest) ProtoMessage() {} +func (*GetMasterConfigurationRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{31} } + +type GetMasterConfigurationResponse struct { + MetricsAddress string `protobuf:"bytes,1,opt,name=metrics_address,json=metricsAddress" json:"metrics_address,omitempty"` + MetricsIntervalSeconds uint32 `protobuf:"varint,2,opt,name=metrics_interval_seconds,json=metricsIntervalSeconds" json:"metrics_interval_seconds,omitempty"` +} + +func (m *GetMasterConfigurationResponse) Reset() { *m = GetMasterConfigurationResponse{} } +func (m *GetMasterConfigurationResponse) String() string { return proto.CompactTextString(m) } +func (*GetMasterConfigurationResponse) ProtoMessage() {} +func (*GetMasterConfigurationResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{32} } + +func (m *GetMasterConfigurationResponse) GetMetricsAddress() string { + if m != nil { + return m.MetricsAddress + } + 
return "" +} + +func (m *GetMasterConfigurationResponse) GetMetricsIntervalSeconds() uint32 { + if m != nil { + return m.MetricsIntervalSeconds + } + return 0 +} + +type ListMasterClientsRequest struct { + ClientType string `protobuf:"bytes,1,opt,name=client_type,json=clientType" json:"client_type,omitempty"` +} + +func (m *ListMasterClientsRequest) Reset() { *m = ListMasterClientsRequest{} } +func (m *ListMasterClientsRequest) String() string { return proto.CompactTextString(m) } +func (*ListMasterClientsRequest) ProtoMessage() {} +func (*ListMasterClientsRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{33} } + +func (m *ListMasterClientsRequest) GetClientType() string { + if m != nil { + return m.ClientType + } + return "" +} + +type ListMasterClientsResponse struct { + GrpcAddresses []string `protobuf:"bytes,1,rep,name=grpc_addresses,json=grpcAddresses" json:"grpc_addresses,omitempty"` +} + +func (m *ListMasterClientsResponse) Reset() { *m = ListMasterClientsResponse{} } +func (m *ListMasterClientsResponse) String() string { return proto.CompactTextString(m) } +func (*ListMasterClientsResponse) ProtoMessage() {} +func (*ListMasterClientsResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{34} } + +func (m *ListMasterClientsResponse) GetGrpcAddresses() []string { + if m != nil { + return m.GrpcAddresses + } + return nil +} + func init() { proto.RegisterType((*Heartbeat)(nil), "master_pb.Heartbeat") proto.RegisterType((*HeartbeatResponse)(nil), "master_pb.HeartbeatResponse") proto.RegisterType((*VolumeInformationMessage)(nil), "master_pb.VolumeInformationMessage") + proto.RegisterType((*VolumeShortInformationMessage)(nil), "master_pb.VolumeShortInformationMessage") + proto.RegisterType((*VolumeEcShardInformationMessage)(nil), "master_pb.VolumeEcShardInformationMessage") + proto.RegisterType((*StorageBackend)(nil), "master_pb.StorageBackend") proto.RegisterType((*Empty)(nil), "master_pb.Empty") proto.RegisterType((*SuperBlockExtra)(nil), "master_pb.SuperBlockExtra") proto.RegisterType((*SuperBlockExtra_ErasureCoding)(nil), "master_pb.SuperBlockExtra.ErasureCoding") - proto.RegisterType((*ClientListenRequest)(nil), "master_pb.ClientListenRequest") + proto.RegisterType((*KeepConnectedRequest)(nil), "master_pb.KeepConnectedRequest") proto.RegisterType((*VolumeLocation)(nil), "master_pb.VolumeLocation") proto.RegisterType((*LookupVolumeRequest)(nil), "master_pb.LookupVolumeRequest") proto.RegisterType((*LookupVolumeResponse)(nil), "master_pb.LookupVolumeResponse") proto.RegisterType((*LookupVolumeResponse_VolumeIdLocation)(nil), "master_pb.LookupVolumeResponse.VolumeIdLocation") proto.RegisterType((*Location)(nil), "master_pb.Location") + proto.RegisterType((*AssignRequest)(nil), "master_pb.AssignRequest") + proto.RegisterType((*AssignResponse)(nil), "master_pb.AssignResponse") + proto.RegisterType((*StatisticsRequest)(nil), "master_pb.StatisticsRequest") + proto.RegisterType((*StatisticsResponse)(nil), "master_pb.StatisticsResponse") + proto.RegisterType((*StorageType)(nil), "master_pb.StorageType") + proto.RegisterType((*Collection)(nil), "master_pb.Collection") + proto.RegisterType((*CollectionListRequest)(nil), "master_pb.CollectionListRequest") + proto.RegisterType((*CollectionListResponse)(nil), "master_pb.CollectionListResponse") + proto.RegisterType((*CollectionDeleteRequest)(nil), "master_pb.CollectionDeleteRequest") + proto.RegisterType((*CollectionDeleteResponse)(nil), "master_pb.CollectionDeleteResponse") + 
proto.RegisterType((*DataNodeInfo)(nil), "master_pb.DataNodeInfo") + proto.RegisterType((*RackInfo)(nil), "master_pb.RackInfo") + proto.RegisterType((*DataCenterInfo)(nil), "master_pb.DataCenterInfo") + proto.RegisterType((*TopologyInfo)(nil), "master_pb.TopologyInfo") + proto.RegisterType((*VolumeListRequest)(nil), "master_pb.VolumeListRequest") + proto.RegisterType((*VolumeListResponse)(nil), "master_pb.VolumeListResponse") + proto.RegisterType((*LookupEcVolumeRequest)(nil), "master_pb.LookupEcVolumeRequest") + proto.RegisterType((*LookupEcVolumeResponse)(nil), "master_pb.LookupEcVolumeResponse") + proto.RegisterType((*LookupEcVolumeResponse_EcShardIdLocation)(nil), "master_pb.LookupEcVolumeResponse.EcShardIdLocation") + proto.RegisterType((*GetMasterConfigurationRequest)(nil), "master_pb.GetMasterConfigurationRequest") + proto.RegisterType((*GetMasterConfigurationResponse)(nil), "master_pb.GetMasterConfigurationResponse") + proto.RegisterType((*ListMasterClientsRequest)(nil), "master_pb.ListMasterClientsRequest") + proto.RegisterType((*ListMasterClientsResponse)(nil), "master_pb.ListMasterClientsResponse") } // Reference imports to suppress errors if they are not otherwise used. @@ -500,6 +1528,14 @@ type SeaweedClient interface { SendHeartbeat(ctx context.Context, opts ...grpc.CallOption) (Seaweed_SendHeartbeatClient, error) KeepConnected(ctx context.Context, opts ...grpc.CallOption) (Seaweed_KeepConnectedClient, error) LookupVolume(ctx context.Context, in *LookupVolumeRequest, opts ...grpc.CallOption) (*LookupVolumeResponse, error) + Assign(ctx context.Context, in *AssignRequest, opts ...grpc.CallOption) (*AssignResponse, error) + Statistics(ctx context.Context, in *StatisticsRequest, opts ...grpc.CallOption) (*StatisticsResponse, error) + CollectionList(ctx context.Context, in *CollectionListRequest, opts ...grpc.CallOption) (*CollectionListResponse, error) + CollectionDelete(ctx context.Context, in *CollectionDeleteRequest, opts ...grpc.CallOption) (*CollectionDeleteResponse, error) + VolumeList(ctx context.Context, in *VolumeListRequest, opts ...grpc.CallOption) (*VolumeListResponse, error) + LookupEcVolume(ctx context.Context, in *LookupEcVolumeRequest, opts ...grpc.CallOption) (*LookupEcVolumeResponse, error) + GetMasterConfiguration(ctx context.Context, in *GetMasterConfigurationRequest, opts ...grpc.CallOption) (*GetMasterConfigurationResponse, error) + ListMasterClients(ctx context.Context, in *ListMasterClientsRequest, opts ...grpc.CallOption) (*ListMasterClientsResponse, error) } type seaweedClient struct { @@ -551,7 +1587,7 @@ func (c *seaweedClient) KeepConnected(ctx context.Context, opts ...grpc.CallOpti } type Seaweed_KeepConnectedClient interface { - Send(*ClientListenRequest) error + Send(*KeepConnectedRequest) error Recv() (*VolumeLocation, error) grpc.ClientStream } @@ -560,7 +1596,7 @@ type seaweedKeepConnectedClient struct { grpc.ClientStream } -func (x *seaweedKeepConnectedClient) Send(m *ClientListenRequest) error { +func (x *seaweedKeepConnectedClient) Send(m *KeepConnectedRequest) error { return x.ClientStream.SendMsg(m) } @@ -581,12 +1617,92 @@ func (c *seaweedClient) LookupVolume(ctx context.Context, in *LookupVolumeReques return out, nil } +func (c *seaweedClient) Assign(ctx context.Context, in *AssignRequest, opts ...grpc.CallOption) (*AssignResponse, error) { + out := new(AssignResponse) + err := grpc.Invoke(ctx, "/master_pb.Seaweed/Assign", in, out, c.cc, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedClient) Statistics(ctx context.Context, in *StatisticsRequest, opts ...grpc.CallOption) (*StatisticsResponse, error) { + out := new(StatisticsResponse) + err := grpc.Invoke(ctx, "/master_pb.Seaweed/Statistics", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedClient) CollectionList(ctx context.Context, in *CollectionListRequest, opts ...grpc.CallOption) (*CollectionListResponse, error) { + out := new(CollectionListResponse) + err := grpc.Invoke(ctx, "/master_pb.Seaweed/CollectionList", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedClient) CollectionDelete(ctx context.Context, in *CollectionDeleteRequest, opts ...grpc.CallOption) (*CollectionDeleteResponse, error) { + out := new(CollectionDeleteResponse) + err := grpc.Invoke(ctx, "/master_pb.Seaweed/CollectionDelete", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedClient) VolumeList(ctx context.Context, in *VolumeListRequest, opts ...grpc.CallOption) (*VolumeListResponse, error) { + out := new(VolumeListResponse) + err := grpc.Invoke(ctx, "/master_pb.Seaweed/VolumeList", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedClient) LookupEcVolume(ctx context.Context, in *LookupEcVolumeRequest, opts ...grpc.CallOption) (*LookupEcVolumeResponse, error) { + out := new(LookupEcVolumeResponse) + err := grpc.Invoke(ctx, "/master_pb.Seaweed/LookupEcVolume", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedClient) GetMasterConfiguration(ctx context.Context, in *GetMasterConfigurationRequest, opts ...grpc.CallOption) (*GetMasterConfigurationResponse, error) { + out := new(GetMasterConfigurationResponse) + err := grpc.Invoke(ctx, "/master_pb.Seaweed/GetMasterConfiguration", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedClient) ListMasterClients(ctx context.Context, in *ListMasterClientsRequest, opts ...grpc.CallOption) (*ListMasterClientsResponse, error) { + out := new(ListMasterClientsResponse) + err := grpc.Invoke(ctx, "/master_pb.Seaweed/ListMasterClients", in, out, c.cc, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + // Server API for Seaweed service type SeaweedServer interface { SendHeartbeat(Seaweed_SendHeartbeatServer) error KeepConnected(Seaweed_KeepConnectedServer) error LookupVolume(context.Context, *LookupVolumeRequest) (*LookupVolumeResponse, error) + Assign(context.Context, *AssignRequest) (*AssignResponse, error) + Statistics(context.Context, *StatisticsRequest) (*StatisticsResponse, error) + CollectionList(context.Context, *CollectionListRequest) (*CollectionListResponse, error) + CollectionDelete(context.Context, *CollectionDeleteRequest) (*CollectionDeleteResponse, error) + VolumeList(context.Context, *VolumeListRequest) (*VolumeListResponse, error) + LookupEcVolume(context.Context, *LookupEcVolumeRequest) (*LookupEcVolumeResponse, error) + GetMasterConfiguration(context.Context, *GetMasterConfigurationRequest) (*GetMasterConfigurationResponse, error) + ListMasterClients(context.Context, *ListMasterClientsRequest) (*ListMasterClientsResponse, error) } func RegisterSeaweedServer(s *grpc.Server, srv SeaweedServer) { @@ -625,7 +1741,7 @@ func _Seaweed_KeepConnected_Handler(srv interface{}, stream grpc.ServerStream) e type Seaweed_KeepConnectedServer interface { Send(*VolumeLocation) error - Recv() (*ClientListenRequest, error) + Recv() (*KeepConnectedRequest, error) grpc.ServerStream } @@ -637,8 +1753,8 @@ func (x *seaweedKeepConnectedServer) Send(m *VolumeLocation) error { return x.ServerStream.SendMsg(m) } -func (x *seaweedKeepConnectedServer) Recv() (*ClientListenRequest, error) { - m := new(ClientListenRequest) +func (x *seaweedKeepConnectedServer) Recv() (*KeepConnectedRequest, error) { + m := new(KeepConnectedRequest) if err := x.ServerStream.RecvMsg(m); err != nil { return nil, err } @@ -663,6 +1779,150 @@ func _Seaweed_LookupVolume_Handler(srv interface{}, ctx context.Context, dec fun return interceptor(ctx, in, info, handler) } +func _Seaweed_Assign_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(AssignRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedServer).Assign(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/master_pb.Seaweed/Assign", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedServer).Assign(ctx, req.(*AssignRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Seaweed_Statistics_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StatisticsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedServer).Statistics(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/master_pb.Seaweed/Statistics", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedServer).Statistics(ctx, req.(*StatisticsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Seaweed_CollectionList_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CollectionListRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedServer).CollectionList(ctx, in) + } + info := &grpc.UnaryServerInfo{ + 
Server: srv, + FullMethod: "/master_pb.Seaweed/CollectionList", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedServer).CollectionList(ctx, req.(*CollectionListRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Seaweed_CollectionDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CollectionDeleteRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedServer).CollectionDelete(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/master_pb.Seaweed/CollectionDelete", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedServer).CollectionDelete(ctx, req.(*CollectionDeleteRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Seaweed_VolumeList_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeListRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedServer).VolumeList(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/master_pb.Seaweed/VolumeList", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedServer).VolumeList(ctx, req.(*VolumeListRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Seaweed_LookupEcVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(LookupEcVolumeRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedServer).LookupEcVolume(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/master_pb.Seaweed/LookupEcVolume", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedServer).LookupEcVolume(ctx, req.(*LookupEcVolumeRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Seaweed_GetMasterConfiguration_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetMasterConfigurationRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedServer).GetMasterConfiguration(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/master_pb.Seaweed/GetMasterConfiguration", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedServer).GetMasterConfiguration(ctx, req.(*GetMasterConfigurationRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _Seaweed_ListMasterClients_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ListMasterClientsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedServer).ListMasterClients(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/master_pb.Seaweed/ListMasterClients", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedServer).ListMasterClients(ctx, 
req.(*ListMasterClientsRequest)) + } + return interceptor(ctx, in, info, handler) +} + var _Seaweed_serviceDesc = grpc.ServiceDesc{ ServiceName: "master_pb.Seaweed", HandlerType: (*SeaweedServer)(nil), @@ -671,6 +1931,38 @@ var _Seaweed_serviceDesc = grpc.ServiceDesc{ MethodName: "LookupVolume", Handler: _Seaweed_LookupVolume_Handler, }, + { + MethodName: "Assign", + Handler: _Seaweed_Assign_Handler, + }, + { + MethodName: "Statistics", + Handler: _Seaweed_Statistics_Handler, + }, + { + MethodName: "CollectionList", + Handler: _Seaweed_CollectionList_Handler, + }, + { + MethodName: "CollectionDelete", + Handler: _Seaweed_CollectionDelete_Handler, + }, + { + MethodName: "VolumeList", + Handler: _Seaweed_VolumeList_Handler, + }, + { + MethodName: "LookupEcVolume", + Handler: _Seaweed_LookupEcVolume_Handler, + }, + { + MethodName: "GetMasterConfiguration", + Handler: _Seaweed_GetMasterConfiguration_Handler, + }, + { + MethodName: "ListMasterClients", + Handler: _Seaweed_ListMasterClients_Handler, + }, }, Streams: []grpc.StreamDesc{ { @@ -692,59 +1984,142 @@ var _Seaweed_serviceDesc = grpc.ServiceDesc{ func init() { proto.RegisterFile("master.proto", fileDescriptor0) } var fileDescriptor0 = []byte{ - // 855 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x9c, 0x55, 0xcf, 0x6f, 0xdc, 0x44, - 0x14, 0xae, 0xbd, 0x9b, 0xec, 0xfa, 0x6d, 0x36, 0xdd, 0x4c, 0x22, 0xe4, 0x2e, 0xa5, 0x5d, 0xcc, - 0xc5, 0x08, 0x14, 0x95, 0x70, 0x44, 0x5c, 0xba, 0x0a, 0x22, 0x4a, 0x50, 0x83, 0x03, 0x3d, 0x70, - 0x31, 0xb3, 0xf6, 0x6b, 0x35, 0xca, 0x78, 0x6c, 0x66, 0x66, 0x93, 0x75, 0x2f, 0xfc, 0x67, 0x9c, - 0xf8, 0x6f, 0xb8, 0x71, 0xe3, 0x86, 0x66, 0x3c, 0xde, 0x1f, 0x6e, 0x00, 0x89, 0xdb, 0x9b, 0x6f, - 0xde, 0x8f, 0x6f, 0xde, 0xf7, 0x9e, 0x0d, 0x07, 0x05, 0x55, 0x1a, 0xe5, 0x69, 0x25, 0x4b, 0x5d, - 0x92, 0xa0, 0x39, 0xa5, 0xd5, 0x22, 0xfa, 0xc3, 0x87, 0xe0, 0x5b, 0xa4, 0x52, 0x2f, 0x90, 0x6a, - 0x72, 0x08, 0x3e, 0xab, 0x42, 0x6f, 0xe6, 0xc5, 0x41, 0xe2, 0xb3, 0x8a, 0x10, 0xe8, 0x57, 0xa5, - 0xd4, 0xa1, 0x3f, 0xf3, 0xe2, 0x71, 0x62, 0x6d, 0xf2, 0x11, 0x40, 0xb5, 0x5c, 0x70, 0x96, 0xa5, - 0x4b, 0xc9, 0xc3, 0x9e, 0xf5, 0x0d, 0x1a, 0xe4, 0x47, 0xc9, 0x49, 0x0c, 0x93, 0x82, 0xae, 0xd2, - 0xbb, 0x92, 0x2f, 0x0b, 0x4c, 0xb3, 0x72, 0x29, 0x74, 0xd8, 0xb7, 0xe1, 0x87, 0x05, 0x5d, 0xbd, - 0xb6, 0xf0, 0xdc, 0xa0, 0x64, 0x66, 0x58, 0xad, 0xd2, 0x37, 0x8c, 0x63, 0x7a, 0x8b, 0x75, 0xb8, - 0x37, 0xf3, 0xe2, 0x7e, 0x02, 0x05, 0x5d, 0x7d, 0xc3, 0x38, 0x5e, 0x62, 0x4d, 0x9e, 0xc3, 0x28, - 0xa7, 0x9a, 0xa6, 0x19, 0x0a, 0x8d, 0x32, 0xdc, 0xb7, 0xb5, 0xc0, 0x40, 0x73, 0x8b, 0x18, 0x7e, - 0x92, 0x66, 0xb7, 0xe1, 0xc0, 0xde, 0x58, 0xdb, 0xf0, 0xa3, 0x79, 0xc1, 0x44, 0x6a, 0x99, 0x0f, - 0x6d, 0xe9, 0xc0, 0x22, 0xd7, 0x86, 0xfe, 0xd7, 0x30, 0x68, 0xb8, 0xa9, 0x30, 0x98, 0xf5, 0xe2, - 0xd1, 0xd9, 0x27, 0xa7, 0xeb, 0x6e, 0x9c, 0x36, 0xf4, 0x2e, 0xc4, 0x9b, 0x52, 0x16, 0x54, 0xb3, - 0x52, 0x7c, 0x87, 0x4a, 0xd1, 0xb7, 0x98, 0xb4, 0x31, 0xe4, 0x09, 0x0c, 0x05, 0xde, 0xa7, 0x77, - 0x2c, 0x57, 0x21, 0xcc, 0x7a, 0xf1, 0x38, 0x19, 0x08, 0xbc, 0x7f, 0xcd, 0x72, 0x45, 0x3e, 0x86, - 0x83, 0x1c, 0x39, 0x6a, 0xcc, 0x9b, 0xeb, 0x91, 0xbd, 0x1e, 0x39, 0xcc, 0xb8, 0x44, 0x0a, 0x8e, - 0xd6, 0xcd, 0x4e, 0x50, 0x55, 0xa5, 0x50, 0x48, 0x62, 0x78, 0xdc, 0x64, 0xbf, 0x61, 0xef, 0xf0, - 0x8a, 0x15, 0x4c, 0x5b, 0x05, 0xfa, 0x49, 0x17, 0x26, 0x4f, 0x21, 0x50, 0x98, 0x49, 0xd4, 0x97, - 0x58, 0x5b, 0x4d, 0x82, 0x64, 0x03, 0x90, 0x0f, 0x60, 0x9f, 0x23, 0xcd, 0x51, 0x3a, 0x51, 0xdc, - 0x29, 0xfa, 0xdd, 0x87, 0xf0, 0x9f, 
0x1e, 0x66, 0x15, 0xcf, 0x6d, 0xbd, 0x71, 0xe2, 0xb3, 0xdc, - 0x74, 0x54, 0xb1, 0x77, 0x68, 0xb3, 0xf7, 0x13, 0x6b, 0x93, 0x67, 0x00, 0x59, 0xc9, 0x39, 0x66, - 0x26, 0xd0, 0x25, 0xdf, 0x42, 0x4c, 0xc7, 0xad, 0x88, 0x1b, 0xb1, 0xfb, 0x49, 0x60, 0x90, 0x46, - 0xe7, 0x75, 0x5f, 0x9c, 0x43, 0xa3, 0xb3, 0xeb, 0x4b, 0xe3, 0xf2, 0x39, 0x90, 0xb6, 0x75, 0x8b, - 0x7a, 0xed, 0xb8, 0x6f, 0x1d, 0x27, 0xee, 0xe6, 0x65, 0xdd, 0x7a, 0x7f, 0x08, 0x81, 0x44, 0x9a, - 0xa7, 0xa5, 0xe0, 0xb5, 0x95, 0x7e, 0x98, 0x0c, 0x0d, 0xf0, 0x4a, 0xf0, 0x9a, 0x7c, 0x06, 0x47, - 0x12, 0x2b, 0xce, 0x32, 0x9a, 0x56, 0x9c, 0x66, 0x58, 0xa0, 0x68, 0xa7, 0x60, 0xe2, 0x2e, 0xae, - 0x5b, 0x9c, 0x84, 0x30, 0xb8, 0x43, 0xa9, 0xcc, 0xb3, 0x02, 0xeb, 0xd2, 0x1e, 0xc9, 0x04, 0x7a, - 0x5a, 0xf3, 0x10, 0x2c, 0x6a, 0xcc, 0x68, 0x00, 0x7b, 0xe7, 0x45, 0xa5, 0xeb, 0xe8, 0x37, 0x0f, - 0x1e, 0xdf, 0x2c, 0x2b, 0x94, 0x2f, 0x79, 0x99, 0xdd, 0x9e, 0xaf, 0xb4, 0xa4, 0xe4, 0x15, 0x1c, - 0xa2, 0xa4, 0x6a, 0x29, 0x0d, 0xf7, 0x9c, 0x89, 0xb7, 0xb6, 0xa5, 0xa3, 0xb3, 0x78, 0x6b, 0xb8, - 0x3a, 0x31, 0xa7, 0xe7, 0x4d, 0xc0, 0xdc, 0xfa, 0x27, 0x63, 0xdc, 0x3e, 0x4e, 0x7f, 0x82, 0xf1, - 0xce, 0xbd, 0x11, 0xc6, 0x0c, 0xbe, 0x93, 0xca, 0xda, 0x46, 0xf1, 0x8a, 0x4a, 0xa6, 0x6b, 0xb7, - 0xa0, 0xee, 0x64, 0x04, 0x71, 0xfb, 0x67, 0xe6, 0xb0, 0x67, 0xe7, 0x30, 0x68, 0x90, 0x8b, 0x5c, - 0x45, 0x9f, 0xc2, 0xf1, 0x9c, 0x33, 0x14, 0xfa, 0x8a, 0x29, 0x8d, 0x22, 0xc1, 0x5f, 0x96, 0xa8, - 0xb4, 0xa9, 0x20, 0x68, 0x81, 0x6e, 0xfd, 0xad, 0x1d, 0xfd, 0x0a, 0x87, 0xcd, 0xe8, 0x5c, 0x95, - 0x99, 0x9d, 0x1b, 0xd3, 0x18, 0xb3, 0xf7, 0x8d, 0x93, 0x31, 0x3b, 0x1f, 0x04, 0xbf, 0xfb, 0x41, - 0xd8, 0xde, 0x98, 0xde, 0xbf, 0x6f, 0x4c, 0xff, 0xfd, 0x8d, 0xf9, 0x01, 0x8e, 0xaf, 0xca, 0xf2, - 0x76, 0x59, 0x35, 0x34, 0x5a, 0xae, 0xbb, 0x2f, 0xf4, 0x66, 0x3d, 0x53, 0x73, 0xfd, 0xc2, 0xce, - 0xc4, 0xfa, 0xdd, 0x89, 0x8d, 0xfe, 0xf4, 0xe0, 0x64, 0x37, 0xad, 0xdb, 0xc5, 0x9f, 0xe1, 0x78, - 0x9d, 0x37, 0xe5, 0xee, 0xcd, 0x4d, 0x81, 0xd1, 0xd9, 0x8b, 0x2d, 0x31, 0x1f, 0x8a, 0x6e, 0x3f, - 0x1f, 0x79, 0xdb, 0xac, 0xe4, 0xe8, 0xae, 0x83, 0xa8, 0xe9, 0x0a, 0x26, 0x5d, 0x37, 0x33, 0xd0, - 0xeb, 0xaa, 0xae, 0xb3, 0xc3, 0x36, 0x92, 0x7c, 0x01, 0xc1, 0x86, 0x88, 0x6f, 0x89, 0x1c, 0xef, - 0x10, 0x71, 0xb5, 0x36, 0x5e, 0xe4, 0x04, 0xf6, 0x50, 0xca, 0xb2, 0xfd, 0x10, 0x34, 0x87, 0xe8, - 0x2b, 0x18, 0xfe, 0x6f, 0x15, 0xcf, 0xfe, 0xf2, 0x60, 0x70, 0x83, 0xf4, 0x1e, 0x31, 0x27, 0x17, - 0x30, 0xbe, 0x41, 0x91, 0x6f, 0x7e, 0x1b, 0x27, 0x5b, 0x7c, 0xd6, 0xe8, 0xf4, 0xe9, 0x43, 0x68, - 0xdb, 0xab, 0xe8, 0x51, 0xec, 0xbd, 0xf0, 0xc8, 0x35, 0x8c, 0x2f, 0x11, 0xab, 0x79, 0x29, 0x04, - 0x66, 0x1a, 0x73, 0xf2, 0x6c, 0x2b, 0xe8, 0x81, 0x21, 0x9d, 0x3e, 0x79, 0xef, 0x6b, 0xdd, 0xbe, - 0xc9, 0x65, 0xfc, 0x1e, 0x0e, 0xb6, 0xb5, 0xd9, 0x49, 0xf8, 0xc0, 0x24, 0x4d, 0x9f, 0xff, 0x87, - 0xa8, 0xd1, 0xa3, 0xc5, 0xbe, 0xfd, 0x6b, 0x7e, 0xf9, 0x77, 0x00, 0x00, 0x00, 0xff, 0xff, 0xa1, - 0x9f, 0x87, 0x2e, 0x45, 0x07, 0x00, 0x00, + // 2183 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xd4, 0x59, 0xcd, 0x6f, 0x1b, 0xc7, + 0x15, 0xf7, 0x92, 0x94, 0x48, 0x3e, 0x7e, 0x8f, 0x64, 0x85, 0x66, 0x62, 0x8b, 0xde, 0xa4, 0x88, + 0xec, 0xa6, 0x6a, 0xaa, 0x04, 0x68, 0xd0, 0x34, 0x08, 0x2c, 0x59, 0x71, 0x05, 0x5b, 0x8a, 0xbd, + 0x72, 0x1d, 0xa0, 0x40, 0xb1, 0x19, 0xee, 0x8e, 0xa4, 0x85, 0xf6, 0xab, 0xbb, 0x43, 0x59, 0x4c, + 0x2f, 0x05, 0x7a, 0xec, 0xa9, 0xe8, 0xa1, 0xff, 0x42, 0x2f, 0x3d, 0xb5, 0x97, 0x5e, 0x72, 0xe9, + 0x7f, 0xd4, 0x6b, 0x2e, 0xc5, 0x7c, 0xed, 0xce, 0x2e, 0x49, 0x29, 
0x0a, 0x90, 0x83, 0x6f, 0xbb, + 0xef, 0xbd, 0x79, 0xf3, 0xf6, 0xf7, 0xe6, 0xbd, 0xf9, 0x3d, 0x12, 0xda, 0x01, 0x4e, 0x29, 0x49, + 0xb6, 0xe3, 0x24, 0xa2, 0x11, 0x6a, 0x8a, 0x37, 0x3b, 0x9e, 0x98, 0x7f, 0x59, 0x85, 0xe6, 0x6f, + 0x08, 0x4e, 0xe8, 0x84, 0x60, 0x8a, 0xba, 0x50, 0xf1, 0xe2, 0xa1, 0x31, 0x36, 0xb6, 0x9a, 0x56, + 0xc5, 0x8b, 0x11, 0x82, 0x5a, 0x1c, 0x25, 0x74, 0x58, 0x19, 0x1b, 0x5b, 0x1d, 0x8b, 0x3f, 0xa3, + 0xbb, 0x00, 0xf1, 0x74, 0xe2, 0x7b, 0x8e, 0x3d, 0x4d, 0xfc, 0x61, 0x95, 0xdb, 0x36, 0x85, 0xe4, + 0xb7, 0x89, 0x8f, 0xb6, 0xa0, 0x1f, 0xe0, 0x4b, 0xfb, 0x22, 0xf2, 0xa7, 0x01, 0xb1, 0x9d, 0x68, + 0x1a, 0xd2, 0x61, 0x8d, 0x2f, 0xef, 0x06, 0xf8, 0xf2, 0x15, 0x17, 0xef, 0x31, 0x29, 0x1a, 0xb3, + 0xa8, 0x2e, 0xed, 0x13, 0xcf, 0x27, 0xf6, 0x39, 0x99, 0x0d, 0x57, 0xc6, 0xc6, 0x56, 0xcd, 0x82, + 0x00, 0x5f, 0x7e, 0xe1, 0xf9, 0xe4, 0x29, 0x99, 0xa1, 0x4d, 0x68, 0xb9, 0x98, 0x62, 0xdb, 0x21, + 0x21, 0x25, 0xc9, 0x70, 0x95, 0xef, 0x05, 0x4c, 0xb4, 0xc7, 0x25, 0x2c, 0xbe, 0x04, 0x3b, 0xe7, + 0xc3, 0x3a, 0xd7, 0xf0, 0x67, 0x16, 0x1f, 0x76, 0x03, 0x2f, 0xb4, 0x79, 0xe4, 0x0d, 0xbe, 0x75, + 0x93, 0x4b, 0x9e, 0xb3, 0xf0, 0x3f, 0x83, 0xba, 0x88, 0x2d, 0x1d, 0x36, 0xc7, 0xd5, 0xad, 0xd6, + 0xce, 0xbb, 0xdb, 0x19, 0x1a, 0xdb, 0x22, 0xbc, 0x83, 0xf0, 0x24, 0x4a, 0x02, 0x4c, 0xbd, 0x28, + 0x3c, 0x24, 0x69, 0x8a, 0x4f, 0x89, 0xa5, 0xd6, 0xa0, 0x03, 0x68, 0x85, 0xe4, 0xb5, 0xad, 0x5c, + 0x00, 0x77, 0xb1, 0x35, 0xe7, 0xe2, 0xf8, 0x2c, 0x4a, 0xe8, 0x02, 0x3f, 0x10, 0x92, 0xd7, 0xaf, + 0xa4, 0xab, 0x17, 0xd0, 0x73, 0x89, 0x4f, 0x28, 0x71, 0x33, 0x77, 0xad, 0x1b, 0xba, 0xeb, 0x4a, + 0x07, 0xca, 0xe5, 0x7b, 0xd0, 0x3d, 0xc3, 0xa9, 0x1d, 0x46, 0x99, 0xc7, 0xf6, 0xd8, 0xd8, 0x6a, + 0x58, 0xed, 0x33, 0x9c, 0x1e, 0x45, 0xca, 0xea, 0x09, 0x34, 0x89, 0x63, 0xa7, 0x67, 0x38, 0x71, + 0xd3, 0x61, 0x9f, 0x6f, 0xf9, 0x70, 0x6e, 0xcb, 0x7d, 0xe7, 0x98, 0x19, 0x2c, 0xd8, 0xb4, 0x41, + 0x84, 0x2a, 0x45, 0x47, 0xd0, 0x61, 0x60, 0xe4, 0xce, 0x06, 0x37, 0x76, 0xc6, 0xd0, 0xdc, 0x57, + 0xfe, 0x5e, 0xc1, 0x40, 0x21, 0x92, 0xfb, 0x44, 0x37, 0xf6, 0xa9, 0x60, 0xcd, 0xfc, 0xbe, 0x0f, + 0x7d, 0x09, 0x4b, 0xee, 0x76, 0x8d, 0x03, 0xd3, 0xe1, 0xc0, 0x28, 0x43, 0xf3, 0x4f, 0x15, 0x18, + 0x64, 0xd5, 0x60, 0x91, 0x34, 0x8e, 0xc2, 0x94, 0xa0, 0x87, 0x30, 0x90, 0xc7, 0x39, 0xf5, 0xbe, + 0x21, 0xb6, 0xef, 0x05, 0x1e, 0xe5, 0x45, 0x52, 0xb3, 0x7a, 0x42, 0x71, 0xec, 0x7d, 0x43, 0x9e, + 0x31, 0x31, 0xda, 0x80, 0x55, 0x9f, 0x60, 0x97, 0x24, 0xbc, 0x66, 0x9a, 0x96, 0x7c, 0x43, 0xef, + 0x43, 0x2f, 0x20, 0x34, 0xf1, 0x9c, 0xd4, 0xc6, 0xae, 0x9b, 0x90, 0x34, 0x95, 0xa5, 0xd3, 0x95, + 0xe2, 0x47, 0x42, 0x8a, 0x3e, 0x81, 0xa1, 0x32, 0xf4, 0xd8, 0x19, 0xbf, 0xc0, 0xbe, 0x9d, 0x12, + 0x27, 0x0a, 0xdd, 0x54, 0xd6, 0xd1, 0x86, 0xd4, 0x1f, 0x48, 0xf5, 0xb1, 0xd0, 0xa2, 0xc7, 0xd0, + 0x4f, 0x69, 0x94, 0xe0, 0x53, 0x62, 0x4f, 0xb0, 0x73, 0x4e, 0xd8, 0x8a, 0x15, 0x0e, 0xde, 0x1d, + 0x0d, 0xbc, 0x63, 0x61, 0xb2, 0x2b, 0x2c, 0xac, 0x5e, 0x5a, 0x78, 0x4f, 0xcd, 0xef, 0xaa, 0x30, + 0x5c, 0x56, 0x06, 0xbc, 0x3f, 0xb8, 0xfc, 0xd3, 0x3b, 0x56, 0xc5, 0x73, 0x59, 0xfd, 0x31, 0x48, + 0xf8, 0xb7, 0xd6, 0x2c, 0xfe, 0x8c, 0xee, 0x01, 0x38, 0x91, 0xef, 0x13, 0x87, 0x2d, 0x94, 0x1f, + 0xa9, 0x49, 0x58, 0x7d, 0xf2, 0x92, 0xcf, 0x5b, 0x43, 0xcd, 0x6a, 0x32, 0x89, 0xe8, 0x0a, 0xf7, + 0xa1, 0x2d, 0xd2, 0x27, 0x0d, 0x44, 0x57, 0x68, 0x09, 0x99, 0x30, 0xf9, 0x00, 0x90, 0x3a, 0x26, + 0x93, 0x59, 0x66, 0xb8, 0xca, 0x0d, 0xfb, 0x52, 0xb3, 0x3b, 0x53, 0xd6, 0x6f, 0x43, 0x33, 0x21, + 0xd8, 0xb5, 0xa3, 0xd0, 0x9f, 0xf1, 0x46, 0xd1, 0xb0, 0x1a, 0x4c, 0xf0, 0x65, 0xe8, 0xcf, 
0xd0, + 0x4f, 0x61, 0x90, 0x90, 0xd8, 0xf7, 0x1c, 0x6c, 0xc7, 0x3e, 0x76, 0x48, 0x40, 0x42, 0xd5, 0x33, + 0xfa, 0x52, 0xf1, 0x5c, 0xc9, 0xd1, 0x10, 0xea, 0x17, 0x24, 0x49, 0xd9, 0x67, 0x35, 0xb9, 0x89, + 0x7a, 0x45, 0x7d, 0xa8, 0x52, 0xea, 0x0f, 0x81, 0x4b, 0xd9, 0x23, 0x7a, 0x00, 0x7d, 0x27, 0x0a, + 0x62, 0xec, 0x50, 0x3b, 0x21, 0x17, 0x1e, 0x5f, 0xd4, 0xe2, 0xea, 0x9e, 0x94, 0x5b, 0x52, 0xcc, + 0x3e, 0x27, 0x88, 0x5c, 0xef, 0xc4, 0x23, 0xae, 0x8d, 0xa9, 0x4c, 0x36, 0x2f, 0xdc, 0xaa, 0xd5, + 0x57, 0x9a, 0x47, 0x54, 0xa4, 0x19, 0x6d, 0xc3, 0x5a, 0x42, 0x82, 0x88, 0x12, 0x5b, 0x25, 0x3b, + 0xc4, 0x01, 0x19, 0x76, 0x38, 0xce, 0x03, 0xa1, 0x92, 0x39, 0x3e, 0xc2, 0x01, 0x61, 0xde, 0x4b, + 0xf6, 0xac, 0xd7, 0x76, 0xb9, 0x79, 0xbf, 0x60, 0xfe, 0x94, 0xcc, 0xcc, 0x7f, 0x18, 0x70, 0xf7, + 0xca, 0x96, 0x33, 0x77, 0x04, 0xae, 0x4b, 0xf7, 0x8f, 0x85, 0xb0, 0x39, 0x85, 0xcd, 0x6b, 0x1a, + 0xc1, 0x35, 0xb1, 0x56, 0xe6, 0x62, 0x35, 0xa1, 0x43, 0x1c, 0xdb, 0x0b, 0x5d, 0x72, 0x69, 0x4f, + 0x3c, 0x2a, 0x4a, 0xb4, 0x63, 0xb5, 0x88, 0x73, 0xc0, 0x64, 0xbb, 0x1e, 0x4d, 0xcd, 0x6f, 0x0d, + 0xe8, 0x16, 0x6b, 0x88, 0x55, 0x01, 0x9d, 0xc5, 0x44, 0xde, 0x9b, 0xfc, 0x59, 0x6e, 0x5d, 0x91, + 0x37, 0xa9, 0x8b, 0x0e, 0x00, 0xe2, 0x24, 0x8a, 0x49, 0x42, 0x3d, 0xc2, 0xfc, 0xb2, 0xb2, 0x7c, + 0xb0, 0xb4, 0x2c, 0xb7, 0x9f, 0x67, 0xb6, 0xfb, 0x21, 0x4d, 0x66, 0x96, 0xb6, 0x78, 0xf4, 0x19, + 0xf4, 0x4a, 0x6a, 0x86, 0x0e, 0xcb, 0xaa, 0x08, 0x80, 0x3d, 0xa2, 0x75, 0x58, 0xb9, 0xc0, 0xfe, + 0x94, 0xc8, 0x10, 0xc4, 0xcb, 0xaf, 0x2a, 0x9f, 0x18, 0x66, 0x1d, 0x56, 0xf6, 0x83, 0x98, 0xce, + 0xd8, 0x97, 0xf4, 0x8e, 0xa7, 0x31, 0x49, 0x76, 0xfd, 0xc8, 0x39, 0xdf, 0xbf, 0xa4, 0x09, 0x46, + 0x5f, 0x42, 0x97, 0x24, 0x38, 0x9d, 0x26, 0xac, 0xaa, 0x5c, 0x2f, 0x3c, 0xe5, 0x3e, 0x8b, 0x57, + 0x52, 0x69, 0xcd, 0xf6, 0xbe, 0x58, 0xb0, 0xc7, 0xed, 0xad, 0x0e, 0xd1, 0x5f, 0x47, 0xbf, 0x83, + 0x4e, 0x41, 0xcf, 0xc0, 0x62, 0x17, 0xb8, 0xcc, 0x0a, 0x7f, 0x66, 0x4d, 0x33, 0xc6, 0x89, 0x47, + 0x67, 0x92, 0x68, 0xc8, 0x37, 0xd6, 0x2a, 0x64, 0xe3, 0xf5, 0x5c, 0x01, 0x5a, 0xc7, 0x6a, 0x0a, + 0xc9, 0x81, 0x9b, 0x9a, 0x4f, 0x60, 0xfd, 0x29, 0x21, 0xf1, 0x5e, 0x14, 0x86, 0xc4, 0xa1, 0xc4, + 0xb5, 0xc8, 0x1f, 0xa6, 0x24, 0xa5, 0x6c, 0x0b, 0x5e, 0x13, 0x32, 0x1f, 0xec, 0x99, 0x75, 0x81, + 0xd3, 0x24, 0x76, 0x6c, 0x8d, 0xce, 0x34, 0x98, 0x80, 0x71, 0x02, 0xf3, 0xef, 0x06, 0x74, 0xc5, + 0x59, 0x7a, 0x16, 0x39, 0xfc, 0x04, 0x31, 0x44, 0x19, 0xbd, 0x91, 0x88, 0x4e, 0x13, 0xbf, 0xc4, + 0x7b, 0x2a, 0x65, 0xde, 0x73, 0x07, 0x1a, 0x9c, 0x18, 0xe4, 0x91, 0xd6, 0xd9, 0x5d, 0xef, 0xb9, + 0x69, 0xde, 0xd2, 0x5c, 0xa1, 0xae, 0x71, 0x75, 0x4b, 0xdd, 0xdd, 0xcc, 0x24, 0xbf, 0x36, 0x56, + 0xf4, 0x6b, 0xc3, 0x7c, 0x09, 0x6b, 0xcf, 0xa2, 0xe8, 0x7c, 0x1a, 0x8b, 0xf0, 0xd4, 0x17, 0x16, + 0x81, 0x31, 0xc6, 0x55, 0x16, 0x4b, 0x06, 0xcc, 0x75, 0xe7, 0xdc, 0xfc, 0x9f, 0x01, 0xeb, 0x45, + 0xb7, 0xf2, 0xa6, 0xfb, 0x1a, 0xd6, 0x32, 0xbf, 0xb6, 0x2f, 0xb1, 0x10, 0x1b, 0xb4, 0x76, 0x3e, + 0xd4, 0xce, 0xc0, 0xa2, 0xd5, 0x8a, 0x3d, 0xb9, 0x0a, 0x44, 0x6b, 0x70, 0x51, 0x92, 0xa4, 0xa3, + 0x4b, 0xe8, 0x97, 0xcd, 0x58, 0x6e, 0xb2, 0x5d, 0x25, 0xe2, 0x0d, 0xb5, 0x12, 0xfd, 0x02, 0x9a, + 0x79, 0x20, 0x15, 0x1e, 0xc8, 0x5a, 0x21, 0x10, 0xb9, 0x57, 0x6e, 0xc5, 0xce, 0x3e, 0x49, 0x92, + 0x28, 0x91, 0xdd, 0x48, 0xbc, 0x98, 0x9f, 0x42, 0xe3, 0x07, 0x67, 0xd7, 0xfc, 0x57, 0x05, 0x3a, + 0x8f, 0xd2, 0xd4, 0x3b, 0x0d, 0x55, 0x0a, 0xd6, 0x61, 0x45, 0xdc, 0x3b, 0x82, 0x08, 0x88, 0x17, + 0x34, 0x86, 0x96, 0x6c, 0x6a, 0x1a, 0xf4, 0xba, 0xe8, 0xda, 0x7e, 0x29, 0x1b, 0x5d, 0x4d, 0x84, + 0xc6, 0xae, 0x92, 
0x12, 0x0b, 0x5e, 0x59, 0xca, 0x82, 0x57, 0x35, 0x16, 0xfc, 0x36, 0x34, 0xf9, + 0xa2, 0x30, 0x72, 0x89, 0xa4, 0xc7, 0x0d, 0x26, 0x38, 0x8a, 0x5c, 0x82, 0x76, 0x60, 0x23, 0x20, + 0x41, 0x94, 0xcc, 0xec, 0x00, 0xc7, 0x36, 0x23, 0xe1, 0x9c, 0xd8, 0x04, 0x13, 0xd9, 0x98, 0x91, + 0xd0, 0x1e, 0xe2, 0xf8, 0x10, 0x5f, 0x32, 0x6e, 0x73, 0x38, 0x41, 0x3b, 0x70, 0xfb, 0xab, 0xc4, + 0xa3, 0x78, 0xe2, 0x93, 0x22, 0xb9, 0x17, 0x8d, 0x7a, 0x4d, 0x29, 0x35, 0x86, 0x6f, 0xfe, 0xcd, + 0x80, 0xae, 0x42, 0x4d, 0x9e, 0xb0, 0x3e, 0x54, 0x4f, 0xb2, 0x2c, 0xb3, 0x47, 0x95, 0x8b, 0xca, + 0xb2, 0x5c, 0xcc, 0x4d, 0x18, 0x19, 0xf2, 0x35, 0x1d, 0xf9, 0x2c, 0xe9, 0x2b, 0x5a, 0xd2, 0x19, + 0x34, 0x78, 0x4a, 0xcf, 0x14, 0x34, 0xec, 0xd9, 0x3c, 0x85, 0xc1, 0x31, 0xc5, 0xd4, 0x4b, 0xa9, + 0xe7, 0xa4, 0x2a, 0x9d, 0xa5, 0xc4, 0x19, 0xd7, 0x25, 0xae, 0xb2, 0x2c, 0x71, 0xd5, 0x2c, 0x71, + 0xe6, 0x7f, 0x0d, 0x40, 0xfa, 0x4e, 0x12, 0x82, 0x1f, 0x61, 0x2b, 0x06, 0x19, 0x8d, 0x28, 0xa3, + 0x8a, 0x8c, 0x8e, 0x49, 0x52, 0xc5, 0x25, 0x2c, 0x7d, 0xec, 0x34, 0x4c, 0x53, 0xe2, 0x0a, 0xad, + 0x60, 0x54, 0x0d, 0x26, 0xe0, 0xca, 0x22, 0x21, 0x5b, 0x2d, 0x11, 0x32, 0xf3, 0x11, 0xb4, 0xe4, + 0xe5, 0xf4, 0x92, 0x5d, 0x6c, 0xd7, 0x47, 0x2f, 0xa3, 0xab, 0xe4, 0x40, 0x8c, 0x01, 0xf6, 0xf2, + 0xe8, 0x17, 0xb4, 0x67, 0xf3, 0x8f, 0x70, 0x3b, 0xb7, 0x78, 0xe6, 0xa5, 0x54, 0xe5, 0xe5, 0x63, + 0xd8, 0xf0, 0x42, 0xc7, 0x9f, 0xba, 0xc4, 0x0e, 0xd9, 0xf5, 0xee, 0x67, 0x93, 0x8d, 0xc1, 0xa9, + 0xdc, 0xba, 0xd4, 0x1e, 0x71, 0xa5, 0x9a, 0x70, 0x3e, 0x00, 0xa4, 0x56, 0x11, 0x27, 0x5b, 0x51, + 0xe1, 0x2b, 0xfa, 0x52, 0xb3, 0xef, 0x48, 0x6b, 0xf3, 0x05, 0x6c, 0x94, 0x37, 0x97, 0xa9, 0xfa, + 0x25, 0xb4, 0x72, 0xd8, 0x55, 0x1f, 0xbc, 0xad, 0xb5, 0x9f, 0x7c, 0x9d, 0xa5, 0x5b, 0x9a, 0x3f, + 0x83, 0xb7, 0x72, 0xd5, 0x63, 0xde, 0xe8, 0xaf, 0xb8, 0x9d, 0xcc, 0x11, 0x0c, 0xe7, 0xcd, 0x45, + 0x0c, 0xe6, 0x5f, 0xab, 0xd0, 0x7e, 0x2c, 0x2b, 0x97, 0x71, 0x1c, 0x8d, 0xd5, 0x08, 0x6a, 0x71, + 0x1f, 0xda, 0x85, 0x82, 0x14, 0x64, 0xbc, 0x75, 0xa1, 0x8d, 0xda, 0x8b, 0x86, 0xf2, 0x2a, 0x37, + 0x2b, 0x0f, 0xe5, 0x0f, 0x61, 0x70, 0x92, 0x10, 0x32, 0x3f, 0xbf, 0xd7, 0xac, 0x1e, 0x53, 0xe8, + 0xb6, 0xdb, 0xb0, 0x86, 0x1d, 0xea, 0x5d, 0x94, 0xac, 0xc5, 0xf9, 0x1a, 0x08, 0x95, 0x6e, 0xff, + 0x45, 0x16, 0xa8, 0x17, 0x9e, 0x44, 0xe9, 0x70, 0xf5, 0xfb, 0xcf, 0xdf, 0xf2, 0x6b, 0x98, 0x26, + 0x45, 0xcf, 0xa1, 0xab, 0xe6, 0x38, 0xe9, 0xa9, 0x7e, 0xe3, 0x19, 0xb1, 0x4d, 0x72, 0x55, 0xaa, + 0x91, 0xea, 0xc2, 0x97, 0x34, 0xc4, 0x97, 0x08, 0x95, 0xde, 0xd8, 0xfe, 0x5d, 0x81, 0x86, 0x85, + 0x9d, 0xf3, 0x37, 0x3b, 0x1f, 0x9f, 0x43, 0x2f, 0xbb, 0x23, 0x0a, 0x29, 0x79, 0x4b, 0x03, 0x52, + 0x3f, 0x7a, 0x56, 0xc7, 0xd5, 0xde, 0x96, 0xc2, 0x56, 0x5f, 0x06, 0xdb, 0x3f, 0x2b, 0xd0, 0x7d, + 0x9c, 0xdd, 0x5b, 0x6f, 0x36, 0x78, 0x3b, 0x00, 0xec, 0xa2, 0x2d, 0xe0, 0xa6, 0x13, 0x13, 0x75, + 0x3c, 0xac, 0x66, 0x22, 0x9f, 0x6e, 0x8e, 0xd7, 0xb7, 0x15, 0x68, 0xbf, 0x8c, 0xe2, 0xc8, 0x8f, + 0x4e, 0x67, 0x6f, 0x36, 0x5a, 0xfb, 0x30, 0xd0, 0x38, 0x4c, 0x01, 0xb4, 0x3b, 0xa5, 0xc3, 0x96, + 0x1f, 0x0e, 0xab, 0xe7, 0x16, 0xde, 0x6f, 0x0e, 0xe0, 0x1a, 0x0c, 0x24, 0xaf, 0xcf, 0xaf, 0x14, + 0xf3, 0xcf, 0x06, 0x20, 0x5d, 0x2a, 0x7b, 0xfd, 0xaf, 0xa1, 0x43, 0x25, 0xd6, 0x3c, 0x3e, 0x39, + 0xf9, 0xe8, 0xb5, 0xa0, 0xe7, 0xc2, 0x6a, 0x53, 0x3d, 0x33, 0x3f, 0x87, 0xf5, 0xb9, 0xdf, 0x88, + 0x18, 0xa1, 0x12, 0x19, 0x19, 0x94, 0x7e, 0x26, 0x3a, 0x9c, 0x98, 0x1f, 0xc3, 0x6d, 0x41, 0xa2, + 0xd5, 0x3d, 0xa4, 0xee, 0x87, 0x39, 0x36, 0xdc, 0xc9, 0xd9, 0xb0, 0xf9, 0x9d, 0x01, 0x1b, 0xe5, + 0x65, 0x32, 0xfe, 0xab, 0xd6, 0x21, 0x0c, 
0x48, 0xf6, 0x4b, 0x9d, 0xd7, 0x0b, 0x3a, 0xfd, 0xd1,
+	0x1c, 0xaf, 0x2f, 0xfb, 0xde, 0x56, 0x7d, 0x34, 0xa7, 0xf6, 0xfd, 0xb4, 0x28, 0x48, 0x47, 0x18,
+	0x06, 0x73, 0x66, 0x6c, 0x2a, 0x52, 0xfb, 0xca, 0x98, 0xea, 0x72, 0xe1, 0x0f, 0x20, 0xf6, 0xe6,
+	0x26, 0xdc, 0x7d, 0x42, 0xe8, 0x21, 0xb7, 0xd9, 0x8b, 0xc2, 0x13, 0xef, 0x74, 0x9a, 0x08, 0xa3,
+	0x3c, 0xb5, 0xf7, 0x96, 0x59, 0x48, 0x98, 0x16, 0xfc, 0x10, 0x67, 0xdc, 0xf8, 0x87, 0xb8, 0xca,
+	0x55, 0x3f, 0xc4, 0x99, 0x9f, 0xc2, 0x90, 0x9d, 0x2c, 0x19, 0x85, 0xef, 0x91, 0x90, 0x66, 0x3c,
+	0x73, 0x13, 0x5a, 0x0e, 0x97, 0xd8, 0xda, 0x4f, 0x06, 0x20, 0x44, 0x8c, 0x5f, 0x99, 0xbb, 0x70,
+	0x67, 0xc1, 0x62, 0x19, 0xfc, 0x4f, 0xa0, 0xcb, 0xa7, 0x58, 0x19, 0x39, 0x51, 0xb3, 0x5f, 0x87,
+	0x49, 0x1f, 0x29, 0xe1, 0xce, 0x7f, 0xea, 0x50, 0x3f, 0x26, 0xf8, 0x35, 0x21, 0x2e, 0x3a, 0x80,
+	0xce, 0x31, 0x09, 0xdd, 0xfc, 0x37, 0xfe, 0x75, 0x0d, 0xe4, 0x4c, 0x3a, 0x7a, 0x67, 0x91, 0x34,
+	0xe3, 0x20, 0xb7, 0xb6, 0x8c, 0x0f, 0x0d, 0xf4, 0x02, 0x3a, 0x85, 0x79, 0x1b, 0x6d, 0x6a, 0x8b,
+	0x16, 0x4d, 0xe2, 0xa3, 0x3b, 0x73, 0x37, 0xb2, 0x4a, 0x6b, 0xe6, 0xb2, 0xad, 0x8f, 0x92, 0xe8,
+	0xde, 0xd2, 0x19, 0x53, 0x38, 0xdc, 0xbc, 0x66, 0x06, 0x35, 0x6f, 0xa1, 0xcf, 0x61, 0x55, 0xcc,
+	0x1c, 0x68, 0xa8, 0x19, 0x17, 0x86, 0xb7, 0x42, 0x5c, 0xc5, 0x01, 0xc5, 0xbc, 0x85, 0x9e, 0x02,
+	0xe4, 0xac, 0x1d, 0xbd, 0x53, 0xf8, 0x91, 0xa6, 0x34, 0x36, 0x8c, 0xee, 0x2e, 0xd1, 0x66, 0xce,
+	0xbe, 0x82, 0x6e, 0x91, 0x5b, 0xa2, 0xf1, 0x42, 0xfa, 0xa8, 0x35, 0xa8, 0xd1, 0xfd, 0x2b, 0x2c,
+	0x32, 0xc7, 0xbf, 0x87, 0x7e, 0x99, 0x32, 0x22, 0x73, 0xe1, 0xc2, 0x02, 0xfd, 0x1c, 0xbd, 0x7b,
+	0xa5, 0x8d, 0x0e, 0x42, 0xde, 0x23, 0x0b, 0x20, 0xcc, 0x35, 0xd4, 0x02, 0x08, 0xf3, 0x8d, 0x55,
+	0x80, 0x50, 0x6c, 0x2c, 0x05, 0x10, 0x16, 0xb6, 0xc1, 0x02, 0x08, 0x8b, 0xbb, 0x92, 0x79, 0x0b,
+	0x45, 0xb0, 0xb1, 0xb8, 0xdc, 0x91, 0xfe, 0x83, 0xd5, 0x95, 0x3d, 0x63, 0xf4, 0xe0, 0x7b, 0x58,
+	0x66, 0x1b, 0x7e, 0x0d, 0x83, 0xb9, 0xea, 0x44, 0x3a, 0xa4, 0xcb, 0x0a, 0x7f, 0xf4, 0xde, 0xd5,
+	0x46, 0x6a, 0x87, 0xc9, 0x2a, 0xff, 0x8b, 0xee, 0xa3, 0xff, 0x07, 0x00, 0x00, 0xff, 0xff, 0x6a,
+	0xee, 0xfc, 0x90, 0xb2, 0x1b, 0x00, 0x00,
}
diff --git a/weed/pb/proto_read_write_test.go b/weed/pb/proto_read_write_test.go
new file mode 100644
index 000000000..7f6444ab5
--- /dev/null
+++ b/weed/pb/proto_read_write_test.go
@@ -0,0 +1,43 @@
+package pb
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/golang/protobuf/jsonpb"
+)
+
+func TestJsonpMarshalUnmarshal(t *testing.T) {
+
+	tv := &volume_server_pb.RemoteFile{
+		BackendType: "aws",
+		BackendId:   "",
+		FileSize:    12,
+	}
+
+	m := jsonpb.Marshaler{
+		EmitDefaults: true,
+		Indent:       "  ",
+	}
+
+	if text, err := m.MarshalToString(tv); err != nil {
+		t.Fatalf("marshal error: %v", err)
+	} else {
+		fmt.Printf("marshalled: %s\n", text)
+	}
+
+	rawJson := `{
+		"backendType":"aws",
+		"backendId":"temp",
+		"FileSize":12
+	}`
+
+	tv1 := &volume_server_pb.RemoteFile{}
+	if err := jsonpb.UnmarshalString(rawJson, tv1); err != nil {
+		t.Fatalf("unmarshal error: %v", err)
+	}
+
+	fmt.Printf("unmarshalled: %+v\n", tv1)
+
+}
diff --git a/weed/pb/queue.proto b/weed/pb/queue.proto
new file mode 100644
index 000000000..39b6ee05a
--- /dev/null
+++ b/weed/pb/queue.proto
@@ -0,0 +1,66 @@
+syntax = "proto3";
+
+package queue_pb;
+
+option java_package = "seaweedfs.client";
+option java_outer_classname = "QueueProto";
+
+//////////////////////////////////////////////////
+
+service SeaweedQueue {
+
+    rpc StreamWrite (stream 
WriteMessageRequest) returns (stream WriteMessageResponse) { + } + + rpc StreamRead (ReadMessageRequest) returns (stream ReadMessageResponse) { + } + + rpc ConfigureTopic (ConfigureTopicRequest) returns (ConfigureTopicResponse) { + } + + rpc DeleteTopic (DeleteTopicRequest) returns (DeleteTopicResponse) { + } + +} + +////////////////////////////////////////////////// + + +message WriteMessageRequest { + string topic = 1; + int64 event_ns = 2; + bytes partition_key = 3; + bytes data = 4; +} + +message WriteMessageResponse { + string error = 1; + int64 ack_ns = 2; +} + +message ReadMessageRequest { + string topic = 1; + int64 start_ns = 2; +} + +message ReadMessageResponse { + string error = 1; + int64 event_ns = 2; + bytes data = 3; +} + +message ConfigureTopicRequest { + string topic = 1; + int64 ttl_seconds = 2; + int32 partition_count = 3; +} +message ConfigureTopicResponse { + string error = 1; +} + +message DeleteTopicRequest { + string topic = 1; +} +message DeleteTopicResponse { + string error = 1; +} diff --git a/weed/pb/queue_pb/queue.pb.go b/weed/pb/queue_pb/queue.pb.go new file mode 100644 index 000000000..8ec4d62aa --- /dev/null +++ b/weed/pb/queue_pb/queue.pb.go @@ -0,0 +1,516 @@ +// Code generated by protoc-gen-go. +// source: queue.proto +// DO NOT EDIT! + +/* +Package queue_pb is a generated protocol buffer package. + +It is generated from these files: + queue.proto + +It has these top-level messages: + WriteMessageRequest + WriteMessageResponse + ReadMessageRequest + ReadMessageResponse + ConfigureTopicRequest + ConfigureTopicResponse + DeleteTopicRequest + DeleteTopicResponse +*/ +package queue_pb + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" + +import ( + context "golang.org/x/net/context" + grpc "google.golang.org/grpc" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type WriteMessageRequest struct { + Topic string `protobuf:"bytes,1,opt,name=topic" json:"topic,omitempty"` + EventNs int64 `protobuf:"varint,2,opt,name=event_ns,json=eventNs" json:"event_ns,omitempty"` + PartitionKey []byte `protobuf:"bytes,3,opt,name=partition_key,json=partitionKey,proto3" json:"partition_key,omitempty"` + Data []byte `protobuf:"bytes,4,opt,name=data,proto3" json:"data,omitempty"` +} + +func (m *WriteMessageRequest) Reset() { *m = WriteMessageRequest{} } +func (m *WriteMessageRequest) String() string { return proto.CompactTextString(m) } +func (*WriteMessageRequest) ProtoMessage() {} +func (*WriteMessageRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } + +func (m *WriteMessageRequest) GetTopic() string { + if m != nil { + return m.Topic + } + return "" +} + +func (m *WriteMessageRequest) GetEventNs() int64 { + if m != nil { + return m.EventNs + } + return 0 +} + +func (m *WriteMessageRequest) GetPartitionKey() []byte { + if m != nil { + return m.PartitionKey + } + return nil +} + +func (m *WriteMessageRequest) GetData() []byte { + if m != nil { + return m.Data + } + return nil +} + +type WriteMessageResponse struct { + Error string `protobuf:"bytes,1,opt,name=error" json:"error,omitempty"` + AckNs int64 `protobuf:"varint,2,opt,name=ack_ns,json=ackNs" json:"ack_ns,omitempty"` +} + +func (m *WriteMessageResponse) Reset() { *m = WriteMessageResponse{} } +func (m *WriteMessageResponse) String() string { return proto.CompactTextString(m) } +func (*WriteMessageResponse) ProtoMessage() {} +func (*WriteMessageResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } + +func (m *WriteMessageResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +func (m *WriteMessageResponse) GetAckNs() int64 { + if m != nil { + return m.AckNs + } + return 0 +} + +type ReadMessageRequest struct { + Topic string `protobuf:"bytes,1,opt,name=topic" json:"topic,omitempty"` + StartNs int64 `protobuf:"varint,2,opt,name=start_ns,json=startNs" json:"start_ns,omitempty"` +} + +func (m *ReadMessageRequest) Reset() { *m = ReadMessageRequest{} } +func (m *ReadMessageRequest) String() string { return proto.CompactTextString(m) } +func (*ReadMessageRequest) ProtoMessage() {} +func (*ReadMessageRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} } + +func (m *ReadMessageRequest) GetTopic() string { + if m != nil { + return m.Topic + } + return "" +} + +func (m *ReadMessageRequest) GetStartNs() int64 { + if m != nil { + return m.StartNs + } + return 0 +} + +type ReadMessageResponse struct { + Error string `protobuf:"bytes,1,opt,name=error" json:"error,omitempty"` + EventNs int64 `protobuf:"varint,2,opt,name=event_ns,json=eventNs" json:"event_ns,omitempty"` + Data []byte `protobuf:"bytes,3,opt,name=data,proto3" json:"data,omitempty"` +} + +func (m *ReadMessageResponse) Reset() { *m = ReadMessageResponse{} } +func (m *ReadMessageResponse) String() string { return proto.CompactTextString(m) } +func (*ReadMessageResponse) ProtoMessage() {} +func (*ReadMessageResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} } + +func (m *ReadMessageResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +func (m *ReadMessageResponse) GetEventNs() int64 { + if m != nil { + return m.EventNs + } + return 0 +} + +func (m *ReadMessageResponse) GetData() []byte { + if m != nil { + return m.Data + } + return nil +} + 
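
The generated accessors above share one deliberate pattern: every GetXxx method guards with "if m != nil" before touching a field, so a getter may be called on a nil message pointer and returns the field's zero value instead of panicking. A minimal caller-side sketch of that guarantee (hypothetical example code, not part of this patch):

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/pb/queue_pb"
)

func main() {
	// req is nil, yet the generated getters are safe to call:
	// the nil-receiver check returns each field's zero value.
	var req *queue_pb.ReadMessageRequest
	fmt.Println(req.GetTopic())   // "" (zero value for string)
	fmt.Println(req.GetStartNs()) // 0  (zero value for int64)
}
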
+type ConfigureTopicRequest struct { + Topic string `protobuf:"bytes,1,opt,name=topic" json:"topic,omitempty"` + TtlSeconds int64 `protobuf:"varint,2,opt,name=ttl_seconds,json=ttlSeconds" json:"ttl_seconds,omitempty"` + PartitionCount int32 `protobuf:"varint,3,opt,name=partition_count,json=partitionCount" json:"partition_count,omitempty"` +} + +func (m *ConfigureTopicRequest) Reset() { *m = ConfigureTopicRequest{} } +func (m *ConfigureTopicRequest) String() string { return proto.CompactTextString(m) } +func (*ConfigureTopicRequest) ProtoMessage() {} +func (*ConfigureTopicRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} } + +func (m *ConfigureTopicRequest) GetTopic() string { + if m != nil { + return m.Topic + } + return "" +} + +func (m *ConfigureTopicRequest) GetTtlSeconds() int64 { + if m != nil { + return m.TtlSeconds + } + return 0 +} + +func (m *ConfigureTopicRequest) GetPartitionCount() int32 { + if m != nil { + return m.PartitionCount + } + return 0 +} + +type ConfigureTopicResponse struct { + Error string `protobuf:"bytes,1,opt,name=error" json:"error,omitempty"` +} + +func (m *ConfigureTopicResponse) Reset() { *m = ConfigureTopicResponse{} } +func (m *ConfigureTopicResponse) String() string { return proto.CompactTextString(m) } +func (*ConfigureTopicResponse) ProtoMessage() {} +func (*ConfigureTopicResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } + +func (m *ConfigureTopicResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +type DeleteTopicRequest struct { + Topic string `protobuf:"bytes,1,opt,name=topic" json:"topic,omitempty"` +} + +func (m *DeleteTopicRequest) Reset() { *m = DeleteTopicRequest{} } +func (m *DeleteTopicRequest) String() string { return proto.CompactTextString(m) } +func (*DeleteTopicRequest) ProtoMessage() {} +func (*DeleteTopicRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } + +func (m *DeleteTopicRequest) GetTopic() string { + if m != nil { + return m.Topic + } + return "" +} + +type DeleteTopicResponse struct { + Error string `protobuf:"bytes,1,opt,name=error" json:"error,omitempty"` +} + +func (m *DeleteTopicResponse) Reset() { *m = DeleteTopicResponse{} } +func (m *DeleteTopicResponse) String() string { return proto.CompactTextString(m) } +func (*DeleteTopicResponse) ProtoMessage() {} +func (*DeleteTopicResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} } + +func (m *DeleteTopicResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +func init() { + proto.RegisterType((*WriteMessageRequest)(nil), "queue_pb.WriteMessageRequest") + proto.RegisterType((*WriteMessageResponse)(nil), "queue_pb.WriteMessageResponse") + proto.RegisterType((*ReadMessageRequest)(nil), "queue_pb.ReadMessageRequest") + proto.RegisterType((*ReadMessageResponse)(nil), "queue_pb.ReadMessageResponse") + proto.RegisterType((*ConfigureTopicRequest)(nil), "queue_pb.ConfigureTopicRequest") + proto.RegisterType((*ConfigureTopicResponse)(nil), "queue_pb.ConfigureTopicResponse") + proto.RegisterType((*DeleteTopicRequest)(nil), "queue_pb.DeleteTopicRequest") + proto.RegisterType((*DeleteTopicResponse)(nil), "queue_pb.DeleteTopicResponse") +} + +// Reference imports to suppress errors if they are not otherwise used. +var _ context.Context +var _ grpc.ClientConn + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. 
+const _ = grpc.SupportPackageIsVersion4 + +// Client API for SeaweedQueue service + +type SeaweedQueueClient interface { + StreamWrite(ctx context.Context, opts ...grpc.CallOption) (SeaweedQueue_StreamWriteClient, error) + StreamRead(ctx context.Context, in *ReadMessageRequest, opts ...grpc.CallOption) (SeaweedQueue_StreamReadClient, error) + ConfigureTopic(ctx context.Context, in *ConfigureTopicRequest, opts ...grpc.CallOption) (*ConfigureTopicResponse, error) + DeleteTopic(ctx context.Context, in *DeleteTopicRequest, opts ...grpc.CallOption) (*DeleteTopicResponse, error) +} + +type seaweedQueueClient struct { + cc *grpc.ClientConn +} + +func NewSeaweedQueueClient(cc *grpc.ClientConn) SeaweedQueueClient { + return &seaweedQueueClient{cc} +} + +func (c *seaweedQueueClient) StreamWrite(ctx context.Context, opts ...grpc.CallOption) (SeaweedQueue_StreamWriteClient, error) { + stream, err := grpc.NewClientStream(ctx, &_SeaweedQueue_serviceDesc.Streams[0], c.cc, "/queue_pb.SeaweedQueue/StreamWrite", opts...) + if err != nil { + return nil, err + } + x := &seaweedQueueStreamWriteClient{stream} + return x, nil +} + +type SeaweedQueue_StreamWriteClient interface { + Send(*WriteMessageRequest) error + Recv() (*WriteMessageResponse, error) + grpc.ClientStream +} + +type seaweedQueueStreamWriteClient struct { + grpc.ClientStream +} + +func (x *seaweedQueueStreamWriteClient) Send(m *WriteMessageRequest) error { + return x.ClientStream.SendMsg(m) +} + +func (x *seaweedQueueStreamWriteClient) Recv() (*WriteMessageResponse, error) { + m := new(WriteMessageResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *seaweedQueueClient) StreamRead(ctx context.Context, in *ReadMessageRequest, opts ...grpc.CallOption) (SeaweedQueue_StreamReadClient, error) { + stream, err := grpc.NewClientStream(ctx, &_SeaweedQueue_serviceDesc.Streams[1], c.cc, "/queue_pb.SeaweedQueue/StreamRead", opts...) + if err != nil { + return nil, err + } + x := &seaweedQueueStreamReadClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type SeaweedQueue_StreamReadClient interface { + Recv() (*ReadMessageResponse, error) + grpc.ClientStream +} + +type seaweedQueueStreamReadClient struct { + grpc.ClientStream +} + +func (x *seaweedQueueStreamReadClient) Recv() (*ReadMessageResponse, error) { + m := new(ReadMessageResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *seaweedQueueClient) ConfigureTopic(ctx context.Context, in *ConfigureTopicRequest, opts ...grpc.CallOption) (*ConfigureTopicResponse, error) { + out := new(ConfigureTopicResponse) + err := grpc.Invoke(ctx, "/queue_pb.SeaweedQueue/ConfigureTopic", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *seaweedQueueClient) DeleteTopic(ctx context.Context, in *DeleteTopicRequest, opts ...grpc.CallOption) (*DeleteTopicResponse, error) { + out := new(DeleteTopicResponse) + err := grpc.Invoke(ctx, "/queue_pb.SeaweedQueue/DeleteTopic", in, out, c.cc, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +// Server API for SeaweedQueue service + +type SeaweedQueueServer interface { + StreamWrite(SeaweedQueue_StreamWriteServer) error + StreamRead(*ReadMessageRequest, SeaweedQueue_StreamReadServer) error + ConfigureTopic(context.Context, *ConfigureTopicRequest) (*ConfigureTopicResponse, error) + DeleteTopic(context.Context, *DeleteTopicRequest) (*DeleteTopicResponse, error) +} + +func RegisterSeaweedQueueServer(s *grpc.Server, srv SeaweedQueueServer) { + s.RegisterService(&_SeaweedQueue_serviceDesc, srv) +} + +func _SeaweedQueue_StreamWrite_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(SeaweedQueueServer).StreamWrite(&seaweedQueueStreamWriteServer{stream}) +} + +type SeaweedQueue_StreamWriteServer interface { + Send(*WriteMessageResponse) error + Recv() (*WriteMessageRequest, error) + grpc.ServerStream +} + +type seaweedQueueStreamWriteServer struct { + grpc.ServerStream +} + +func (x *seaweedQueueStreamWriteServer) Send(m *WriteMessageResponse) error { + return x.ServerStream.SendMsg(m) +} + +func (x *seaweedQueueStreamWriteServer) Recv() (*WriteMessageRequest, error) { + m := new(WriteMessageRequest) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func _SeaweedQueue_StreamRead_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(ReadMessageRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(SeaweedQueueServer).StreamRead(m, &seaweedQueueStreamReadServer{stream}) +} + +type SeaweedQueue_StreamReadServer interface { + Send(*ReadMessageResponse) error + grpc.ServerStream +} + +type seaweedQueueStreamReadServer struct { + grpc.ServerStream +} + +func (x *seaweedQueueStreamReadServer) Send(m *ReadMessageResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _SeaweedQueue_ConfigureTopic_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ConfigureTopicRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedQueueServer).ConfigureTopic(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/queue_pb.SeaweedQueue/ConfigureTopic", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedQueueServer).ConfigureTopic(ctx, req.(*ConfigureTopicRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _SeaweedQueue_DeleteTopic_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(DeleteTopicRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(SeaweedQueueServer).DeleteTopic(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/queue_pb.SeaweedQueue/DeleteTopic", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(SeaweedQueueServer).DeleteTopic(ctx, req.(*DeleteTopicRequest)) + } + return interceptor(ctx, in, info, handler) +} + +var _SeaweedQueue_serviceDesc = grpc.ServiceDesc{ + ServiceName: "queue_pb.SeaweedQueue", + HandlerType: (*SeaweedQueueServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "ConfigureTopic", + Handler: _SeaweedQueue_ConfigureTopic_Handler, + }, + { + MethodName: "DeleteTopic", + Handler: _SeaweedQueue_DeleteTopic_Handler, + }, + }, + 
Streams: []grpc.StreamDesc{ + { + StreamName: "StreamWrite", + Handler: _SeaweedQueue_StreamWrite_Handler, + ServerStreams: true, + ClientStreams: true, + }, + { + StreamName: "StreamRead", + Handler: _SeaweedQueue_StreamRead_Handler, + ServerStreams: true, + }, + }, + Metadata: "queue.proto", +} + +func init() { proto.RegisterFile("queue.proto", fileDescriptor0) } + +var fileDescriptor0 = []byte{ + // 429 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x8c, 0x53, 0xcd, 0x6e, 0xd3, 0x40, + 0x10, 0xae, 0x9b, 0xa6, 0x94, 0x49, 0x28, 0x68, 0xd2, 0xa2, 0x10, 0xd1, 0x36, 0x5a, 0x0e, 0x44, + 0x20, 0x59, 0x15, 0xbc, 0x41, 0x03, 0x27, 0x68, 0x04, 0x0e, 0x08, 0x89, 0x8b, 0xb5, 0xb5, 0xa7, + 0x95, 0x15, 0xb3, 0xeb, 0xee, 0x8e, 0xa9, 0x7a, 0xe2, 0x2d, 0x79, 0x1e, 0xe4, 0xb5, 0x5c, 0xdb, + 0x34, 0xb1, 0x7a, 0xf3, 0xcc, 0x78, 0xe7, 0xfb, 0xd9, 0x6f, 0x61, 0x70, 0x9d, 0x53, 0x4e, 0x7e, + 0x66, 0x34, 0x6b, 0xdc, 0x73, 0x45, 0x98, 0x5d, 0x88, 0x3f, 0x30, 0xfa, 0x61, 0x12, 0xa6, 0x73, + 0xb2, 0x56, 0x5e, 0x51, 0x40, 0xd7, 0x39, 0x59, 0xc6, 0x03, 0xe8, 0xb3, 0xce, 0x92, 0x68, 0xec, + 0x4d, 0xbd, 0xd9, 0xe3, 0xa0, 0x2c, 0xf0, 0x05, 0xec, 0xd1, 0x6f, 0x52, 0x1c, 0x2a, 0x3b, 0xde, + 0x9e, 0x7a, 0xb3, 0x5e, 0xf0, 0xc8, 0xd5, 0x0b, 0x8b, 0xaf, 0xe0, 0x49, 0x26, 0x0d, 0x27, 0x9c, + 0x68, 0x15, 0xae, 0xe8, 0x76, 0xdc, 0x9b, 0x7a, 0xb3, 0x61, 0x30, 0xbc, 0x6b, 0x7e, 0xa2, 0x5b, + 0x44, 0xd8, 0x89, 0x25, 0xcb, 0xf1, 0x8e, 0x9b, 0xb9, 0x6f, 0x31, 0x87, 0x83, 0x36, 0x01, 0x9b, + 0x69, 0x65, 0xa9, 0x60, 0x40, 0xc6, 0x68, 0x53, 0x31, 0x70, 0x05, 0x1e, 0xc2, 0xae, 0x8c, 0x56, + 0x35, 0x7e, 0x5f, 0x46, 0xab, 0x85, 0x15, 0x1f, 0x01, 0x03, 0x92, 0xf1, 0x43, 0x45, 0x58, 0x96, + 0xa6, 0x29, 0xc2, 0xd5, 0x0b, 0x2b, 0x7e, 0xc2, 0xa8, 0xb5, 0xa6, 0x93, 0x4a, 0x87, 0x19, 0x95, + 0xce, 0x5e, 0x43, 0xe7, 0x0d, 0x1c, 0xce, 0xb5, 0xba, 0x4c, 0xae, 0x72, 0x43, 0xdf, 0x0a, 0x22, + 0xdd, 0x2c, 0x4f, 0x60, 0xc0, 0x9c, 0x86, 0x96, 0x22, 0xad, 0xe2, 0x0a, 0x00, 0x98, 0xd3, 0x65, + 0xd9, 0xc1, 0xd7, 0xf0, 0xb4, 0x36, 0x3c, 0xd2, 0xb9, 0x62, 0x07, 0xd7, 0x0f, 0xf6, 0xef, 0xda, + 0xf3, 0xa2, 0x2b, 0x7c, 0x78, 0xfe, 0x3f, 0x70, 0x97, 0x2e, 0xf1, 0x06, 0xf0, 0x03, 0xa5, 0xc4, + 0x0f, 0x60, 0x29, 0xde, 0xc2, 0xa8, 0xf5, 0x6f, 0xd7, 0xe2, 0x77, 0x7f, 0xb7, 0x61, 0xb8, 0x24, + 0x79, 0x43, 0x14, 0x7f, 0x2d, 0xe2, 0x87, 0x01, 0x0c, 0x96, 0x6c, 0x48, 0xfe, 0x72, 0x01, 0xc0, + 0x23, 0xbf, 0x4a, 0xa5, 0xbf, 0x26, 0x92, 0x93, 0xe3, 0x4d, 0xe3, 0x12, 0x54, 0x6c, 0xcd, 0xbc, + 0x53, 0x0f, 0xcf, 0x01, 0xca, 0x9d, 0xc5, 0x45, 0xe2, 0xcb, 0xfa, 0xcc, 0xfd, 0x7c, 0x4c, 0x8e, + 0x36, 0x4c, 0xab, 0x85, 0xa7, 0x1e, 0x7e, 0x87, 0xfd, 0xb6, 0x79, 0x78, 0x52, 0x1f, 0x5a, 0x7b, + 0x9f, 0x93, 0xe9, 0xe6, 0x1f, 0xaa, 0xc5, 0xf8, 0x19, 0x06, 0x0d, 0xdf, 0x9a, 0x34, 0xef, 0x5b, + 0xdf, 0xa4, 0xb9, 0xc6, 0x6c, 0xb1, 0x75, 0x76, 0x0c, 0xcf, 0x6c, 0xe9, 0xeb, 0xa5, 0xf5, 0xa3, + 0x34, 0x21, 0xc5, 0x67, 0xe0, 0x2c, 0xfe, 0x52, 0xbc, 0xf6, 0x8b, 0x5d, 0xf7, 0xe8, 0xdf, 0xff, + 0x0b, 0x00, 0x00, 0xff, 0xff, 0x7d, 0x3e, 0x14, 0xd8, 0x03, 0x04, 0x00, 0x00, +} diff --git a/weed/pb/volume_info.go b/weed/pb/volume_info.go new file mode 100644 index 000000000..c4f733f5c --- /dev/null +++ b/weed/pb/volume_info.go @@ -0,0 +1,76 @@ +package pb + +import ( + "bytes" + "fmt" + "io/ioutil" + + _ "github.com/chrislusf/seaweedfs/weed/storage/backend/s3_backend" + "github.com/chrislusf/seaweedfs/weed/util" + + "github.com/golang/protobuf/jsonpb" + + "github.com/chrislusf/seaweedfs/weed/glog" + 
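+	// volume_server_pb supplies the VolumeInfo message that the helpers below
+	// load from and save to a volume's info file on disk (presumably the .vif
+	// file that copy_vif_file in volume_server.proto refers to).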
"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" +) + +// MaybeLoadVolumeInfo load the file data as *volume_server_pb.VolumeInfo, the returned volumeInfo will not be nil +func MaybeLoadVolumeInfo(fileName string) (*volume_server_pb.VolumeInfo, bool, error) { + + volumeInfo := &volume_server_pb.VolumeInfo{} + + glog.V(1).Infof("maybeLoadVolumeInfo checks %s", fileName) + if exists, canRead, _, _, _ := util.CheckFile(fileName); !exists || !canRead { + if !exists { + return volumeInfo, false, nil + } + if !canRead { + glog.Warningf("can not read %s", fileName) + return volumeInfo, false, fmt.Errorf("can not read %s", fileName) + } + return volumeInfo, false, nil + } + + glog.V(1).Infof("maybeLoadVolumeInfo reads %s", fileName) + tierData, readErr := ioutil.ReadFile(fileName) + if readErr != nil { + glog.Warningf("fail to read %s : %v", fileName, readErr) + return volumeInfo, false, fmt.Errorf("fail to read %s : %v", fileName, readErr) + } + + glog.V(1).Infof("maybeLoadVolumeInfo Unmarshal volume info %v", fileName) + if err := jsonpb.Unmarshal(bytes.NewReader(tierData), volumeInfo); err != nil { + glog.Warningf("unmarshal error: %v", err) + return volumeInfo, false, fmt.Errorf("unmarshal error: %v", err) + } + + if len(volumeInfo.GetFiles()) == 0 { + return volumeInfo, false, nil + } + + return volumeInfo, true, nil +} + +func SaveVolumeInfo(fileName string, volumeInfo *volume_server_pb.VolumeInfo) error { + + if exists, _, canWrite, _, _ := util.CheckFile(fileName); exists && !canWrite { + return fmt.Errorf("%s not writable", fileName) + } + + m := jsonpb.Marshaler{ + EmitDefaults: true, + Indent: " ", + } + + text, marshalErr := m.MarshalToString(volumeInfo) + if marshalErr != nil { + return fmt.Errorf("marshal to %s: %v", fileName, marshalErr) + } + + writeErr := ioutil.WriteFile(fileName, []byte(text), 0755) + if writeErr != nil { + return fmt.Errorf("fail to write %s : %v", fileName, writeErr) + } + + return nil +} diff --git a/weed/pb/volume_server.proto b/weed/pb/volume_server.proto index ded7f2aa3..ce53fdc96 100644 --- a/weed/pb/volume_server.proto +++ b/weed/pb/volume_server.proto @@ -8,6 +8,10 @@ service VolumeServer { //Experts only: takes multiple fid parameters. This function does not propagate deletes to replicas. 
rpc BatchDelete (BatchDeleteRequest) returns (BatchDeleteResponse) { } + + rpc FileGet (FileGetRequest) returns (stream FileGetResponse) { + } + rpc VacuumVolumeCheck (VacuumVolumeCheckRequest) returns (VacuumVolumeCheckResponse) { } rpc VacuumVolumeCompact (VacuumVolumeCompactRequest) returns (VacuumVolumeCompactResponse) { @@ -19,22 +23,70 @@ service VolumeServer { rpc DeleteCollection (DeleteCollectionRequest) returns (DeleteCollectionResponse) { } - rpc AssignVolume (AssignVolumeRequest) returns (AssignVolumeResponse) { + rpc AllocateVolume (AllocateVolumeRequest) returns (AllocateVolumeResponse) { } rpc VolumeSyncStatus (VolumeSyncStatusRequest) returns (VolumeSyncStatusResponse) { } - rpc VolumeSyncIndex (VolumeSyncIndexRequest) returns (VolumeSyncIndexResponse) { - } - rpc VolumeSyncData (VolumeSyncDataRequest) returns (VolumeSyncDataResponse) { + rpc VolumeIncrementalCopy (VolumeIncrementalCopyRequest) returns (stream VolumeIncrementalCopyResponse) { } rpc VolumeMount (VolumeMountRequest) returns (VolumeMountResponse) { } rpc VolumeUnmount (VolumeUnmountRequest) returns (VolumeUnmountResponse) { } + rpc VolumeDelete (VolumeDeleteRequest) returns (VolumeDeleteResponse) { + } + rpc VolumeMarkReadonly (VolumeMarkReadonlyRequest) returns (VolumeMarkReadonlyResponse) { + } + rpc VolumeConfigure (VolumeConfigureRequest) returns (VolumeConfigureResponse) { + } + + // copy the .idx .dat files, and mount this volume + rpc VolumeCopy (VolumeCopyRequest) returns (VolumeCopyResponse) { + } + rpc ReadVolumeFileStatus (ReadVolumeFileStatusRequest) returns (ReadVolumeFileStatusResponse) { + } + rpc CopyFile (CopyFileRequest) returns (stream CopyFileResponse) { + } - // rpc VolumeUiPage (VolumeUiPageRequest) returns (VolumeUiPageResponse) {} + rpc VolumeTailSender (VolumeTailSenderRequest) returns (stream VolumeTailSenderResponse) { + } + rpc VolumeTailReceiver (VolumeTailReceiverRequest) returns (VolumeTailReceiverResponse) { + } + + // erasure coding + rpc VolumeEcShardsGenerate (VolumeEcShardsGenerateRequest) returns (VolumeEcShardsGenerateResponse) { + } + rpc VolumeEcShardsRebuild (VolumeEcShardsRebuildRequest) returns (VolumeEcShardsRebuildResponse) { + } + rpc VolumeEcShardsCopy (VolumeEcShardsCopyRequest) returns (VolumeEcShardsCopyResponse) { + } + rpc VolumeEcShardsDelete (VolumeEcShardsDeleteRequest) returns (VolumeEcShardsDeleteResponse) { + } + rpc VolumeEcShardsMount (VolumeEcShardsMountRequest) returns (VolumeEcShardsMountResponse) { + } + rpc VolumeEcShardsUnmount (VolumeEcShardsUnmountRequest) returns (VolumeEcShardsUnmountResponse) { + } + rpc VolumeEcShardRead (VolumeEcShardReadRequest) returns (stream VolumeEcShardReadResponse) { + } + rpc VolumeEcBlobDelete (VolumeEcBlobDeleteRequest) returns (VolumeEcBlobDeleteResponse) { + } + rpc VolumeEcShardsToVolume (VolumeEcShardsToVolumeRequest) returns (VolumeEcShardsToVolumeResponse) { + } + + // tiered storage + rpc VolumeTierMoveDatToRemote (VolumeTierMoveDatToRemoteRequest) returns (stream VolumeTierMoveDatToRemoteResponse) { + } + rpc VolumeTierMoveDatFromRemote (VolumeTierMoveDatFromRemoteRequest) returns (stream VolumeTierMoveDatFromRemoteResponse) { + } + + rpc VolumeServerStatus (VolumeServerStatusRequest) returns (VolumeServerStatusResponse) { + } + + // <experimental> query + rpc Query (QueryRequest) returns (stream QueriedStripe) { + } } @@ -52,33 +104,50 @@ message DeleteResult { int32 status = 2; string error = 3; uint32 size = 4; + uint32 version = 5; +} + +message FileGetRequest { + string file_id = 1; + bool 
accept_gzip = 2; +} +message FileGetResponse { + bytes data = 1; + uint32 content_length = 2; + string content_type = 3; + uint64 last_modified = 4; + string filename = 5; + string etag = 6; + bool is_gzipped = 7; + map<string, string> headers = 8; + int32 errorCode = 9; } message Empty { } message VacuumVolumeCheckRequest { - uint32 volumd_id = 1; + uint32 volume_id = 1; } message VacuumVolumeCheckResponse { double garbage_ratio = 1; } message VacuumVolumeCompactRequest { - uint32 volumd_id = 1; + uint32 volume_id = 1; int64 preallocate = 2; } message VacuumVolumeCompactResponse { } message VacuumVolumeCommitRequest { - uint32 volumd_id = 1; + uint32 volume_id = 1; } message VacuumVolumeCommitResponse { } message VacuumVolumeCleanupRequest { - uint32 volumd_id = 1; + uint32 volume_id = 1; } message VacuumVolumeCleanupResponse { } @@ -89,21 +158,23 @@ message DeleteCollectionRequest { message DeleteCollectionResponse { } -message AssignVolumeRequest { - uint32 volumd_id = 1; +message AllocateVolumeRequest { + uint32 volume_id = 1; string collection = 2; int64 preallocate = 3; string replication = 4; string ttl = 5; + uint32 memory_map_max_size_mb = 6; } -message AssignVolumeResponse { +message AllocateVolumeResponse { } message VolumeSyncStatusRequest { - uint32 volumd_id = 1; + uint32 volume_id = 1; } message VolumeSyncStatusResponse { - uint32 volumd_id = 1; + uint32 volume_id = 1; + string collection = 2; string replication = 4; string ttl = 5; uint64 tail_offset = 6; @@ -111,39 +182,180 @@ message VolumeSyncStatusResponse { uint64 idx_file_size = 8; } -message VolumeSyncIndexRequest { - uint32 volumd_id = 1; -} -message VolumeSyncIndexResponse { - bytes index_file_content = 1; -} - -message VolumeSyncDataRequest { - uint32 volumd_id = 1; - uint32 revision = 2; - uint32 offset = 3; - uint32 size = 4; - string needle_id = 5; +message VolumeIncrementalCopyRequest { + uint32 volume_id = 1; + uint64 since_ns = 2; } -message VolumeSyncDataResponse { +message VolumeIncrementalCopyResponse { bytes file_content = 1; } message VolumeMountRequest { - uint32 volumd_id = 1; + uint32 volume_id = 1; } message VolumeMountResponse { } message VolumeUnmountRequest { - uint32 volumd_id = 1; + uint32 volume_id = 1; } message VolumeUnmountResponse { } -message VolumeUiPageRequest { +message VolumeDeleteRequest { + uint32 volume_id = 1; +} +message VolumeDeleteResponse { +} + +message VolumeMarkReadonlyRequest { + uint32 volume_id = 1; +} +message VolumeMarkReadonlyResponse { +} + +message VolumeConfigureRequest { + uint32 volume_id = 1; + string replication = 2; +} +message VolumeConfigureResponse { + string error = 1; +} + +message VolumeCopyRequest { + uint32 volume_id = 1; + string collection = 2; + string replication = 3; + string ttl = 4; + string source_data_node = 5; +} +message VolumeCopyResponse { + uint64 last_append_at_ns = 1; +} + +message CopyFileRequest { + uint32 volume_id = 1; + string ext = 2; + uint32 compaction_revision = 3; + uint64 stop_offset = 4; + string collection = 5; + bool is_ec_volume = 6; + bool ignore_source_file_not_found = 7; +} +message CopyFileResponse { + bytes file_content = 1; +} + +message VolumeTailSenderRequest { + uint32 volume_id = 1; + uint64 since_ns = 2; + uint32 idle_timeout_seconds = 3; +} +message VolumeTailSenderResponse { + bytes needle_header = 1; + bytes needle_body = 2; + bool is_last_chunk = 3; +} + +message VolumeTailReceiverRequest { + uint32 volume_id = 1; + uint64 since_ns = 2; + uint32 idle_timeout_seconds = 3; + string source_volume_server = 4; 
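+    // Judging from the fields shared with VolumeTailSenderRequest above, the
+    // receiver is expected to dial source_volume_server and tail it with the
+    // same since_ns and idle_timeout_seconds settings.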
+}
+message VolumeTailReceiverResponse {
+}
+
+message VolumeEcShardsGenerateRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+}
+message VolumeEcShardsGenerateResponse {
+}
+
+message VolumeEcShardsRebuildRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+}
+message VolumeEcShardsRebuildResponse {
+    repeated uint32 rebuilt_shard_ids = 1;
+}
+
+message VolumeEcShardsCopyRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+    repeated uint32 shard_ids = 3;
+    bool copy_ecx_file = 4;
+    string source_data_node = 5;
+    bool copy_ecj_file = 6;
+    bool copy_vif_file = 7;
+}
+message VolumeEcShardsCopyResponse {
+}
+
+message VolumeEcShardsDeleteRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+    repeated uint32 shard_ids = 3;
 }
-message VolumeUiPageResponse {
+message VolumeEcShardsDeleteResponse {
+}
+
+message VolumeEcShardsMountRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+    repeated uint32 shard_ids = 3;
+}
+message VolumeEcShardsMountResponse {
+}
+
+message VolumeEcShardsUnmountRequest {
+    uint32 volume_id = 1;
+    repeated uint32 shard_ids = 3;
+}
+message VolumeEcShardsUnmountResponse {
+}
+
+message VolumeEcShardReadRequest {
+    uint32 volume_id = 1;
+    uint32 shard_id = 2;
+    int64 offset = 3;
+    int64 size = 4;
+    uint64 file_key = 5;
+}
+message VolumeEcShardReadResponse {
+    bytes data = 1;
+    bool is_deleted = 2;
+}
+
+message VolumeEcBlobDeleteRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+    uint64 file_key = 3;
+    uint32 version = 4;
+}
+message VolumeEcBlobDeleteResponse {
+}
+
+message VolumeEcShardsToVolumeRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+}
+message VolumeEcShardsToVolumeResponse {
+}
+
+message ReadVolumeFileStatusRequest {
+    uint32 volume_id = 1;
+}
+message ReadVolumeFileStatusResponse {
+    uint32 volume_id = 1;
+    uint64 idx_file_timestamp_seconds = 2;
+    uint64 idx_file_size = 3;
+    uint64 dat_file_timestamp_seconds = 4;
+    uint64 dat_file_size = 5;
+    uint64 file_count = 6;
+    uint32 compaction_revision = 7;
+    string collection = 8;
 }
 
 message DiskStatus {
@@ -151,6 +363,8 @@
     uint64 all = 2;
     uint64 used = 3;
     uint64 free = 4;
+    float percent_free = 5;
+    float percent_used = 6;
 }
 
 message MemStatus {
@@ -162,3 +376,106 @@
     uint64 heap = 6;
     uint64 stack = 7;
 }
+
+// tiered storage on volume servers
+message RemoteFile {
+    string backend_type = 1;
+    string backend_id = 2;
+    string key = 3;
+    uint64 offset = 4;
+    uint64 file_size = 5;
+    uint64 modified_time = 6;
+    string extension = 7;
+}
+message VolumeInfo {
+    repeated RemoteFile files = 1;
+    uint32 version = 2;
+    string replication = 3;
+}
+
+message VolumeTierMoveDatToRemoteRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+    string destination_backend_name = 3;
+    bool keep_local_dat_file = 4;
+}
+message VolumeTierMoveDatToRemoteResponse {
+    int64 processed = 1;
+    float processedPercentage = 2;
+}
+
+message VolumeTierMoveDatFromRemoteRequest {
+    uint32 volume_id = 1;
+    string collection = 2;
+    bool keep_remote_dat_file = 3;
+}
+message VolumeTierMoveDatFromRemoteResponse {
+    int64 processed = 1;
+    float processedPercentage = 2;
+}
+
+message VolumeServerStatusRequest {
+
+}
+message VolumeServerStatusResponse {
+    repeated DiskStatus disk_statuses = 1;
+    MemStatus memory_status = 2;
+}
+
+// select on volume servers
+message QueryRequest {
+    repeated string selections = 1;
+    repeated string from_file_ids = 2;
+    message Filter {
+        string field = 1;
+        string operand = 2;
+        string value = 3;
+    }
+    Filter filter = 3;
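+
+    // Note: the serialization messages below closely mirror the request shape
+    // of the AWS S3 Select API (InputSerialization/OutputSerialization with
+    // CSV, JSON, and Parquet options), which this experimental Query RPC
+    // appears to be modeled on.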
+
+    message InputSerialization {
+        // NONE | GZIP | BZIP2
+        string compression_type = 1;
+        message CSVInput {
+            string file_header_info = 1;       // Valid values: NONE | USE | IGNORE
+            string record_delimiter = 2;       // Default: \n
+            string field_delimiter = 3;        // Default: ,
+            string quote_character = 4;        // Default: "
+            string quote_escape_character = 5; // Default: "
+            string comments = 6;               // Default: #
+            // If true, records might contain record delimiters within quote characters
+            bool allow_quoted_record_delimiter = 7; // Default: false
+        }
+        message JSONInput {
+            string type = 1; // Valid values: DOCUMENT | LINES
+        }
+        message ParquetInput {
+        }
+
+        CSVInput csv_input = 2;
+        JSONInput json_input = 3;
+        ParquetInput parquet_input = 4;
+    }
+    InputSerialization input_serialization = 4;
+
+    message OutputSerialization {
+        message CSVOutput {
+            string quote_fields = 1;           // Valid values: ALWAYS | ASNEEDED
+            string record_delimiter = 2;       // Default: \n
+            string field_delimiter = 3;        // Default: ,
+            string quote_character = 4;        // Default: "
+            string quote_escape_character = 5; // Default: "
+        }
+        message JSONOutput {
+            string record_delimiter = 1;
+        }
+
+        CSVOutput csv_output = 2;
+        JSONOutput json_output = 3;
+    }
+
+    OutputSerialization output_serialization = 5;
+}
+message QueriedStripe {
+    bytes records = 1;
+}
diff --git a/weed/pb/volume_server_pb/volume_server.pb.go b/weed/pb/volume_server_pb/volume_server.pb.go
index d291d5ff7..56baa0cf7 100644
--- a/weed/pb/volume_server_pb/volume_server.pb.go
+++ b/weed/pb/volume_server_pb/volume_server.pb.go
@@ -12,6 +12,8 @@ It has these top-level messages:
 	BatchDeleteRequest
 	BatchDeleteResponse
 	DeleteResult
+	FileGetRequest
+	FileGetResponse
 	Empty
 	VacuumVolumeCheckRequest
 	VacuumVolumeCheckResponse
@@ -23,22 +25,62 @@ It has these top-level messages:
 	VacuumVolumeCleanupResponse
 	DeleteCollectionRequest
 	DeleteCollectionResponse
-	AssignVolumeRequest
-	AssignVolumeResponse
+	AllocateVolumeRequest
+	AllocateVolumeResponse
 	VolumeSyncStatusRequest
 	VolumeSyncStatusResponse
-	VolumeSyncIndexRequest
-	VolumeSyncIndexResponse
-	VolumeSyncDataRequest
-	VolumeSyncDataResponse
+	VolumeIncrementalCopyRequest
+	VolumeIncrementalCopyResponse
 	VolumeMountRequest
 	VolumeMountResponse
 	VolumeUnmountRequest
 	VolumeUnmountResponse
-	VolumeUiPageRequest
-	VolumeUiPageResponse
+	VolumeDeleteRequest
+	VolumeDeleteResponse
+	VolumeMarkReadonlyRequest
+	VolumeMarkReadonlyResponse
+	VolumeConfigureRequest
+	VolumeConfigureResponse
+	VolumeCopyRequest
+	VolumeCopyResponse
+	CopyFileRequest
+	CopyFileResponse
+	VolumeTailSenderRequest
+	VolumeTailSenderResponse
+	VolumeTailReceiverRequest
+	VolumeTailReceiverResponse
+	VolumeEcShardsGenerateRequest
+	VolumeEcShardsGenerateResponse
+	VolumeEcShardsRebuildRequest
+	VolumeEcShardsRebuildResponse
+	VolumeEcShardsCopyRequest
+	VolumeEcShardsCopyResponse
+	VolumeEcShardsDeleteRequest
+	VolumeEcShardsDeleteResponse
+	VolumeEcShardsMountRequest
+	VolumeEcShardsMountResponse
+	VolumeEcShardsUnmountRequest
+	VolumeEcShardsUnmountResponse
+	VolumeEcShardReadRequest
+	VolumeEcShardReadResponse
+	VolumeEcBlobDeleteRequest
+	VolumeEcBlobDeleteResponse
+	VolumeEcShardsToVolumeRequest
+	VolumeEcShardsToVolumeResponse
+	ReadVolumeFileStatusRequest
+	ReadVolumeFileStatusResponse
 	DiskStatus
 	MemStatus
+	RemoteFile
+	VolumeInfo
+	VolumeTierMoveDatToRemoteRequest
+	VolumeTierMoveDatToRemoteResponse
+	VolumeTierMoveDatFromRemoteRequest
+	VolumeTierMoveDatFromRemoteResponse
+	VolumeServerStatusRequest
+	VolumeServerStatusResponse
+	QueryRequest
+	QueriedStripe
 */
package volume_server_pb @@ -95,10 +137,11 @@ func (m *BatchDeleteResponse) GetResults() []*DeleteResult { } type DeleteResult struct { - FileId string `protobuf:"bytes,1,opt,name=file_id,json=fileId" json:"file_id,omitempty"` - Status int32 `protobuf:"varint,2,opt,name=status" json:"status,omitempty"` - Error string `protobuf:"bytes,3,opt,name=error" json:"error,omitempty"` - Size uint32 `protobuf:"varint,4,opt,name=size" json:"size,omitempty"` + FileId string `protobuf:"bytes,1,opt,name=file_id,json=fileId" json:"file_id,omitempty"` + Status int32 `protobuf:"varint,2,opt,name=status" json:"status,omitempty"` + Error string `protobuf:"bytes,3,opt,name=error" json:"error,omitempty"` + Size uint32 `protobuf:"varint,4,opt,name=size" json:"size,omitempty"` + Version uint32 `protobuf:"varint,5,opt,name=version" json:"version,omitempty"` } func (m *DeleteResult) Reset() { *m = DeleteResult{} } @@ -134,26 +177,137 @@ func (m *DeleteResult) GetSize() uint32 { return 0 } +func (m *DeleteResult) GetVersion() uint32 { + if m != nil { + return m.Version + } + return 0 +} + +type FileGetRequest struct { + FileId string `protobuf:"bytes,1,opt,name=file_id,json=fileId" json:"file_id,omitempty"` + AcceptGzip bool `protobuf:"varint,2,opt,name=accept_gzip,json=acceptGzip" json:"accept_gzip,omitempty"` +} + +func (m *FileGetRequest) Reset() { *m = FileGetRequest{} } +func (m *FileGetRequest) String() string { return proto.CompactTextString(m) } +func (*FileGetRequest) ProtoMessage() {} +func (*FileGetRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} } + +func (m *FileGetRequest) GetFileId() string { + if m != nil { + return m.FileId + } + return "" +} + +func (m *FileGetRequest) GetAcceptGzip() bool { + if m != nil { + return m.AcceptGzip + } + return false +} + +type FileGetResponse struct { + Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + ContentLength uint32 `protobuf:"varint,2,opt,name=content_length,json=contentLength" json:"content_length,omitempty"` + ContentType string `protobuf:"bytes,3,opt,name=content_type,json=contentType" json:"content_type,omitempty"` + LastModified uint64 `protobuf:"varint,4,opt,name=last_modified,json=lastModified" json:"last_modified,omitempty"` + Filename string `protobuf:"bytes,5,opt,name=filename" json:"filename,omitempty"` + Etag string `protobuf:"bytes,6,opt,name=etag" json:"etag,omitempty"` + IsGzipped bool `protobuf:"varint,7,opt,name=is_gzipped,json=isGzipped" json:"is_gzipped,omitempty"` + Headers map[string]string `protobuf:"bytes,8,rep,name=headers" json:"headers,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + ErrorCode int32 `protobuf:"varint,9,opt,name=errorCode" json:"errorCode,omitempty"` +} + +func (m *FileGetResponse) Reset() { *m = FileGetResponse{} } +func (m *FileGetResponse) String() string { return proto.CompactTextString(m) } +func (*FileGetResponse) ProtoMessage() {} +func (*FileGetResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} } + +func (m *FileGetResponse) GetData() []byte { + if m != nil { + return m.Data + } + return nil +} + +func (m *FileGetResponse) GetContentLength() uint32 { + if m != nil { + return m.ContentLength + } + return 0 +} + +func (m *FileGetResponse) GetContentType() string { + if m != nil { + return m.ContentType + } + return "" +} + +func (m *FileGetResponse) GetLastModified() uint64 { + if m != nil { + return m.LastModified + } + return 0 +} + +func (m *FileGetResponse) GetFilename() string { + if m != nil { 
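+		// Generated getters like this one are deliberately nil-safe: the
+		// receiver check lets callers chain them on a possibly-nil message.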
+ return m.Filename + } + return "" +} + +func (m *FileGetResponse) GetEtag() string { + if m != nil { + return m.Etag + } + return "" +} + +func (m *FileGetResponse) GetIsGzipped() bool { + if m != nil { + return m.IsGzipped + } + return false +} + +func (m *FileGetResponse) GetHeaders() map[string]string { + if m != nil { + return m.Headers + } + return nil +} + +func (m *FileGetResponse) GetErrorCode() int32 { + if m != nil { + return m.ErrorCode + } + return 0 +} + type Empty struct { } func (m *Empty) Reset() { *m = Empty{} } func (m *Empty) String() string { return proto.CompactTextString(m) } func (*Empty) ProtoMessage() {} -func (*Empty) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} } +func (*Empty) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } type VacuumVolumeCheckRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` } func (m *VacuumVolumeCheckRequest) Reset() { *m = VacuumVolumeCheckRequest{} } func (m *VacuumVolumeCheckRequest) String() string { return proto.CompactTextString(m) } func (*VacuumVolumeCheckRequest) ProtoMessage() {} -func (*VacuumVolumeCheckRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} } +func (*VacuumVolumeCheckRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } -func (m *VacuumVolumeCheckRequest) GetVolumdId() uint32 { +func (m *VacuumVolumeCheckRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } @@ -165,7 +319,7 @@ type VacuumVolumeCheckResponse struct { func (m *VacuumVolumeCheckResponse) Reset() { *m = VacuumVolumeCheckResponse{} } func (m *VacuumVolumeCheckResponse) String() string { return proto.CompactTextString(m) } func (*VacuumVolumeCheckResponse) ProtoMessage() {} -func (*VacuumVolumeCheckResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} } +func (*VacuumVolumeCheckResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} } func (m *VacuumVolumeCheckResponse) GetGarbageRatio() float64 { if m != nil { @@ -175,18 +329,18 @@ func (m *VacuumVolumeCheckResponse) GetGarbageRatio() float64 { } type VacuumVolumeCompactRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` Preallocate int64 `protobuf:"varint,2,opt,name=preallocate" json:"preallocate,omitempty"` } func (m *VacuumVolumeCompactRequest) Reset() { *m = VacuumVolumeCompactRequest{} } func (m *VacuumVolumeCompactRequest) String() string { return proto.CompactTextString(m) } func (*VacuumVolumeCompactRequest) ProtoMessage() {} -func (*VacuumVolumeCompactRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} } +func (*VacuumVolumeCompactRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} } -func (m *VacuumVolumeCompactRequest) GetVolumdId() uint32 { +func (m *VacuumVolumeCompactRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } @@ -204,20 +358,20 @@ type VacuumVolumeCompactResponse struct { func (m *VacuumVolumeCompactResponse) Reset() { *m = VacuumVolumeCompactResponse{} } func (m *VacuumVolumeCompactResponse) String() string { return proto.CompactTextString(m) } func (*VacuumVolumeCompactResponse) ProtoMessage() {} -func 
(*VacuumVolumeCompactResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} } +func (*VacuumVolumeCompactResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} } type VacuumVolumeCommitRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` } func (m *VacuumVolumeCommitRequest) Reset() { *m = VacuumVolumeCommitRequest{} } func (m *VacuumVolumeCommitRequest) String() string { return proto.CompactTextString(m) } func (*VacuumVolumeCommitRequest) ProtoMessage() {} -func (*VacuumVolumeCommitRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} } +func (*VacuumVolumeCommitRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} } -func (m *VacuumVolumeCommitRequest) GetVolumdId() uint32 { +func (m *VacuumVolumeCommitRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } @@ -228,20 +382,20 @@ type VacuumVolumeCommitResponse struct { func (m *VacuumVolumeCommitResponse) Reset() { *m = VacuumVolumeCommitResponse{} } func (m *VacuumVolumeCommitResponse) String() string { return proto.CompactTextString(m) } func (*VacuumVolumeCommitResponse) ProtoMessage() {} -func (*VacuumVolumeCommitResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} } +func (*VacuumVolumeCommitResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} } type VacuumVolumeCleanupRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` } func (m *VacuumVolumeCleanupRequest) Reset() { *m = VacuumVolumeCleanupRequest{} } func (m *VacuumVolumeCleanupRequest) String() string { return proto.CompactTextString(m) } func (*VacuumVolumeCleanupRequest) ProtoMessage() {} -func (*VacuumVolumeCleanupRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} } +func (*VacuumVolumeCleanupRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} } -func (m *VacuumVolumeCleanupRequest) GetVolumdId() uint32 { +func (m *VacuumVolumeCleanupRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } @@ -252,7 +406,7 @@ type VacuumVolumeCleanupResponse struct { func (m *VacuumVolumeCleanupResponse) Reset() { *m = VacuumVolumeCleanupResponse{} } func (m *VacuumVolumeCleanupResponse) String() string { return proto.CompactTextString(m) } func (*VacuumVolumeCleanupResponse) ProtoMessage() {} -func (*VacuumVolumeCleanupResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} } +func (*VacuumVolumeCleanupResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} } type DeleteCollectionRequest struct { Collection string `protobuf:"bytes,1,opt,name=collection" json:"collection,omitempty"` @@ -261,7 +415,7 @@ type DeleteCollectionRequest struct { func (m *DeleteCollectionRequest) Reset() { *m = DeleteCollectionRequest{} } func (m *DeleteCollectionRequest) String() string { return proto.CompactTextString(m) } func (*DeleteCollectionRequest) ProtoMessage() {} -func (*DeleteCollectionRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} } +func (*DeleteCollectionRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} } func (m *DeleteCollectionRequest) 
GetCollection() string { if m != nil { @@ -276,82 +430,91 @@ type DeleteCollectionResponse struct { func (m *DeleteCollectionResponse) Reset() { *m = DeleteCollectionResponse{} } func (m *DeleteCollectionResponse) String() string { return proto.CompactTextString(m) } func (*DeleteCollectionResponse) ProtoMessage() {} -func (*DeleteCollectionResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} } +func (*DeleteCollectionResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{15} } -type AssignVolumeRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` - Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` - Preallocate int64 `protobuf:"varint,3,opt,name=preallocate" json:"preallocate,omitempty"` - Replication string `protobuf:"bytes,4,opt,name=replication" json:"replication,omitempty"` - Ttl string `protobuf:"bytes,5,opt,name=ttl" json:"ttl,omitempty"` +type AllocateVolumeRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + Preallocate int64 `protobuf:"varint,3,opt,name=preallocate" json:"preallocate,omitempty"` + Replication string `protobuf:"bytes,4,opt,name=replication" json:"replication,omitempty"` + Ttl string `protobuf:"bytes,5,opt,name=ttl" json:"ttl,omitempty"` + MemoryMapMaxSizeMb uint32 `protobuf:"varint,6,opt,name=memory_map_max_size_mb,json=memoryMapMaxSizeMb" json:"memory_map_max_size_mb,omitempty"` } -func (m *AssignVolumeRequest) Reset() { *m = AssignVolumeRequest{} } -func (m *AssignVolumeRequest) String() string { return proto.CompactTextString(m) } -func (*AssignVolumeRequest) ProtoMessage() {} -func (*AssignVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} } +func (m *AllocateVolumeRequest) Reset() { *m = AllocateVolumeRequest{} } +func (m *AllocateVolumeRequest) String() string { return proto.CompactTextString(m) } +func (*AllocateVolumeRequest) ProtoMessage() {} +func (*AllocateVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{16} } -func (m *AssignVolumeRequest) GetVolumdId() uint32 { +func (m *AllocateVolumeRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } -func (m *AssignVolumeRequest) GetCollection() string { +func (m *AllocateVolumeRequest) GetCollection() string { if m != nil { return m.Collection } return "" } -func (m *AssignVolumeRequest) GetPreallocate() int64 { +func (m *AllocateVolumeRequest) GetPreallocate() int64 { if m != nil { return m.Preallocate } return 0 } -func (m *AssignVolumeRequest) GetReplication() string { +func (m *AllocateVolumeRequest) GetReplication() string { if m != nil { return m.Replication } return "" } -func (m *AssignVolumeRequest) GetTtl() string { +func (m *AllocateVolumeRequest) GetTtl() string { if m != nil { return m.Ttl } return "" } -type AssignVolumeResponse struct { +func (m *AllocateVolumeRequest) GetMemoryMapMaxSizeMb() uint32 { + if m != nil { + return m.MemoryMapMaxSizeMb + } + return 0 } -func (m *AssignVolumeResponse) Reset() { *m = AssignVolumeResponse{} } -func (m *AssignVolumeResponse) String() string { return proto.CompactTextString(m) } -func (*AssignVolumeResponse) ProtoMessage() {} -func (*AssignVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{15} } +type AllocateVolumeResponse struct { +} + +func 
(m *AllocateVolumeResponse) Reset() { *m = AllocateVolumeResponse{} } +func (m *AllocateVolumeResponse) String() string { return proto.CompactTextString(m) } +func (*AllocateVolumeResponse) ProtoMessage() {} +func (*AllocateVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{17} } type VolumeSyncStatusRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` } func (m *VolumeSyncStatusRequest) Reset() { *m = VolumeSyncStatusRequest{} } func (m *VolumeSyncStatusRequest) String() string { return proto.CompactTextString(m) } func (*VolumeSyncStatusRequest) ProtoMessage() {} -func (*VolumeSyncStatusRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{16} } +func (*VolumeSyncStatusRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{18} } -func (m *VolumeSyncStatusRequest) GetVolumdId() uint32 { +func (m *VolumeSyncStatusRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } type VolumeSyncStatusResponse struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` Replication string `protobuf:"bytes,4,opt,name=replication" json:"replication,omitempty"` Ttl string `protobuf:"bytes,5,opt,name=ttl" json:"ttl,omitempty"` TailOffset uint64 `protobuf:"varint,6,opt,name=tail_offset,json=tailOffset" json:"tail_offset,omitempty"` @@ -362,15 +525,22 @@ type VolumeSyncStatusResponse struct { func (m *VolumeSyncStatusResponse) Reset() { *m = VolumeSyncStatusResponse{} } func (m *VolumeSyncStatusResponse) String() string { return proto.CompactTextString(m) } func (*VolumeSyncStatusResponse) ProtoMessage() {} -func (*VolumeSyncStatusResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{17} } +func (*VolumeSyncStatusResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{19} } -func (m *VolumeSyncStatusResponse) GetVolumdId() uint32 { +func (m *VolumeSyncStatusResponse) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } +func (m *VolumeSyncStatusResponse) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + func (m *VolumeSyncStatusResponse) GetReplication() string { if m != nil { return m.Replication @@ -406,177 +576,947 @@ func (m *VolumeSyncStatusResponse) GetIdxFileSize() uint64 { return 0 } -type VolumeSyncIndexRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` +type VolumeIncrementalCopyRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + SinceNs uint64 `protobuf:"varint,2,opt,name=since_ns,json=sinceNs" json:"since_ns,omitempty"` } -func (m *VolumeSyncIndexRequest) Reset() { *m = VolumeSyncIndexRequest{} } -func (m *VolumeSyncIndexRequest) String() string { return proto.CompactTextString(m) } -func (*VolumeSyncIndexRequest) ProtoMessage() {} -func (*VolumeSyncIndexRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{18} } +func (m *VolumeIncrementalCopyRequest) Reset() { *m = VolumeIncrementalCopyRequest{} } +func (m *VolumeIncrementalCopyRequest) String() string { 
return proto.CompactTextString(m) } +func (*VolumeIncrementalCopyRequest) ProtoMessage() {} +func (*VolumeIncrementalCopyRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{20} } + +func (m *VolumeIncrementalCopyRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} -func (m *VolumeSyncIndexRequest) GetVolumdId() uint32 { +func (m *VolumeIncrementalCopyRequest) GetSinceNs() uint64 { if m != nil { - return m.VolumdId + return m.SinceNs } return 0 } -type VolumeSyncIndexResponse struct { - IndexFileContent []byte `protobuf:"bytes,1,opt,name=index_file_content,json=indexFileContent,proto3" json:"index_file_content,omitempty"` +type VolumeIncrementalCopyResponse struct { + FileContent []byte `protobuf:"bytes,1,opt,name=file_content,json=fileContent,proto3" json:"file_content,omitempty"` } -func (m *VolumeSyncIndexResponse) Reset() { *m = VolumeSyncIndexResponse{} } -func (m *VolumeSyncIndexResponse) String() string { return proto.CompactTextString(m) } -func (*VolumeSyncIndexResponse) ProtoMessage() {} -func (*VolumeSyncIndexResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{19} } +func (m *VolumeIncrementalCopyResponse) Reset() { *m = VolumeIncrementalCopyResponse{} } +func (m *VolumeIncrementalCopyResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeIncrementalCopyResponse) ProtoMessage() {} +func (*VolumeIncrementalCopyResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{21} } -func (m *VolumeSyncIndexResponse) GetIndexFileContent() []byte { +func (m *VolumeIncrementalCopyResponse) GetFileContent() []byte { if m != nil { - return m.IndexFileContent + return m.FileContent } return nil } -type VolumeSyncDataRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` - Revision uint32 `protobuf:"varint,2,opt,name=revision" json:"revision,omitempty"` - Offset uint32 `protobuf:"varint,3,opt,name=offset" json:"offset,omitempty"` - Size uint32 `protobuf:"varint,4,opt,name=size" json:"size,omitempty"` - NeedleId string `protobuf:"bytes,5,opt,name=needle_id,json=needleId" json:"needle_id,omitempty"` +type VolumeMountRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` } -func (m *VolumeSyncDataRequest) Reset() { *m = VolumeSyncDataRequest{} } -func (m *VolumeSyncDataRequest) String() string { return proto.CompactTextString(m) } -func (*VolumeSyncDataRequest) ProtoMessage() {} -func (*VolumeSyncDataRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{20} } +func (m *VolumeMountRequest) Reset() { *m = VolumeMountRequest{} } +func (m *VolumeMountRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeMountRequest) ProtoMessage() {} +func (*VolumeMountRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{22} } -func (m *VolumeSyncDataRequest) GetVolumdId() uint32 { +func (m *VolumeMountRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } -func (m *VolumeSyncDataRequest) GetRevision() uint32 { +type VolumeMountResponse struct { +} + +func (m *VolumeMountResponse) Reset() { *m = VolumeMountResponse{} } +func (m *VolumeMountResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeMountResponse) ProtoMessage() {} +func (*VolumeMountResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{23} } + +type VolumeUnmountRequest struct { + VolumeId 
uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` +} + +func (m *VolumeUnmountRequest) Reset() { *m = VolumeUnmountRequest{} } +func (m *VolumeUnmountRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeUnmountRequest) ProtoMessage() {} +func (*VolumeUnmountRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{24} } + +func (m *VolumeUnmountRequest) GetVolumeId() uint32 { if m != nil { - return m.Revision + return m.VolumeId } return 0 } -func (m *VolumeSyncDataRequest) GetOffset() uint32 { +type VolumeUnmountResponse struct { +} + +func (m *VolumeUnmountResponse) Reset() { *m = VolumeUnmountResponse{} } +func (m *VolumeUnmountResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeUnmountResponse) ProtoMessage() {} +func (*VolumeUnmountResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{25} } + +type VolumeDeleteRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` +} + +func (m *VolumeDeleteRequest) Reset() { *m = VolumeDeleteRequest{} } +func (m *VolumeDeleteRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeDeleteRequest) ProtoMessage() {} +func (*VolumeDeleteRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{26} } + +func (m *VolumeDeleteRequest) GetVolumeId() uint32 { if m != nil { - return m.Offset + return m.VolumeId } return 0 } -func (m *VolumeSyncDataRequest) GetSize() uint32 { +type VolumeDeleteResponse struct { +} + +func (m *VolumeDeleteResponse) Reset() { *m = VolumeDeleteResponse{} } +func (m *VolumeDeleteResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeDeleteResponse) ProtoMessage() {} +func (*VolumeDeleteResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{27} } + +type VolumeMarkReadonlyRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` +} + +func (m *VolumeMarkReadonlyRequest) Reset() { *m = VolumeMarkReadonlyRequest{} } +func (m *VolumeMarkReadonlyRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeMarkReadonlyRequest) ProtoMessage() {} +func (*VolumeMarkReadonlyRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{28} } + +func (m *VolumeMarkReadonlyRequest) GetVolumeId() uint32 { if m != nil { - return m.Size + return m.VolumeId + } + return 0 +} + +type VolumeMarkReadonlyResponse struct { +} + +func (m *VolumeMarkReadonlyResponse) Reset() { *m = VolumeMarkReadonlyResponse{} } +func (m *VolumeMarkReadonlyResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeMarkReadonlyResponse) ProtoMessage() {} +func (*VolumeMarkReadonlyResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{29} } + +type VolumeConfigureRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Replication string `protobuf:"bytes,2,opt,name=replication" json:"replication,omitempty"` +} + +func (m *VolumeConfigureRequest) Reset() { *m = VolumeConfigureRequest{} } +func (m *VolumeConfigureRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeConfigureRequest) ProtoMessage() {} +func (*VolumeConfigureRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{30} } + +func (m *VolumeConfigureRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId } return 0 } -func (m 
*VolumeSyncDataRequest) GetNeedleId() string { +func (m *VolumeConfigureRequest) GetReplication() string { if m != nil { - return m.NeedleId + return m.Replication } return "" } -type VolumeSyncDataResponse struct { +type VolumeConfigureResponse struct { + Error string `protobuf:"bytes,1,opt,name=error" json:"error,omitempty"` +} + +func (m *VolumeConfigureResponse) Reset() { *m = VolumeConfigureResponse{} } +func (m *VolumeConfigureResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeConfigureResponse) ProtoMessage() {} +func (*VolumeConfigureResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{31} } + +func (m *VolumeConfigureResponse) GetError() string { + if m != nil { + return m.Error + } + return "" +} + +type VolumeCopyRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + Replication string `protobuf:"bytes,3,opt,name=replication" json:"replication,omitempty"` + Ttl string `protobuf:"bytes,4,opt,name=ttl" json:"ttl,omitempty"` + SourceDataNode string `protobuf:"bytes,5,opt,name=source_data_node,json=sourceDataNode" json:"source_data_node,omitempty"` +} + +func (m *VolumeCopyRequest) Reset() { *m = VolumeCopyRequest{} } +func (m *VolumeCopyRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeCopyRequest) ProtoMessage() {} +func (*VolumeCopyRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{32} } + +func (m *VolumeCopyRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeCopyRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *VolumeCopyRequest) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +func (m *VolumeCopyRequest) GetTtl() string { + if m != nil { + return m.Ttl + } + return "" +} + +func (m *VolumeCopyRequest) GetSourceDataNode() string { + if m != nil { + return m.SourceDataNode + } + return "" +} + +type VolumeCopyResponse struct { + LastAppendAtNs uint64 `protobuf:"varint,1,opt,name=last_append_at_ns,json=lastAppendAtNs" json:"last_append_at_ns,omitempty"` +} + +func (m *VolumeCopyResponse) Reset() { *m = VolumeCopyResponse{} } +func (m *VolumeCopyResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeCopyResponse) ProtoMessage() {} +func (*VolumeCopyResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{33} } + +func (m *VolumeCopyResponse) GetLastAppendAtNs() uint64 { + if m != nil { + return m.LastAppendAtNs + } + return 0 +} + +type CopyFileRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Ext string `protobuf:"bytes,2,opt,name=ext" json:"ext,omitempty"` + CompactionRevision uint32 `protobuf:"varint,3,opt,name=compaction_revision,json=compactionRevision" json:"compaction_revision,omitempty"` + StopOffset uint64 `protobuf:"varint,4,opt,name=stop_offset,json=stopOffset" json:"stop_offset,omitempty"` + Collection string `protobuf:"bytes,5,opt,name=collection" json:"collection,omitempty"` + IsEcVolume bool `protobuf:"varint,6,opt,name=is_ec_volume,json=isEcVolume" json:"is_ec_volume,omitempty"` + IgnoreSourceFileNotFound bool `protobuf:"varint,7,opt,name=ignore_source_file_not_found,json=ignoreSourceFileNotFound" json:"ignore_source_file_not_found,omitempty"` +} + +func (m *CopyFileRequest) 
Reset() { *m = CopyFileRequest{} } +func (m *CopyFileRequest) String() string { return proto.CompactTextString(m) } +func (*CopyFileRequest) ProtoMessage() {} +func (*CopyFileRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{34} } + +func (m *CopyFileRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *CopyFileRequest) GetExt() string { + if m != nil { + return m.Ext + } + return "" +} + +func (m *CopyFileRequest) GetCompactionRevision() uint32 { + if m != nil { + return m.CompactionRevision + } + return 0 +} + +func (m *CopyFileRequest) GetStopOffset() uint64 { + if m != nil { + return m.StopOffset + } + return 0 +} + +func (m *CopyFileRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *CopyFileRequest) GetIsEcVolume() bool { + if m != nil { + return m.IsEcVolume + } + return false +} + +func (m *CopyFileRequest) GetIgnoreSourceFileNotFound() bool { + if m != nil { + return m.IgnoreSourceFileNotFound + } + return false +} + +type CopyFileResponse struct { FileContent []byte `protobuf:"bytes,1,opt,name=file_content,json=fileContent,proto3" json:"file_content,omitempty"` } -func (m *VolumeSyncDataResponse) Reset() { *m = VolumeSyncDataResponse{} } -func (m *VolumeSyncDataResponse) String() string { return proto.CompactTextString(m) } -func (*VolumeSyncDataResponse) ProtoMessage() {} -func (*VolumeSyncDataResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{21} } +func (m *CopyFileResponse) Reset() { *m = CopyFileResponse{} } +func (m *CopyFileResponse) String() string { return proto.CompactTextString(m) } +func (*CopyFileResponse) ProtoMessage() {} +func (*CopyFileResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{35} } -func (m *VolumeSyncDataResponse) GetFileContent() []byte { +func (m *CopyFileResponse) GetFileContent() []byte { if m != nil { return m.FileContent } return nil } -type VolumeMountRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` +type VolumeTailSenderRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + SinceNs uint64 `protobuf:"varint,2,opt,name=since_ns,json=sinceNs" json:"since_ns,omitempty"` + IdleTimeoutSeconds uint32 `protobuf:"varint,3,opt,name=idle_timeout_seconds,json=idleTimeoutSeconds" json:"idle_timeout_seconds,omitempty"` } -func (m *VolumeMountRequest) Reset() { *m = VolumeMountRequest{} } -func (m *VolumeMountRequest) String() string { return proto.CompactTextString(m) } -func (*VolumeMountRequest) ProtoMessage() {} -func (*VolumeMountRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{22} } +func (m *VolumeTailSenderRequest) Reset() { *m = VolumeTailSenderRequest{} } +func (m *VolumeTailSenderRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeTailSenderRequest) ProtoMessage() {} +func (*VolumeTailSenderRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{36} } -func (m *VolumeMountRequest) GetVolumdId() uint32 { +func (m *VolumeTailSenderRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } -type VolumeMountResponse struct { +func (m *VolumeTailSenderRequest) GetSinceNs() uint64 { + if m != nil { + return m.SinceNs + } + return 0 } -func (m *VolumeMountResponse) Reset() { *m = VolumeMountResponse{} } -func (m *VolumeMountResponse) String() string { return 
proto.CompactTextString(m) } -func (*VolumeMountResponse) ProtoMessage() {} -func (*VolumeMountResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{23} } +func (m *VolumeTailSenderRequest) GetIdleTimeoutSeconds() uint32 { + if m != nil { + return m.IdleTimeoutSeconds + } + return 0 +} -type VolumeUnmountRequest struct { - VolumdId uint32 `protobuf:"varint,1,opt,name=volumd_id,json=volumdId" json:"volumd_id,omitempty"` +type VolumeTailSenderResponse struct { + NeedleHeader []byte `protobuf:"bytes,1,opt,name=needle_header,json=needleHeader,proto3" json:"needle_header,omitempty"` + NeedleBody []byte `protobuf:"bytes,2,opt,name=needle_body,json=needleBody,proto3" json:"needle_body,omitempty"` + IsLastChunk bool `protobuf:"varint,3,opt,name=is_last_chunk,json=isLastChunk" json:"is_last_chunk,omitempty"` } -func (m *VolumeUnmountRequest) Reset() { *m = VolumeUnmountRequest{} } -func (m *VolumeUnmountRequest) String() string { return proto.CompactTextString(m) } -func (*VolumeUnmountRequest) ProtoMessage() {} -func (*VolumeUnmountRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{24} } +func (m *VolumeTailSenderResponse) Reset() { *m = VolumeTailSenderResponse{} } +func (m *VolumeTailSenderResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeTailSenderResponse) ProtoMessage() {} +func (*VolumeTailSenderResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{37} } + +func (m *VolumeTailSenderResponse) GetNeedleHeader() []byte { + if m != nil { + return m.NeedleHeader + } + return nil +} + +func (m *VolumeTailSenderResponse) GetNeedleBody() []byte { + if m != nil { + return m.NeedleBody + } + return nil +} + +func (m *VolumeTailSenderResponse) GetIsLastChunk() bool { + if m != nil { + return m.IsLastChunk + } + return false +} + +type VolumeTailReceiverRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + SinceNs uint64 `protobuf:"varint,2,opt,name=since_ns,json=sinceNs" json:"since_ns,omitempty"` + IdleTimeoutSeconds uint32 `protobuf:"varint,3,opt,name=idle_timeout_seconds,json=idleTimeoutSeconds" json:"idle_timeout_seconds,omitempty"` + SourceVolumeServer string `protobuf:"bytes,4,opt,name=source_volume_server,json=sourceVolumeServer" json:"source_volume_server,omitempty"` +} + +func (m *VolumeTailReceiverRequest) Reset() { *m = VolumeTailReceiverRequest{} } +func (m *VolumeTailReceiverRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeTailReceiverRequest) ProtoMessage() {} +func (*VolumeTailReceiverRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{38} } -func (m *VolumeUnmountRequest) GetVolumdId() uint32 { +func (m *VolumeTailReceiverRequest) GetVolumeId() uint32 { if m != nil { - return m.VolumdId + return m.VolumeId } return 0 } -type VolumeUnmountResponse struct { +func (m *VolumeTailReceiverRequest) GetSinceNs() uint64 { + if m != nil { + return m.SinceNs + } + return 0 } -func (m *VolumeUnmountResponse) Reset() { *m = VolumeUnmountResponse{} } -func (m *VolumeUnmountResponse) String() string { return proto.CompactTextString(m) } -func (*VolumeUnmountResponse) ProtoMessage() {} -func (*VolumeUnmountResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{25} } +func (m *VolumeTailReceiverRequest) GetIdleTimeoutSeconds() uint32 { + if m != nil { + return m.IdleTimeoutSeconds + } + return 0 +} + +func (m *VolumeTailReceiverRequest) GetSourceVolumeServer() string { + if m != nil { + return 
m.SourceVolumeServer + } + return "" +} -type VolumeUiPageRequest struct { +type VolumeTailReceiverResponse struct { } -func (m *VolumeUiPageRequest) Reset() { *m = VolumeUiPageRequest{} } -func (m *VolumeUiPageRequest) String() string { return proto.CompactTextString(m) } -func (*VolumeUiPageRequest) ProtoMessage() {} -func (*VolumeUiPageRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{26} } +func (m *VolumeTailReceiverResponse) Reset() { *m = VolumeTailReceiverResponse{} } +func (m *VolumeTailReceiverResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeTailReceiverResponse) ProtoMessage() {} +func (*VolumeTailReceiverResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{39} } -type VolumeUiPageResponse struct { +type VolumeEcShardsGenerateRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` } -func (m *VolumeUiPageResponse) Reset() { *m = VolumeUiPageResponse{} } -func (m *VolumeUiPageResponse) String() string { return proto.CompactTextString(m) } -func (*VolumeUiPageResponse) ProtoMessage() {} -func (*VolumeUiPageResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{27} } +func (m *VolumeEcShardsGenerateRequest) Reset() { *m = VolumeEcShardsGenerateRequest{} } +func (m *VolumeEcShardsGenerateRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsGenerateRequest) ProtoMessage() {} +func (*VolumeEcShardsGenerateRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{40} } + +func (m *VolumeEcShardsGenerateRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcShardsGenerateRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +type VolumeEcShardsGenerateResponse struct { +} + +func (m *VolumeEcShardsGenerateResponse) Reset() { *m = VolumeEcShardsGenerateResponse{} } +func (m *VolumeEcShardsGenerateResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsGenerateResponse) ProtoMessage() {} +func (*VolumeEcShardsGenerateResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{41} } + +type VolumeEcShardsRebuildRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` +} + +func (m *VolumeEcShardsRebuildRequest) Reset() { *m = VolumeEcShardsRebuildRequest{} } +func (m *VolumeEcShardsRebuildRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsRebuildRequest) ProtoMessage() {} +func (*VolumeEcShardsRebuildRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{42} } + +func (m *VolumeEcShardsRebuildRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcShardsRebuildRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +type VolumeEcShardsRebuildResponse struct { + RebuiltShardIds []uint32 `protobuf:"varint,1,rep,packed,name=rebuilt_shard_ids,json=rebuiltShardIds" json:"rebuilt_shard_ids,omitempty"` +} + +func (m *VolumeEcShardsRebuildResponse) Reset() { *m = VolumeEcShardsRebuildResponse{} } +func (m *VolumeEcShardsRebuildResponse) String() string { return proto.CompactTextString(m) } +func 
(*VolumeEcShardsRebuildResponse) ProtoMessage() {} +func (*VolumeEcShardsRebuildResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{43} } + +func (m *VolumeEcShardsRebuildResponse) GetRebuiltShardIds() []uint32 { + if m != nil { + return m.RebuiltShardIds + } + return nil +} + +type VolumeEcShardsCopyRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + ShardIds []uint32 `protobuf:"varint,3,rep,packed,name=shard_ids,json=shardIds" json:"shard_ids,omitempty"` + CopyEcxFile bool `protobuf:"varint,4,opt,name=copy_ecx_file,json=copyEcxFile" json:"copy_ecx_file,omitempty"` + SourceDataNode string `protobuf:"bytes,5,opt,name=source_data_node,json=sourceDataNode" json:"source_data_node,omitempty"` + CopyEcjFile bool `protobuf:"varint,6,opt,name=copy_ecj_file,json=copyEcjFile" json:"copy_ecj_file,omitempty"` + CopyVifFile bool `protobuf:"varint,7,opt,name=copy_vif_file,json=copyVifFile" json:"copy_vif_file,omitempty"` +} + +func (m *VolumeEcShardsCopyRequest) Reset() { *m = VolumeEcShardsCopyRequest{} } +func (m *VolumeEcShardsCopyRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsCopyRequest) ProtoMessage() {} +func (*VolumeEcShardsCopyRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{44} } + +func (m *VolumeEcShardsCopyRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcShardsCopyRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *VolumeEcShardsCopyRequest) GetShardIds() []uint32 { + if m != nil { + return m.ShardIds + } + return nil +} + +func (m *VolumeEcShardsCopyRequest) GetCopyEcxFile() bool { + if m != nil { + return m.CopyEcxFile + } + return false +} + +func (m *VolumeEcShardsCopyRequest) GetSourceDataNode() string { + if m != nil { + return m.SourceDataNode + } + return "" +} + +func (m *VolumeEcShardsCopyRequest) GetCopyEcjFile() bool { + if m != nil { + return m.CopyEcjFile + } + return false +} + +func (m *VolumeEcShardsCopyRequest) GetCopyVifFile() bool { + if m != nil { + return m.CopyVifFile + } + return false +} + +type VolumeEcShardsCopyResponse struct { +} + +func (m *VolumeEcShardsCopyResponse) Reset() { *m = VolumeEcShardsCopyResponse{} } +func (m *VolumeEcShardsCopyResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsCopyResponse) ProtoMessage() {} +func (*VolumeEcShardsCopyResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{45} } + +type VolumeEcShardsDeleteRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + ShardIds []uint32 `protobuf:"varint,3,rep,packed,name=shard_ids,json=shardIds" json:"shard_ids,omitempty"` +} + +func (m *VolumeEcShardsDeleteRequest) Reset() { *m = VolumeEcShardsDeleteRequest{} } +func (m *VolumeEcShardsDeleteRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsDeleteRequest) ProtoMessage() {} +func (*VolumeEcShardsDeleteRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{46} } + +func (m *VolumeEcShardsDeleteRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcShardsDeleteRequest) GetCollection() string { + if m != nil 
{ + return m.Collection + } + return "" +} + +func (m *VolumeEcShardsDeleteRequest) GetShardIds() []uint32 { + if m != nil { + return m.ShardIds + } + return nil +} + +type VolumeEcShardsDeleteResponse struct { +} + +func (m *VolumeEcShardsDeleteResponse) Reset() { *m = VolumeEcShardsDeleteResponse{} } +func (m *VolumeEcShardsDeleteResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsDeleteResponse) ProtoMessage() {} +func (*VolumeEcShardsDeleteResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{47} } + +type VolumeEcShardsMountRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + ShardIds []uint32 `protobuf:"varint,3,rep,packed,name=shard_ids,json=shardIds" json:"shard_ids,omitempty"` +} + +func (m *VolumeEcShardsMountRequest) Reset() { *m = VolumeEcShardsMountRequest{} } +func (m *VolumeEcShardsMountRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsMountRequest) ProtoMessage() {} +func (*VolumeEcShardsMountRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{48} } + +func (m *VolumeEcShardsMountRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcShardsMountRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *VolumeEcShardsMountRequest) GetShardIds() []uint32 { + if m != nil { + return m.ShardIds + } + return nil +} + +type VolumeEcShardsMountResponse struct { +} + +func (m *VolumeEcShardsMountResponse) Reset() { *m = VolumeEcShardsMountResponse{} } +func (m *VolumeEcShardsMountResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsMountResponse) ProtoMessage() {} +func (*VolumeEcShardsMountResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{49} } + +type VolumeEcShardsUnmountRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + ShardIds []uint32 `protobuf:"varint,3,rep,packed,name=shard_ids,json=shardIds" json:"shard_ids,omitempty"` +} + +func (m *VolumeEcShardsUnmountRequest) Reset() { *m = VolumeEcShardsUnmountRequest{} } +func (m *VolumeEcShardsUnmountRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsUnmountRequest) ProtoMessage() {} +func (*VolumeEcShardsUnmountRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{50} } + +func (m *VolumeEcShardsUnmountRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcShardsUnmountRequest) GetShardIds() []uint32 { + if m != nil { + return m.ShardIds + } + return nil +} + +type VolumeEcShardsUnmountResponse struct { +} + +func (m *VolumeEcShardsUnmountResponse) Reset() { *m = VolumeEcShardsUnmountResponse{} } +func (m *VolumeEcShardsUnmountResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsUnmountResponse) ProtoMessage() {} +func (*VolumeEcShardsUnmountResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{51} } + +type VolumeEcShardReadRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + ShardId uint32 `protobuf:"varint,2,opt,name=shard_id,json=shardId" json:"shard_id,omitempty"` + Offset int64 `protobuf:"varint,3,opt,name=offset" json:"offset,omitempty"` + Size 
int64 `protobuf:"varint,4,opt,name=size" json:"size,omitempty"` + FileKey uint64 `protobuf:"varint,5,opt,name=file_key,json=fileKey" json:"file_key,omitempty"` +} + +func (m *VolumeEcShardReadRequest) Reset() { *m = VolumeEcShardReadRequest{} } +func (m *VolumeEcShardReadRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardReadRequest) ProtoMessage() {} +func (*VolumeEcShardReadRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{52} } + +func (m *VolumeEcShardReadRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcShardReadRequest) GetShardId() uint32 { + if m != nil { + return m.ShardId + } + return 0 +} + +func (m *VolumeEcShardReadRequest) GetOffset() int64 { + if m != nil { + return m.Offset + } + return 0 +} + +func (m *VolumeEcShardReadRequest) GetSize() int64 { + if m != nil { + return m.Size + } + return 0 +} + +func (m *VolumeEcShardReadRequest) GetFileKey() uint64 { + if m != nil { + return m.FileKey + } + return 0 +} + +type VolumeEcShardReadResponse struct { + Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + IsDeleted bool `protobuf:"varint,2,opt,name=is_deleted,json=isDeleted" json:"is_deleted,omitempty"` +} + +func (m *VolumeEcShardReadResponse) Reset() { *m = VolumeEcShardReadResponse{} } +func (m *VolumeEcShardReadResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardReadResponse) ProtoMessage() {} +func (*VolumeEcShardReadResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{53} } + +func (m *VolumeEcShardReadResponse) GetData() []byte { + if m != nil { + return m.Data + } + return nil +} + +func (m *VolumeEcShardReadResponse) GetIsDeleted() bool { + if m != nil { + return m.IsDeleted + } + return false +} + +type VolumeEcBlobDeleteRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + FileKey uint64 `protobuf:"varint,3,opt,name=file_key,json=fileKey" json:"file_key,omitempty"` + Version uint32 `protobuf:"varint,4,opt,name=version" json:"version,omitempty"` +} + +func (m *VolumeEcBlobDeleteRequest) Reset() { *m = VolumeEcBlobDeleteRequest{} } +func (m *VolumeEcBlobDeleteRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcBlobDeleteRequest) ProtoMessage() {} +func (*VolumeEcBlobDeleteRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{54} } + +func (m *VolumeEcBlobDeleteRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcBlobDeleteRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *VolumeEcBlobDeleteRequest) GetFileKey() uint64 { + if m != nil { + return m.FileKey + } + return 0 +} + +func (m *VolumeEcBlobDeleteRequest) GetVersion() uint32 { + if m != nil { + return m.Version + } + return 0 +} + +type VolumeEcBlobDeleteResponse struct { +} + +func (m *VolumeEcBlobDeleteResponse) Reset() { *m = VolumeEcBlobDeleteResponse{} } +func (m *VolumeEcBlobDeleteResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeEcBlobDeleteResponse) ProtoMessage() {} +func (*VolumeEcBlobDeleteResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{55} } + +type VolumeEcShardsToVolumeRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" 
json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` +} + +func (m *VolumeEcShardsToVolumeRequest) Reset() { *m = VolumeEcShardsToVolumeRequest{} } +func (m *VolumeEcShardsToVolumeRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsToVolumeRequest) ProtoMessage() {} +func (*VolumeEcShardsToVolumeRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{56} } + +func (m *VolumeEcShardsToVolumeRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeEcShardsToVolumeRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +type VolumeEcShardsToVolumeResponse struct { +} + +func (m *VolumeEcShardsToVolumeResponse) Reset() { *m = VolumeEcShardsToVolumeResponse{} } +func (m *VolumeEcShardsToVolumeResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeEcShardsToVolumeResponse) ProtoMessage() {} +func (*VolumeEcShardsToVolumeResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{57} } + +type ReadVolumeFileStatusRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` +} + +func (m *ReadVolumeFileStatusRequest) Reset() { *m = ReadVolumeFileStatusRequest{} } +func (m *ReadVolumeFileStatusRequest) String() string { return proto.CompactTextString(m) } +func (*ReadVolumeFileStatusRequest) ProtoMessage() {} +func (*ReadVolumeFileStatusRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{58} } + +func (m *ReadVolumeFileStatusRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +type ReadVolumeFileStatusResponse struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + IdxFileTimestampSeconds uint64 `protobuf:"varint,2,opt,name=idx_file_timestamp_seconds,json=idxFileTimestampSeconds" json:"idx_file_timestamp_seconds,omitempty"` + IdxFileSize uint64 `protobuf:"varint,3,opt,name=idx_file_size,json=idxFileSize" json:"idx_file_size,omitempty"` + DatFileTimestampSeconds uint64 `protobuf:"varint,4,opt,name=dat_file_timestamp_seconds,json=datFileTimestampSeconds" json:"dat_file_timestamp_seconds,omitempty"` + DatFileSize uint64 `protobuf:"varint,5,opt,name=dat_file_size,json=datFileSize" json:"dat_file_size,omitempty"` + FileCount uint64 `protobuf:"varint,6,opt,name=file_count,json=fileCount" json:"file_count,omitempty"` + CompactionRevision uint32 `protobuf:"varint,7,opt,name=compaction_revision,json=compactionRevision" json:"compaction_revision,omitempty"` + Collection string `protobuf:"bytes,8,opt,name=collection" json:"collection,omitempty"` +} + +func (m *ReadVolumeFileStatusResponse) Reset() { *m = ReadVolumeFileStatusResponse{} } +func (m *ReadVolumeFileStatusResponse) String() string { return proto.CompactTextString(m) } +func (*ReadVolumeFileStatusResponse) ProtoMessage() {} +func (*ReadVolumeFileStatusResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{59} } + +func (m *ReadVolumeFileStatusResponse) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *ReadVolumeFileStatusResponse) GetIdxFileTimestampSeconds() uint64 { + if m != nil { + return m.IdxFileTimestampSeconds + } + return 0 +} + +func (m *ReadVolumeFileStatusResponse) GetIdxFileSize() uint64 { + if m != nil { + return m.IdxFileSize + } + return 0 +} + +func (m *ReadVolumeFileStatusResponse) 
GetDatFileTimestampSeconds() uint64 {
+	if m != nil {
+		return m.DatFileTimestampSeconds
+	}
+	return 0
+}
+
+func (m *ReadVolumeFileStatusResponse) GetDatFileSize() uint64 {
+	if m != nil {
+		return m.DatFileSize
+	}
+	return 0
+}
+
+func (m *ReadVolumeFileStatusResponse) GetFileCount() uint64 {
+	if m != nil {
+		return m.FileCount
+	}
+	return 0
+}
+
+func (m *ReadVolumeFileStatusResponse) GetCompactionRevision() uint32 {
+	if m != nil {
+		return m.CompactionRevision
+	}
+	return 0
+}
+
+func (m *ReadVolumeFileStatusResponse) GetCollection() string {
+	if m != nil {
+		return m.Collection
+	}
+	return ""
+}
 
 type DiskStatus struct {
-	Dir  string `protobuf:"bytes,1,opt,name=dir" json:"dir,omitempty"`
-	All  uint64 `protobuf:"varint,2,opt,name=all" json:"all,omitempty"`
-	Used uint64 `protobuf:"varint,3,opt,name=used" json:"used,omitempty"`
-	Free uint64 `protobuf:"varint,4,opt,name=free" json:"free,omitempty"`
+	Dir         string  `protobuf:"bytes,1,opt,name=dir" json:"dir,omitempty"`
+	All         uint64  `protobuf:"varint,2,opt,name=all" json:"all,omitempty"`
+	Used        uint64  `protobuf:"varint,3,opt,name=used" json:"used,omitempty"`
+	Free        uint64  `protobuf:"varint,4,opt,name=free" json:"free,omitempty"`
+	PercentFree float32 `protobuf:"fixed32,5,opt,name=percent_free,json=percentFree" json:"percent_free,omitempty"`
+	PercentUsed float32 `protobuf:"fixed32,6,opt,name=percent_used,json=percentUsed" json:"percent_used,omitempty"`
 }
 
 func (m *DiskStatus) Reset()                    { *m = DiskStatus{} }
 func (m *DiskStatus) String() string            { return proto.CompactTextString(m) }
 func (*DiskStatus) ProtoMessage()               {}
-func (*DiskStatus) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{28} }
+func (*DiskStatus) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{60} }
 
 func (m *DiskStatus) GetDir() string {
 	if m != nil {
@@ -606,6 +1546,20 @@ func (m *DiskStatus) GetFree() uint64 {
 	return 0
 }
 
+func (m *DiskStatus) GetPercentFree() float32 {
+	if m != nil {
+		return m.PercentFree
+	}
+	return 0
+}
+
+func (m *DiskStatus) GetPercentUsed() float32 {
+	if m != nil {
+		return m.PercentUsed
+	}
+	return 0
+}
+
 type MemStatus struct {
 	Goroutines int32  `protobuf:"varint,1,opt,name=goroutines" json:"goroutines,omitempty"`
 	All        uint64 `protobuf:"varint,2,opt,name=all" json:"all,omitempty"`
@@ -619,7 +1573,7 @@ type MemStatus struct {
 func (m *MemStatus) Reset()                    { *m = MemStatus{} }
 func (m *MemStatus) String() string            { return proto.CompactTextString(m) }
 func (*MemStatus) ProtoMessage()               {}
-func (*MemStatus) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{29} }
+func (*MemStatus) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{61} }
 
 func (m *MemStatus) GetGoroutines() int32 {
 	if m != nil {
@@ -670,10 +1624,614 @@ func (m *MemStatus) GetStack() uint64 {
 	return 0
 }
 
+// tiered storage on volume servers
+type RemoteFile struct {
+	BackendType  string `protobuf:"bytes,1,opt,name=backend_type,json=backendType" json:"backend_type,omitempty"`
+	BackendId    string `protobuf:"bytes,2,opt,name=backend_id,json=backendId" json:"backend_id,omitempty"`
+	Key          string `protobuf:"bytes,3,opt,name=key" json:"key,omitempty"`
+	Offset       uint64 `protobuf:"varint,4,opt,name=offset" json:"offset,omitempty"`
+	FileSize     uint64 `protobuf:"varint,5,opt,name=file_size,json=fileSize" json:"file_size,omitempty"`
+	ModifiedTime uint64 `protobuf:"varint,6,opt,name=modified_time,json=modifiedTime" json:"modified_time,omitempty"`
+	Extension    string `protobuf:"bytes,7,opt,name=extension" json:"extension,omitempty"`
+}
+
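+// Example (illustrative only, not produced by protoc; the literal values
+// below are hypothetical): a RemoteFile entry records where a tiered
+// volume's .dat content lives on a remote storage backend.
+//
+//	rf := &RemoteFile{
+//		BackendType: "s3",      // type of the remote backend
+//		BackendId:   "default", // which configured backend of that type
+//		Key:         "7.dat",   // object key on the remote store
+//		FileSize:    1073741824,
+//	}
+//	fmt.Println(rf.GetBackendType(), rf.GetKey())
+//
+// As with every message in this file, the generated getters are nil-safe:
+// called on a nil *RemoteFile they return the field's zero value.
+
+func (m *RemoteFile)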
Reset() { *m = RemoteFile{} } +func (m *RemoteFile) String() string { return proto.CompactTextString(m) } +func (*RemoteFile) ProtoMessage() {} +func (*RemoteFile) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{62} } + +func (m *RemoteFile) GetBackendType() string { + if m != nil { + return m.BackendType + } + return "" +} + +func (m *RemoteFile) GetBackendId() string { + if m != nil { + return m.BackendId + } + return "" +} + +func (m *RemoteFile) GetKey() string { + if m != nil { + return m.Key + } + return "" +} + +func (m *RemoteFile) GetOffset() uint64 { + if m != nil { + return m.Offset + } + return 0 +} + +func (m *RemoteFile) GetFileSize() uint64 { + if m != nil { + return m.FileSize + } + return 0 +} + +func (m *RemoteFile) GetModifiedTime() uint64 { + if m != nil { + return m.ModifiedTime + } + return 0 +} + +func (m *RemoteFile) GetExtension() string { + if m != nil { + return m.Extension + } + return "" +} + +type VolumeInfo struct { + Files []*RemoteFile `protobuf:"bytes,1,rep,name=files" json:"files,omitempty"` + Version uint32 `protobuf:"varint,2,opt,name=version" json:"version,omitempty"` + Replication string `protobuf:"bytes,3,opt,name=replication" json:"replication,omitempty"` +} + +func (m *VolumeInfo) Reset() { *m = VolumeInfo{} } +func (m *VolumeInfo) String() string { return proto.CompactTextString(m) } +func (*VolumeInfo) ProtoMessage() {} +func (*VolumeInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{63} } + +func (m *VolumeInfo) GetFiles() []*RemoteFile { + if m != nil { + return m.Files + } + return nil +} + +func (m *VolumeInfo) GetVersion() uint32 { + if m != nil { + return m.Version + } + return 0 +} + +func (m *VolumeInfo) GetReplication() string { + if m != nil { + return m.Replication + } + return "" +} + +type VolumeTierMoveDatToRemoteRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + DestinationBackendName string `protobuf:"bytes,3,opt,name=destination_backend_name,json=destinationBackendName" json:"destination_backend_name,omitempty"` + KeepLocalDatFile bool `protobuf:"varint,4,opt,name=keep_local_dat_file,json=keepLocalDatFile" json:"keep_local_dat_file,omitempty"` +} + +func (m *VolumeTierMoveDatToRemoteRequest) Reset() { *m = VolumeTierMoveDatToRemoteRequest{} } +func (m *VolumeTierMoveDatToRemoteRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeTierMoveDatToRemoteRequest) ProtoMessage() {} +func (*VolumeTierMoveDatToRemoteRequest) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{64} +} + +func (m *VolumeTierMoveDatToRemoteRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeTierMoveDatToRemoteRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *VolumeTierMoveDatToRemoteRequest) GetDestinationBackendName() string { + if m != nil { + return m.DestinationBackendName + } + return "" +} + +func (m *VolumeTierMoveDatToRemoteRequest) GetKeepLocalDatFile() bool { + if m != nil { + return m.KeepLocalDatFile + } + return false +} + +type VolumeTierMoveDatToRemoteResponse struct { + Processed int64 `protobuf:"varint,1,opt,name=processed" json:"processed,omitempty"` + ProcessedPercentage float32 `protobuf:"fixed32,2,opt,name=processedPercentage" json:"processedPercentage,omitempty"` +} + +func (m *VolumeTierMoveDatToRemoteResponse) 
Reset() { *m = VolumeTierMoveDatToRemoteResponse{} } +func (m *VolumeTierMoveDatToRemoteResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeTierMoveDatToRemoteResponse) ProtoMessage() {} +func (*VolumeTierMoveDatToRemoteResponse) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{65} +} + +func (m *VolumeTierMoveDatToRemoteResponse) GetProcessed() int64 { + if m != nil { + return m.Processed + } + return 0 +} + +func (m *VolumeTierMoveDatToRemoteResponse) GetProcessedPercentage() float32 { + if m != nil { + return m.ProcessedPercentage + } + return 0 +} + +type VolumeTierMoveDatFromRemoteRequest struct { + VolumeId uint32 `protobuf:"varint,1,opt,name=volume_id,json=volumeId" json:"volume_id,omitempty"` + Collection string `protobuf:"bytes,2,opt,name=collection" json:"collection,omitempty"` + KeepRemoteDatFile bool `protobuf:"varint,3,opt,name=keep_remote_dat_file,json=keepRemoteDatFile" json:"keep_remote_dat_file,omitempty"` +} + +func (m *VolumeTierMoveDatFromRemoteRequest) Reset() { *m = VolumeTierMoveDatFromRemoteRequest{} } +func (m *VolumeTierMoveDatFromRemoteRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeTierMoveDatFromRemoteRequest) ProtoMessage() {} +func (*VolumeTierMoveDatFromRemoteRequest) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{66} +} + +func (m *VolumeTierMoveDatFromRemoteRequest) GetVolumeId() uint32 { + if m != nil { + return m.VolumeId + } + return 0 +} + +func (m *VolumeTierMoveDatFromRemoteRequest) GetCollection() string { + if m != nil { + return m.Collection + } + return "" +} + +func (m *VolumeTierMoveDatFromRemoteRequest) GetKeepRemoteDatFile() bool { + if m != nil { + return m.KeepRemoteDatFile + } + return false +} + +type VolumeTierMoveDatFromRemoteResponse struct { + Processed int64 `protobuf:"varint,1,opt,name=processed" json:"processed,omitempty"` + ProcessedPercentage float32 `protobuf:"fixed32,2,opt,name=processedPercentage" json:"processedPercentage,omitempty"` +} + +func (m *VolumeTierMoveDatFromRemoteResponse) Reset() { *m = VolumeTierMoveDatFromRemoteResponse{} } +func (m *VolumeTierMoveDatFromRemoteResponse) String() string { return proto.CompactTextString(m) } +func (*VolumeTierMoveDatFromRemoteResponse) ProtoMessage() {} +func (*VolumeTierMoveDatFromRemoteResponse) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{67} +} + +func (m *VolumeTierMoveDatFromRemoteResponse) GetProcessed() int64 { + if m != nil { + return m.Processed + } + return 0 +} + +func (m *VolumeTierMoveDatFromRemoteResponse) GetProcessedPercentage() float32 { + if m != nil { + return m.ProcessedPercentage + } + return 0 +} + +type VolumeServerStatusRequest struct { +} + +func (m *VolumeServerStatusRequest) Reset() { *m = VolumeServerStatusRequest{} } +func (m *VolumeServerStatusRequest) String() string { return proto.CompactTextString(m) } +func (*VolumeServerStatusRequest) ProtoMessage() {} +func (*VolumeServerStatusRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{68} } + +type VolumeServerStatusResponse struct { + DiskStatuses []*DiskStatus `protobuf:"bytes,1,rep,name=disk_statuses,json=diskStatuses" json:"disk_statuses,omitempty"` + MemoryStatus *MemStatus `protobuf:"bytes,2,opt,name=memory_status,json=memoryStatus" json:"memory_status,omitempty"` +} + +func (m *VolumeServerStatusResponse) Reset() { *m = VolumeServerStatusResponse{} } +func (m *VolumeServerStatusResponse) String() string { return proto.CompactTextString(m) } +func 
(*VolumeServerStatusResponse) ProtoMessage() {} +func (*VolumeServerStatusResponse) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{69} } + +func (m *VolumeServerStatusResponse) GetDiskStatuses() []*DiskStatus { + if m != nil { + return m.DiskStatuses + } + return nil +} + +func (m *VolumeServerStatusResponse) GetMemoryStatus() *MemStatus { + if m != nil { + return m.MemoryStatus + } + return nil +} + +// select on volume servers +type QueryRequest struct { + Selections []string `protobuf:"bytes,1,rep,name=selections" json:"selections,omitempty"` + FromFileIds []string `protobuf:"bytes,2,rep,name=from_file_ids,json=fromFileIds" json:"from_file_ids,omitempty"` + Filter *QueryRequest_Filter `protobuf:"bytes,3,opt,name=filter" json:"filter,omitempty"` + InputSerialization *QueryRequest_InputSerialization `protobuf:"bytes,4,opt,name=input_serialization,json=inputSerialization" json:"input_serialization,omitempty"` + OutputSerialization *QueryRequest_OutputSerialization `protobuf:"bytes,5,opt,name=output_serialization,json=outputSerialization" json:"output_serialization,omitempty"` +} + +func (m *QueryRequest) Reset() { *m = QueryRequest{} } +func (m *QueryRequest) String() string { return proto.CompactTextString(m) } +func (*QueryRequest) ProtoMessage() {} +func (*QueryRequest) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{70} } + +func (m *QueryRequest) GetSelections() []string { + if m != nil { + return m.Selections + } + return nil +} + +func (m *QueryRequest) GetFromFileIds() []string { + if m != nil { + return m.FromFileIds + } + return nil +} + +func (m *QueryRequest) GetFilter() *QueryRequest_Filter { + if m != nil { + return m.Filter + } + return nil +} + +func (m *QueryRequest) GetInputSerialization() *QueryRequest_InputSerialization { + if m != nil { + return m.InputSerialization + } + return nil +} + +func (m *QueryRequest) GetOutputSerialization() *QueryRequest_OutputSerialization { + if m != nil { + return m.OutputSerialization + } + return nil +} + +type QueryRequest_Filter struct { + Field string `protobuf:"bytes,1,opt,name=field" json:"field,omitempty"` + Operand string `protobuf:"bytes,2,opt,name=operand" json:"operand,omitempty"` + Value string `protobuf:"bytes,3,opt,name=value" json:"value,omitempty"` +} + +func (m *QueryRequest_Filter) Reset() { *m = QueryRequest_Filter{} } +func (m *QueryRequest_Filter) String() string { return proto.CompactTextString(m) } +func (*QueryRequest_Filter) ProtoMessage() {} +func (*QueryRequest_Filter) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{70, 0} } + +func (m *QueryRequest_Filter) GetField() string { + if m != nil { + return m.Field + } + return "" +} + +func (m *QueryRequest_Filter) GetOperand() string { + if m != nil { + return m.Operand + } + return "" +} + +func (m *QueryRequest_Filter) GetValue() string { + if m != nil { + return m.Value + } + return "" +} + +type QueryRequest_InputSerialization struct { + // NONE | GZIP | BZIP2 + CompressionType string `protobuf:"bytes,1,opt,name=compression_type,json=compressionType" json:"compression_type,omitempty"` + CsvInput *QueryRequest_InputSerialization_CSVInput `protobuf:"bytes,2,opt,name=csv_input,json=csvInput" json:"csv_input,omitempty"` + JsonInput *QueryRequest_InputSerialization_JSONInput `protobuf:"bytes,3,opt,name=json_input,json=jsonInput" json:"json_input,omitempty"` + ParquetInput *QueryRequest_InputSerialization_ParquetInput `protobuf:"bytes,4,opt,name=parquet_input,json=parquetInput" json:"parquet_input,omitempty"` +} + +func (m 
*QueryRequest_InputSerialization) Reset() { *m = QueryRequest_InputSerialization{} } +func (m *QueryRequest_InputSerialization) String() string { return proto.CompactTextString(m) } +func (*QueryRequest_InputSerialization) ProtoMessage() {} +func (*QueryRequest_InputSerialization) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{70, 1} +} + +func (m *QueryRequest_InputSerialization) GetCompressionType() string { + if m != nil { + return m.CompressionType + } + return "" +} + +func (m *QueryRequest_InputSerialization) GetCsvInput() *QueryRequest_InputSerialization_CSVInput { + if m != nil { + return m.CsvInput + } + return nil +} + +func (m *QueryRequest_InputSerialization) GetJsonInput() *QueryRequest_InputSerialization_JSONInput { + if m != nil { + return m.JsonInput + } + return nil +} + +func (m *QueryRequest_InputSerialization) GetParquetInput() *QueryRequest_InputSerialization_ParquetInput { + if m != nil { + return m.ParquetInput + } + return nil +} + +type QueryRequest_InputSerialization_CSVInput struct { + FileHeaderInfo string `protobuf:"bytes,1,opt,name=file_header_info,json=fileHeaderInfo" json:"file_header_info,omitempty"` + RecordDelimiter string `protobuf:"bytes,2,opt,name=record_delimiter,json=recordDelimiter" json:"record_delimiter,omitempty"` + FieldDelimiter string `protobuf:"bytes,3,opt,name=field_delimiter,json=fieldDelimiter" json:"field_delimiter,omitempty"` + QuoteCharactoer string `protobuf:"bytes,4,opt,name=quote_charactoer,json=quoteCharactoer" json:"quote_charactoer,omitempty"` + QuoteEscapeCharacter string `protobuf:"bytes,5,opt,name=quote_escape_character,json=quoteEscapeCharacter" json:"quote_escape_character,omitempty"` + Comments string `protobuf:"bytes,6,opt,name=comments" json:"comments,omitempty"` + // If true, records might contain record delimiters within quote characters + AllowQuotedRecordDelimiter bool `protobuf:"varint,7,opt,name=allow_quoted_record_delimiter,json=allowQuotedRecordDelimiter" json:"allow_quoted_record_delimiter,omitempty"` +} + +func (m *QueryRequest_InputSerialization_CSVInput) Reset() { + *m = QueryRequest_InputSerialization_CSVInput{} +} +func (m *QueryRequest_InputSerialization_CSVInput) String() string { return proto.CompactTextString(m) } +func (*QueryRequest_InputSerialization_CSVInput) ProtoMessage() {} +func (*QueryRequest_InputSerialization_CSVInput) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{70, 1, 0} +} + +func (m *QueryRequest_InputSerialization_CSVInput) GetFileHeaderInfo() string { + if m != nil { + return m.FileHeaderInfo + } + return "" +} + +func (m *QueryRequest_InputSerialization_CSVInput) GetRecordDelimiter() string { + if m != nil { + return m.RecordDelimiter + } + return "" +} + +func (m *QueryRequest_InputSerialization_CSVInput) GetFieldDelimiter() string { + if m != nil { + return m.FieldDelimiter + } + return "" +} + +func (m *QueryRequest_InputSerialization_CSVInput) GetQuoteCharactoer() string { + if m != nil { + return m.QuoteCharactoer + } + return "" +} + +func (m *QueryRequest_InputSerialization_CSVInput) GetQuoteEscapeCharacter() string { + if m != nil { + return m.QuoteEscapeCharacter + } + return "" +} + +func (m *QueryRequest_InputSerialization_CSVInput) GetComments() string { + if m != nil { + return m.Comments + } + return "" +} + +func (m *QueryRequest_InputSerialization_CSVInput) GetAllowQuotedRecordDelimiter() bool { + if m != nil { + return m.AllowQuotedRecordDelimiter + } + return false +} + +type QueryRequest_InputSerialization_JSONInput struct { + Type 
string `protobuf:"bytes,1,opt,name=type" json:"type,omitempty"` +} + +func (m *QueryRequest_InputSerialization_JSONInput) Reset() { + *m = QueryRequest_InputSerialization_JSONInput{} +} +func (m *QueryRequest_InputSerialization_JSONInput) String() string { return proto.CompactTextString(m) } +func (*QueryRequest_InputSerialization_JSONInput) ProtoMessage() {} +func (*QueryRequest_InputSerialization_JSONInput) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{70, 1, 1} +} + +func (m *QueryRequest_InputSerialization_JSONInput) GetType() string { + if m != nil { + return m.Type + } + return "" +} + +type QueryRequest_InputSerialization_ParquetInput struct { +} + +func (m *QueryRequest_InputSerialization_ParquetInput) Reset() { + *m = QueryRequest_InputSerialization_ParquetInput{} +} +func (m *QueryRequest_InputSerialization_ParquetInput) String() string { + return proto.CompactTextString(m) +} +func (*QueryRequest_InputSerialization_ParquetInput) ProtoMessage() {} +func (*QueryRequest_InputSerialization_ParquetInput) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{70, 1, 2} +} + +type QueryRequest_OutputSerialization struct { + CsvOutput *QueryRequest_OutputSerialization_CSVOutput `protobuf:"bytes,2,opt,name=csv_output,json=csvOutput" json:"csv_output,omitempty"` + JsonOutput *QueryRequest_OutputSerialization_JSONOutput `protobuf:"bytes,3,opt,name=json_output,json=jsonOutput" json:"json_output,omitempty"` +} + +func (m *QueryRequest_OutputSerialization) Reset() { *m = QueryRequest_OutputSerialization{} } +func (m *QueryRequest_OutputSerialization) String() string { return proto.CompactTextString(m) } +func (*QueryRequest_OutputSerialization) ProtoMessage() {} +func (*QueryRequest_OutputSerialization) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{70, 2} +} + +func (m *QueryRequest_OutputSerialization) GetCsvOutput() *QueryRequest_OutputSerialization_CSVOutput { + if m != nil { + return m.CsvOutput + } + return nil +} + +func (m *QueryRequest_OutputSerialization) GetJsonOutput() *QueryRequest_OutputSerialization_JSONOutput { + if m != nil { + return m.JsonOutput + } + return nil +} + +type QueryRequest_OutputSerialization_CSVOutput struct { + QuoteFields string `protobuf:"bytes,1,opt,name=quote_fields,json=quoteFields" json:"quote_fields,omitempty"` + RecordDelimiter string `protobuf:"bytes,2,opt,name=record_delimiter,json=recordDelimiter" json:"record_delimiter,omitempty"` + FieldDelimiter string `protobuf:"bytes,3,opt,name=field_delimiter,json=fieldDelimiter" json:"field_delimiter,omitempty"` + QuoteCharactoer string `protobuf:"bytes,4,opt,name=quote_charactoer,json=quoteCharactoer" json:"quote_charactoer,omitempty"` + QuoteEscapeCharacter string `protobuf:"bytes,5,opt,name=quote_escape_character,json=quoteEscapeCharacter" json:"quote_escape_character,omitempty"` +} + +func (m *QueryRequest_OutputSerialization_CSVOutput) Reset() { + *m = QueryRequest_OutputSerialization_CSVOutput{} +} +func (m *QueryRequest_OutputSerialization_CSVOutput) String() string { + return proto.CompactTextString(m) +} +func (*QueryRequest_OutputSerialization_CSVOutput) ProtoMessage() {} +func (*QueryRequest_OutputSerialization_CSVOutput) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{70, 2, 0} +} + +func (m *QueryRequest_OutputSerialization_CSVOutput) GetQuoteFields() string { + if m != nil { + return m.QuoteFields + } + return "" +} + +func (m *QueryRequest_OutputSerialization_CSVOutput) GetRecordDelimiter() string { + if m != nil { + return 
m.RecordDelimiter + } + return "" +} + +func (m *QueryRequest_OutputSerialization_CSVOutput) GetFieldDelimiter() string { + if m != nil { + return m.FieldDelimiter + } + return "" +} + +func (m *QueryRequest_OutputSerialization_CSVOutput) GetQuoteCharactoer() string { + if m != nil { + return m.QuoteCharactoer + } + return "" +} + +func (m *QueryRequest_OutputSerialization_CSVOutput) GetQuoteEscapeCharacter() string { + if m != nil { + return m.QuoteEscapeCharacter + } + return "" +} + +type QueryRequest_OutputSerialization_JSONOutput struct { + RecordDelimiter string `protobuf:"bytes,1,opt,name=record_delimiter,json=recordDelimiter" json:"record_delimiter,omitempty"` +} + +func (m *QueryRequest_OutputSerialization_JSONOutput) Reset() { + *m = QueryRequest_OutputSerialization_JSONOutput{} +} +func (m *QueryRequest_OutputSerialization_JSONOutput) String() string { + return proto.CompactTextString(m) +} +func (*QueryRequest_OutputSerialization_JSONOutput) ProtoMessage() {} +func (*QueryRequest_OutputSerialization_JSONOutput) Descriptor() ([]byte, []int) { + return fileDescriptor0, []int{70, 2, 1} +} + +func (m *QueryRequest_OutputSerialization_JSONOutput) GetRecordDelimiter() string { + if m != nil { + return m.RecordDelimiter + } + return "" +} + +type QueriedStripe struct { + Records []byte `protobuf:"bytes,1,opt,name=records,proto3" json:"records,omitempty"` +} + +func (m *QueriedStripe) Reset() { *m = QueriedStripe{} } +func (m *QueriedStripe) String() string { return proto.CompactTextString(m) } +func (*QueriedStripe) ProtoMessage() {} +func (*QueriedStripe) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{71} } + +func (m *QueriedStripe) GetRecords() []byte { + if m != nil { + return m.Records + } + return nil +} + func init() { proto.RegisterType((*BatchDeleteRequest)(nil), "volume_server_pb.BatchDeleteRequest") proto.RegisterType((*BatchDeleteResponse)(nil), "volume_server_pb.BatchDeleteResponse") proto.RegisterType((*DeleteResult)(nil), "volume_server_pb.DeleteResult") + proto.RegisterType((*FileGetRequest)(nil), "volume_server_pb.FileGetRequest") + proto.RegisterType((*FileGetResponse)(nil), "volume_server_pb.FileGetResponse") proto.RegisterType((*Empty)(nil), "volume_server_pb.Empty") proto.RegisterType((*VacuumVolumeCheckRequest)(nil), "volume_server_pb.VacuumVolumeCheckRequest") proto.RegisterType((*VacuumVolumeCheckResponse)(nil), "volume_server_pb.VacuumVolumeCheckResponse") @@ -685,22 +2243,70 @@ func init() { proto.RegisterType((*VacuumVolumeCleanupResponse)(nil), "volume_server_pb.VacuumVolumeCleanupResponse") proto.RegisterType((*DeleteCollectionRequest)(nil), "volume_server_pb.DeleteCollectionRequest") proto.RegisterType((*DeleteCollectionResponse)(nil), "volume_server_pb.DeleteCollectionResponse") - proto.RegisterType((*AssignVolumeRequest)(nil), "volume_server_pb.AssignVolumeRequest") - proto.RegisterType((*AssignVolumeResponse)(nil), "volume_server_pb.AssignVolumeResponse") + proto.RegisterType((*AllocateVolumeRequest)(nil), "volume_server_pb.AllocateVolumeRequest") + proto.RegisterType((*AllocateVolumeResponse)(nil), "volume_server_pb.AllocateVolumeResponse") proto.RegisterType((*VolumeSyncStatusRequest)(nil), "volume_server_pb.VolumeSyncStatusRequest") proto.RegisterType((*VolumeSyncStatusResponse)(nil), "volume_server_pb.VolumeSyncStatusResponse") - proto.RegisterType((*VolumeSyncIndexRequest)(nil), "volume_server_pb.VolumeSyncIndexRequest") - proto.RegisterType((*VolumeSyncIndexResponse)(nil), "volume_server_pb.VolumeSyncIndexResponse") - 
proto.RegisterType((*VolumeSyncDataRequest)(nil), "volume_server_pb.VolumeSyncDataRequest")
-	proto.RegisterType((*VolumeSyncDataResponse)(nil), "volume_server_pb.VolumeSyncDataResponse")
+	proto.RegisterType((*VolumeIncrementalCopyRequest)(nil), "volume_server_pb.VolumeIncrementalCopyRequest")
+	proto.RegisterType((*VolumeIncrementalCopyResponse)(nil), "volume_server_pb.VolumeIncrementalCopyResponse")
 	proto.RegisterType((*VolumeMountRequest)(nil), "volume_server_pb.VolumeMountRequest")
 	proto.RegisterType((*VolumeMountResponse)(nil), "volume_server_pb.VolumeMountResponse")
 	proto.RegisterType((*VolumeUnmountRequest)(nil), "volume_server_pb.VolumeUnmountRequest")
 	proto.RegisterType((*VolumeUnmountResponse)(nil), "volume_server_pb.VolumeUnmountResponse")
-	proto.RegisterType((*VolumeUiPageRequest)(nil), "volume_server_pb.VolumeUiPageRequest")
-	proto.RegisterType((*VolumeUiPageResponse)(nil), "volume_server_pb.VolumeUiPageResponse")
+	proto.RegisterType((*VolumeDeleteRequest)(nil), "volume_server_pb.VolumeDeleteRequest")
+	proto.RegisterType((*VolumeDeleteResponse)(nil), "volume_server_pb.VolumeDeleteResponse")
+	proto.RegisterType((*VolumeMarkReadonlyRequest)(nil), "volume_server_pb.VolumeMarkReadonlyRequest")
+	proto.RegisterType((*VolumeMarkReadonlyResponse)(nil), "volume_server_pb.VolumeMarkReadonlyResponse")
+	proto.RegisterType((*VolumeConfigureRequest)(nil), "volume_server_pb.VolumeConfigureRequest")
+	proto.RegisterType((*VolumeConfigureResponse)(nil), "volume_server_pb.VolumeConfigureResponse")
+	proto.RegisterType((*VolumeCopyRequest)(nil), "volume_server_pb.VolumeCopyRequest")
+	proto.RegisterType((*VolumeCopyResponse)(nil), "volume_server_pb.VolumeCopyResponse")
+	proto.RegisterType((*CopyFileRequest)(nil), "volume_server_pb.CopyFileRequest")
+	proto.RegisterType((*CopyFileResponse)(nil), "volume_server_pb.CopyFileResponse")
+	proto.RegisterType((*VolumeTailSenderRequest)(nil), "volume_server_pb.VolumeTailSenderRequest")
+	proto.RegisterType((*VolumeTailSenderResponse)(nil), "volume_server_pb.VolumeTailSenderResponse")
+	proto.RegisterType((*VolumeTailReceiverRequest)(nil), "volume_server_pb.VolumeTailReceiverRequest")
+	proto.RegisterType((*VolumeTailReceiverResponse)(nil), "volume_server_pb.VolumeTailReceiverResponse")
+	proto.RegisterType((*VolumeEcShardsGenerateRequest)(nil), "volume_server_pb.VolumeEcShardsGenerateRequest")
+	proto.RegisterType((*VolumeEcShardsGenerateResponse)(nil), "volume_server_pb.VolumeEcShardsGenerateResponse")
+	proto.RegisterType((*VolumeEcShardsRebuildRequest)(nil), "volume_server_pb.VolumeEcShardsRebuildRequest")
+	proto.RegisterType((*VolumeEcShardsRebuildResponse)(nil), "volume_server_pb.VolumeEcShardsRebuildResponse")
+	proto.RegisterType((*VolumeEcShardsCopyRequest)(nil), "volume_server_pb.VolumeEcShardsCopyRequest")
+	proto.RegisterType((*VolumeEcShardsCopyResponse)(nil), "volume_server_pb.VolumeEcShardsCopyResponse")
+	proto.RegisterType((*VolumeEcShardsDeleteRequest)(nil), "volume_server_pb.VolumeEcShardsDeleteRequest")
+	proto.RegisterType((*VolumeEcShardsDeleteResponse)(nil), "volume_server_pb.VolumeEcShardsDeleteResponse")
+	proto.RegisterType((*VolumeEcShardsMountRequest)(nil), "volume_server_pb.VolumeEcShardsMountRequest")
+	proto.RegisterType((*VolumeEcShardsMountResponse)(nil), "volume_server_pb.VolumeEcShardsMountResponse")
+	proto.RegisterType((*VolumeEcShardsUnmountRequest)(nil), "volume_server_pb.VolumeEcShardsUnmountRequest")
+	proto.RegisterType((*VolumeEcShardsUnmountResponse)(nil), "volume_server_pb.VolumeEcShardsUnmountResponse")
+	proto.RegisterType((*VolumeEcShardReadRequest)(nil), "volume_server_pb.VolumeEcShardReadRequest")
+	proto.RegisterType((*VolumeEcShardReadResponse)(nil), "volume_server_pb.VolumeEcShardReadResponse")
+	proto.RegisterType((*VolumeEcBlobDeleteRequest)(nil), "volume_server_pb.VolumeEcBlobDeleteRequest")
+	proto.RegisterType((*VolumeEcBlobDeleteResponse)(nil), "volume_server_pb.VolumeEcBlobDeleteResponse")
+	proto.RegisterType((*VolumeEcShardsToVolumeRequest)(nil), "volume_server_pb.VolumeEcShardsToVolumeRequest")
+	proto.RegisterType((*VolumeEcShardsToVolumeResponse)(nil), "volume_server_pb.VolumeEcShardsToVolumeResponse")
+	proto.RegisterType((*ReadVolumeFileStatusRequest)(nil), "volume_server_pb.ReadVolumeFileStatusRequest")
+	proto.RegisterType((*ReadVolumeFileStatusResponse)(nil), "volume_server_pb.ReadVolumeFileStatusResponse")
 	proto.RegisterType((*DiskStatus)(nil), "volume_server_pb.DiskStatus")
 	proto.RegisterType((*MemStatus)(nil), "volume_server_pb.MemStatus")
+	proto.RegisterType((*RemoteFile)(nil), "volume_server_pb.RemoteFile")
+	proto.RegisterType((*VolumeInfo)(nil), "volume_server_pb.VolumeInfo")
+	proto.RegisterType((*VolumeTierMoveDatToRemoteRequest)(nil), "volume_server_pb.VolumeTierMoveDatToRemoteRequest")
+	proto.RegisterType((*VolumeTierMoveDatToRemoteResponse)(nil), "volume_server_pb.VolumeTierMoveDatToRemoteResponse")
+	proto.RegisterType((*VolumeTierMoveDatFromRemoteRequest)(nil), "volume_server_pb.VolumeTierMoveDatFromRemoteRequest")
+	proto.RegisterType((*VolumeTierMoveDatFromRemoteResponse)(nil), "volume_server_pb.VolumeTierMoveDatFromRemoteResponse")
+	proto.RegisterType((*VolumeServerStatusRequest)(nil), "volume_server_pb.VolumeServerStatusRequest")
+	proto.RegisterType((*VolumeServerStatusResponse)(nil), "volume_server_pb.VolumeServerStatusResponse")
+	proto.RegisterType((*QueryRequest)(nil), "volume_server_pb.QueryRequest")
+	proto.RegisterType((*QueryRequest_Filter)(nil), "volume_server_pb.QueryRequest.Filter")
+	proto.RegisterType((*QueryRequest_InputSerialization)(nil), "volume_server_pb.QueryRequest.InputSerialization")
+	proto.RegisterType((*QueryRequest_InputSerialization_CSVInput)(nil), "volume_server_pb.QueryRequest.InputSerialization.CSVInput")
+	proto.RegisterType((*QueryRequest_InputSerialization_JSONInput)(nil), "volume_server_pb.QueryRequest.InputSerialization.JSONInput")
+	proto.RegisterType((*QueryRequest_InputSerialization_ParquetInput)(nil), "volume_server_pb.QueryRequest.InputSerialization.ParquetInput")
+	proto.RegisterType((*QueryRequest_OutputSerialization)(nil), "volume_server_pb.QueryRequest.OutputSerialization")
+	proto.RegisterType((*QueryRequest_OutputSerialization_CSVOutput)(nil), "volume_server_pb.QueryRequest.OutputSerialization.CSVOutput")
+	proto.RegisterType((*QueryRequest_OutputSerialization_JSONOutput)(nil), "volume_server_pb.QueryRequest.OutputSerialization.JSONOutput")
+	proto.RegisterType((*QueriedStripe)(nil), "volume_server_pb.QueriedStripe")
 }
 
 // Reference imports to suppress errors if they are not otherwise used.
@@ -716,17 +2322,42 @@ const _ = grpc.SupportPackageIsVersion4
BatchDelete(ctx context.Context, in *BatchDeleteRequest, opts ...grpc.CallOption) (*BatchDeleteResponse, error) + FileGet(ctx context.Context, in *FileGetRequest, opts ...grpc.CallOption) (VolumeServer_FileGetClient, error) VacuumVolumeCheck(ctx context.Context, in *VacuumVolumeCheckRequest, opts ...grpc.CallOption) (*VacuumVolumeCheckResponse, error) VacuumVolumeCompact(ctx context.Context, in *VacuumVolumeCompactRequest, opts ...grpc.CallOption) (*VacuumVolumeCompactResponse, error) VacuumVolumeCommit(ctx context.Context, in *VacuumVolumeCommitRequest, opts ...grpc.CallOption) (*VacuumVolumeCommitResponse, error) VacuumVolumeCleanup(ctx context.Context, in *VacuumVolumeCleanupRequest, opts ...grpc.CallOption) (*VacuumVolumeCleanupResponse, error) DeleteCollection(ctx context.Context, in *DeleteCollectionRequest, opts ...grpc.CallOption) (*DeleteCollectionResponse, error) - AssignVolume(ctx context.Context, in *AssignVolumeRequest, opts ...grpc.CallOption) (*AssignVolumeResponse, error) + AllocateVolume(ctx context.Context, in *AllocateVolumeRequest, opts ...grpc.CallOption) (*AllocateVolumeResponse, error) VolumeSyncStatus(ctx context.Context, in *VolumeSyncStatusRequest, opts ...grpc.CallOption) (*VolumeSyncStatusResponse, error) - VolumeSyncIndex(ctx context.Context, in *VolumeSyncIndexRequest, opts ...grpc.CallOption) (*VolumeSyncIndexResponse, error) - VolumeSyncData(ctx context.Context, in *VolumeSyncDataRequest, opts ...grpc.CallOption) (*VolumeSyncDataResponse, error) + VolumeIncrementalCopy(ctx context.Context, in *VolumeIncrementalCopyRequest, opts ...grpc.CallOption) (VolumeServer_VolumeIncrementalCopyClient, error) VolumeMount(ctx context.Context, in *VolumeMountRequest, opts ...grpc.CallOption) (*VolumeMountResponse, error) VolumeUnmount(ctx context.Context, in *VolumeUnmountRequest, opts ...grpc.CallOption) (*VolumeUnmountResponse, error) + VolumeDelete(ctx context.Context, in *VolumeDeleteRequest, opts ...grpc.CallOption) (*VolumeDeleteResponse, error) + VolumeMarkReadonly(ctx context.Context, in *VolumeMarkReadonlyRequest, opts ...grpc.CallOption) (*VolumeMarkReadonlyResponse, error) + VolumeConfigure(ctx context.Context, in *VolumeConfigureRequest, opts ...grpc.CallOption) (*VolumeConfigureResponse, error) + // copy the .idx .dat files, and mount this volume + VolumeCopy(ctx context.Context, in *VolumeCopyRequest, opts ...grpc.CallOption) (*VolumeCopyResponse, error) + ReadVolumeFileStatus(ctx context.Context, in *ReadVolumeFileStatusRequest, opts ...grpc.CallOption) (*ReadVolumeFileStatusResponse, error) + CopyFile(ctx context.Context, in *CopyFileRequest, opts ...grpc.CallOption) (VolumeServer_CopyFileClient, error) + VolumeTailSender(ctx context.Context, in *VolumeTailSenderRequest, opts ...grpc.CallOption) (VolumeServer_VolumeTailSenderClient, error) + VolumeTailReceiver(ctx context.Context, in *VolumeTailReceiverRequest, opts ...grpc.CallOption) (*VolumeTailReceiverResponse, error) + // erasure coding + VolumeEcShardsGenerate(ctx context.Context, in *VolumeEcShardsGenerateRequest, opts ...grpc.CallOption) (*VolumeEcShardsGenerateResponse, error) + VolumeEcShardsRebuild(ctx context.Context, in *VolumeEcShardsRebuildRequest, opts ...grpc.CallOption) (*VolumeEcShardsRebuildResponse, error) + VolumeEcShardsCopy(ctx context.Context, in *VolumeEcShardsCopyRequest, opts ...grpc.CallOption) (*VolumeEcShardsCopyResponse, error) + VolumeEcShardsDelete(ctx context.Context, in *VolumeEcShardsDeleteRequest, opts ...grpc.CallOption) (*VolumeEcShardsDeleteResponse, error) + 
VolumeEcShardsMount(ctx context.Context, in *VolumeEcShardsMountRequest, opts ...grpc.CallOption) (*VolumeEcShardsMountResponse, error) + VolumeEcShardsUnmount(ctx context.Context, in *VolumeEcShardsUnmountRequest, opts ...grpc.CallOption) (*VolumeEcShardsUnmountResponse, error) + VolumeEcShardRead(ctx context.Context, in *VolumeEcShardReadRequest, opts ...grpc.CallOption) (VolumeServer_VolumeEcShardReadClient, error) + VolumeEcBlobDelete(ctx context.Context, in *VolumeEcBlobDeleteRequest, opts ...grpc.CallOption) (*VolumeEcBlobDeleteResponse, error) + VolumeEcShardsToVolume(ctx context.Context, in *VolumeEcShardsToVolumeRequest, opts ...grpc.CallOption) (*VolumeEcShardsToVolumeResponse, error) + // tiered storage + VolumeTierMoveDatToRemote(ctx context.Context, in *VolumeTierMoveDatToRemoteRequest, opts ...grpc.CallOption) (VolumeServer_VolumeTierMoveDatToRemoteClient, error) + VolumeTierMoveDatFromRemote(ctx context.Context, in *VolumeTierMoveDatFromRemoteRequest, opts ...grpc.CallOption) (VolumeServer_VolumeTierMoveDatFromRemoteClient, error) + VolumeServerStatus(ctx context.Context, in *VolumeServerStatusRequest, opts ...grpc.CallOption) (*VolumeServerStatusResponse, error) + // <experimental> query + Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (VolumeServer_QueryClient, error) } type volumeServerClient struct { @@ -746,6 +2377,38 @@ func (c *volumeServerClient) BatchDelete(ctx context.Context, in *BatchDeleteReq return out, nil } +func (c *volumeServerClient) FileGet(ctx context.Context, in *FileGetRequest, opts ...grpc.CallOption) (VolumeServer_FileGetClient, error) { + stream, err := grpc.NewClientStream(ctx, &_VolumeServer_serviceDesc.Streams[0], c.cc, "/volume_server_pb.VolumeServer/FileGet", opts...) + if err != nil { + return nil, err + } + x := &volumeServerFileGetClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type VolumeServer_FileGetClient interface { + Recv() (*FileGetResponse, error) + grpc.ClientStream +} + +type volumeServerFileGetClient struct { + grpc.ClientStream +} + +func (x *volumeServerFileGetClient) Recv() (*FileGetResponse, error) { + m := new(FileGetResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + func (c *volumeServerClient) VacuumVolumeCheck(ctx context.Context, in *VacuumVolumeCheckRequest, opts ...grpc.CallOption) (*VacuumVolumeCheckResponse, error) { out := new(VacuumVolumeCheckResponse) err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VacuumVolumeCheck", in, out, c.cc, opts...) @@ -791,9 +2454,9 @@ func (c *volumeServerClient) DeleteCollection(ctx context.Context, in *DeleteCol return out, nil } -func (c *volumeServerClient) AssignVolume(ctx context.Context, in *AssignVolumeRequest, opts ...grpc.CallOption) (*AssignVolumeResponse, error) { - out := new(AssignVolumeResponse) - err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/AssignVolume", in, out, c.cc, opts...) +func (c *volumeServerClient) AllocateVolume(ctx context.Context, in *AllocateVolumeRequest, opts ...grpc.CallOption) (*AllocateVolumeResponse, error) { + out := new(AllocateVolumeResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/AllocateVolume", in, out, c.cc, opts...) 
if err != nil { return nil, err } @@ -809,22 +2472,36 @@ func (c *volumeServerClient) VolumeSyncStatus(ctx context.Context, in *VolumeSyn return out, nil } -func (c *volumeServerClient) VolumeSyncIndex(ctx context.Context, in *VolumeSyncIndexRequest, opts ...grpc.CallOption) (*VolumeSyncIndexResponse, error) { - out := new(VolumeSyncIndexResponse) - err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeSyncIndex", in, out, c.cc, opts...) +func (c *volumeServerClient) VolumeIncrementalCopy(ctx context.Context, in *VolumeIncrementalCopyRequest, opts ...grpc.CallOption) (VolumeServer_VolumeIncrementalCopyClient, error) { + stream, err := grpc.NewClientStream(ctx, &_VolumeServer_serviceDesc.Streams[1], c.cc, "/volume_server_pb.VolumeServer/VolumeIncrementalCopy", opts...) if err != nil { return nil, err } - return out, nil + x := &volumeServerVolumeIncrementalCopyClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil } -func (c *volumeServerClient) VolumeSyncData(ctx context.Context, in *VolumeSyncDataRequest, opts ...grpc.CallOption) (*VolumeSyncDataResponse, error) { - out := new(VolumeSyncDataResponse) - err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeSyncData", in, out, c.cc, opts...) - if err != nil { +type VolumeServer_VolumeIncrementalCopyClient interface { + Recv() (*VolumeIncrementalCopyResponse, error) + grpc.ClientStream +} + +type volumeServerVolumeIncrementalCopyClient struct { + grpc.ClientStream +} + +func (x *volumeServerVolumeIncrementalCopyClient) Recv() (*VolumeIncrementalCopyResponse, error) { + m := new(VolumeIncrementalCopyResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { return nil, err } - return out, nil + return m, nil } func (c *volumeServerClient) VolumeMount(ctx context.Context, in *VolumeMountRequest, opts ...grpc.CallOption) (*VolumeMountResponse, error) { @@ -845,22 +2522,374 @@ func (c *volumeServerClient) VolumeUnmount(ctx context.Context, in *VolumeUnmoun return out, nil } +func (c *volumeServerClient) VolumeDelete(ctx context.Context, in *VolumeDeleteRequest, opts ...grpc.CallOption) (*VolumeDeleteResponse, error) { + out := new(VolumeDeleteResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeDelete", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeMarkReadonly(ctx context.Context, in *VolumeMarkReadonlyRequest, opts ...grpc.CallOption) (*VolumeMarkReadonlyResponse, error) { + out := new(VolumeMarkReadonlyResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeMarkReadonly", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeConfigure(ctx context.Context, in *VolumeConfigureRequest, opts ...grpc.CallOption) (*VolumeConfigureResponse, error) { + out := new(VolumeConfigureResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeConfigure", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeCopy(ctx context.Context, in *VolumeCopyRequest, opts ...grpc.CallOption) (*VolumeCopyResponse, error) { + out := new(VolumeCopyResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeCopy", in, out, c.cc, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) ReadVolumeFileStatus(ctx context.Context, in *ReadVolumeFileStatusRequest, opts ...grpc.CallOption) (*ReadVolumeFileStatusResponse, error) { + out := new(ReadVolumeFileStatusResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/ReadVolumeFileStatus", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) CopyFile(ctx context.Context, in *CopyFileRequest, opts ...grpc.CallOption) (VolumeServer_CopyFileClient, error) { + stream, err := grpc.NewClientStream(ctx, &_VolumeServer_serviceDesc.Streams[2], c.cc, "/volume_server_pb.VolumeServer/CopyFile", opts...) + if err != nil { + return nil, err + } + x := &volumeServerCopyFileClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type VolumeServer_CopyFileClient interface { + Recv() (*CopyFileResponse, error) + grpc.ClientStream +} + +type volumeServerCopyFileClient struct { + grpc.ClientStream +} + +func (x *volumeServerCopyFileClient) Recv() (*CopyFileResponse, error) { + m := new(CopyFileResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *volumeServerClient) VolumeTailSender(ctx context.Context, in *VolumeTailSenderRequest, opts ...grpc.CallOption) (VolumeServer_VolumeTailSenderClient, error) { + stream, err := grpc.NewClientStream(ctx, &_VolumeServer_serviceDesc.Streams[3], c.cc, "/volume_server_pb.VolumeServer/VolumeTailSender", opts...) + if err != nil { + return nil, err + } + x := &volumeServerVolumeTailSenderClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type VolumeServer_VolumeTailSenderClient interface { + Recv() (*VolumeTailSenderResponse, error) + grpc.ClientStream +} + +type volumeServerVolumeTailSenderClient struct { + grpc.ClientStream +} + +func (x *volumeServerVolumeTailSenderClient) Recv() (*VolumeTailSenderResponse, error) { + m := new(VolumeTailSenderResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *volumeServerClient) VolumeTailReceiver(ctx context.Context, in *VolumeTailReceiverRequest, opts ...grpc.CallOption) (*VolumeTailReceiverResponse, error) { + out := new(VolumeTailReceiverResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeTailReceiver", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeEcShardsGenerate(ctx context.Context, in *VolumeEcShardsGenerateRequest, opts ...grpc.CallOption) (*VolumeEcShardsGenerateResponse, error) { + out := new(VolumeEcShardsGenerateResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeEcShardsGenerate", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeEcShardsRebuild(ctx context.Context, in *VolumeEcShardsRebuildRequest, opts ...grpc.CallOption) (*VolumeEcShardsRebuildResponse, error) { + out := new(VolumeEcShardsRebuildResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeEcShardsRebuild", in, out, c.cc, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeEcShardsCopy(ctx context.Context, in *VolumeEcShardsCopyRequest, opts ...grpc.CallOption) (*VolumeEcShardsCopyResponse, error) { + out := new(VolumeEcShardsCopyResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeEcShardsCopy", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeEcShardsDelete(ctx context.Context, in *VolumeEcShardsDeleteRequest, opts ...grpc.CallOption) (*VolumeEcShardsDeleteResponse, error) { + out := new(VolumeEcShardsDeleteResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeEcShardsDelete", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeEcShardsMount(ctx context.Context, in *VolumeEcShardsMountRequest, opts ...grpc.CallOption) (*VolumeEcShardsMountResponse, error) { + out := new(VolumeEcShardsMountResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeEcShardsMount", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeEcShardsUnmount(ctx context.Context, in *VolumeEcShardsUnmountRequest, opts ...grpc.CallOption) (*VolumeEcShardsUnmountResponse, error) { + out := new(VolumeEcShardsUnmountResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeEcShardsUnmount", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeEcShardRead(ctx context.Context, in *VolumeEcShardReadRequest, opts ...grpc.CallOption) (VolumeServer_VolumeEcShardReadClient, error) { + stream, err := grpc.NewClientStream(ctx, &_VolumeServer_serviceDesc.Streams[4], c.cc, "/volume_server_pb.VolumeServer/VolumeEcShardRead", opts...) + if err != nil { + return nil, err + } + x := &volumeServerVolumeEcShardReadClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type VolumeServer_VolumeEcShardReadClient interface { + Recv() (*VolumeEcShardReadResponse, error) + grpc.ClientStream +} + +type volumeServerVolumeEcShardReadClient struct { + grpc.ClientStream +} + +func (x *volumeServerVolumeEcShardReadClient) Recv() (*VolumeEcShardReadResponse, error) { + m := new(VolumeEcShardReadResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *volumeServerClient) VolumeEcBlobDelete(ctx context.Context, in *VolumeEcBlobDeleteRequest, opts ...grpc.CallOption) (*VolumeEcBlobDeleteResponse, error) { + out := new(VolumeEcBlobDeleteResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeEcBlobDelete", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeEcShardsToVolume(ctx context.Context, in *VolumeEcShardsToVolumeRequest, opts ...grpc.CallOption) (*VolumeEcShardsToVolumeResponse, error) { + out := new(VolumeEcShardsToVolumeResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeEcShardsToVolume", in, out, c.cc, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) VolumeTierMoveDatToRemote(ctx context.Context, in *VolumeTierMoveDatToRemoteRequest, opts ...grpc.CallOption) (VolumeServer_VolumeTierMoveDatToRemoteClient, error) { + stream, err := grpc.NewClientStream(ctx, &_VolumeServer_serviceDesc.Streams[5], c.cc, "/volume_server_pb.VolumeServer/VolumeTierMoveDatToRemote", opts...) + if err != nil { + return nil, err + } + x := &volumeServerVolumeTierMoveDatToRemoteClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type VolumeServer_VolumeTierMoveDatToRemoteClient interface { + Recv() (*VolumeTierMoveDatToRemoteResponse, error) + grpc.ClientStream +} + +type volumeServerVolumeTierMoveDatToRemoteClient struct { + grpc.ClientStream +} + +func (x *volumeServerVolumeTierMoveDatToRemoteClient) Recv() (*VolumeTierMoveDatToRemoteResponse, error) { + m := new(VolumeTierMoveDatToRemoteResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *volumeServerClient) VolumeTierMoveDatFromRemote(ctx context.Context, in *VolumeTierMoveDatFromRemoteRequest, opts ...grpc.CallOption) (VolumeServer_VolumeTierMoveDatFromRemoteClient, error) { + stream, err := grpc.NewClientStream(ctx, &_VolumeServer_serviceDesc.Streams[6], c.cc, "/volume_server_pb.VolumeServer/VolumeTierMoveDatFromRemote", opts...) + if err != nil { + return nil, err + } + x := &volumeServerVolumeTierMoveDatFromRemoteClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type VolumeServer_VolumeTierMoveDatFromRemoteClient interface { + Recv() (*VolumeTierMoveDatFromRemoteResponse, error) + grpc.ClientStream +} + +type volumeServerVolumeTierMoveDatFromRemoteClient struct { + grpc.ClientStream +} + +func (x *volumeServerVolumeTierMoveDatFromRemoteClient) Recv() (*VolumeTierMoveDatFromRemoteResponse, error) { + m := new(VolumeTierMoveDatFromRemoteResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *volumeServerClient) VolumeServerStatus(ctx context.Context, in *VolumeServerStatusRequest, opts ...grpc.CallOption) (*VolumeServerStatusResponse, error) { + out := new(VolumeServerStatusResponse) + err := grpc.Invoke(ctx, "/volume_server_pb.VolumeServer/VolumeServerStatus", in, out, c.cc, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *volumeServerClient) Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (VolumeServer_QueryClient, error) { + stream, err := grpc.NewClientStream(ctx, &_VolumeServer_serviceDesc.Streams[7], c.cc, "/volume_server_pb.VolumeServer/Query", opts...) 
+ if err != nil { + return nil, err + } + x := &volumeServerQueryClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type VolumeServer_QueryClient interface { + Recv() (*QueriedStripe, error) + grpc.ClientStream +} + +type volumeServerQueryClient struct { + grpc.ClientStream +} + +func (x *volumeServerQueryClient) Recv() (*QueriedStripe, error) { + m := new(QueriedStripe) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + // Server API for VolumeServer service type VolumeServerServer interface { // Experts only: takes multiple fid parameters. This function does not propagate deletes to replicas. BatchDelete(context.Context, *BatchDeleteRequest) (*BatchDeleteResponse, error) + FileGet(*FileGetRequest, VolumeServer_FileGetServer) error VacuumVolumeCheck(context.Context, *VacuumVolumeCheckRequest) (*VacuumVolumeCheckResponse, error) VacuumVolumeCompact(context.Context, *VacuumVolumeCompactRequest) (*VacuumVolumeCompactResponse, error) VacuumVolumeCommit(context.Context, *VacuumVolumeCommitRequest) (*VacuumVolumeCommitResponse, error) VacuumVolumeCleanup(context.Context, *VacuumVolumeCleanupRequest) (*VacuumVolumeCleanupResponse, error) DeleteCollection(context.Context, *DeleteCollectionRequest) (*DeleteCollectionResponse, error) - AssignVolume(context.Context, *AssignVolumeRequest) (*AssignVolumeResponse, error) + AllocateVolume(context.Context, *AllocateVolumeRequest) (*AllocateVolumeResponse, error) VolumeSyncStatus(context.Context, *VolumeSyncStatusRequest) (*VolumeSyncStatusResponse, error) - VolumeSyncIndex(context.Context, *VolumeSyncIndexRequest) (*VolumeSyncIndexResponse, error) - VolumeSyncData(context.Context, *VolumeSyncDataRequest) (*VolumeSyncDataResponse, error) + VolumeIncrementalCopy(*VolumeIncrementalCopyRequest, VolumeServer_VolumeIncrementalCopyServer) error VolumeMount(context.Context, *VolumeMountRequest) (*VolumeMountResponse, error) VolumeUnmount(context.Context, *VolumeUnmountRequest) (*VolumeUnmountResponse, error) + VolumeDelete(context.Context, *VolumeDeleteRequest) (*VolumeDeleteResponse, error) + VolumeMarkReadonly(context.Context, *VolumeMarkReadonlyRequest) (*VolumeMarkReadonlyResponse, error) + VolumeConfigure(context.Context, *VolumeConfigureRequest) (*VolumeConfigureResponse, error) + // copy the .idx .dat files, and mount this volume + VolumeCopy(context.Context, *VolumeCopyRequest) (*VolumeCopyResponse, error) + ReadVolumeFileStatus(context.Context, *ReadVolumeFileStatusRequest) (*ReadVolumeFileStatusResponse, error) + CopyFile(*CopyFileRequest, VolumeServer_CopyFileServer) error + VolumeTailSender(*VolumeTailSenderRequest, VolumeServer_VolumeTailSenderServer) error + VolumeTailReceiver(context.Context, *VolumeTailReceiverRequest) (*VolumeTailReceiverResponse, error) + // erasure coding + VolumeEcShardsGenerate(context.Context, *VolumeEcShardsGenerateRequest) (*VolumeEcShardsGenerateResponse, error) + VolumeEcShardsRebuild(context.Context, *VolumeEcShardsRebuildRequest) (*VolumeEcShardsRebuildResponse, error) + VolumeEcShardsCopy(context.Context, *VolumeEcShardsCopyRequest) (*VolumeEcShardsCopyResponse, error) + VolumeEcShardsDelete(context.Context, *VolumeEcShardsDeleteRequest) (*VolumeEcShardsDeleteResponse, error) + VolumeEcShardsMount(context.Context, *VolumeEcShardsMountRequest) (*VolumeEcShardsMountResponse, error) + VolumeEcShardsUnmount(context.Context, 
*VolumeEcShardsUnmountRequest) (*VolumeEcShardsUnmountResponse, error) + VolumeEcShardRead(*VolumeEcShardReadRequest, VolumeServer_VolumeEcShardReadServer) error + VolumeEcBlobDelete(context.Context, *VolumeEcBlobDeleteRequest) (*VolumeEcBlobDeleteResponse, error) + VolumeEcShardsToVolume(context.Context, *VolumeEcShardsToVolumeRequest) (*VolumeEcShardsToVolumeResponse, error) + // tiered storage + VolumeTierMoveDatToRemote(*VolumeTierMoveDatToRemoteRequest, VolumeServer_VolumeTierMoveDatToRemoteServer) error + VolumeTierMoveDatFromRemote(*VolumeTierMoveDatFromRemoteRequest, VolumeServer_VolumeTierMoveDatFromRemoteServer) error + VolumeServerStatus(context.Context, *VolumeServerStatusRequest) (*VolumeServerStatusResponse, error) + // <experimental> query + Query(*QueryRequest, VolumeServer_QueryServer) error } func RegisterVolumeServerServer(s *grpc.Server, srv VolumeServerServer) { @@ -885,6 +2914,27 @@ func _VolumeServer_BatchDelete_Handler(srv interface{}, ctx context.Context, dec return interceptor(ctx, in, info, handler) } +func _VolumeServer_FileGet_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(FileGetRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(VolumeServerServer).FileGet(m, &volumeServerFileGetServer{stream}) +} + +type VolumeServer_FileGetServer interface { + Send(*FileGetResponse) error + grpc.ServerStream +} + +type volumeServerFileGetServer struct { + grpc.ServerStream +} + +func (x *volumeServerFileGetServer) Send(m *FileGetResponse) error { + return x.ServerStream.SendMsg(m) +} + func _VolumeServer_VacuumVolumeCheck_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(VacuumVolumeCheckRequest) if err := dec(in); err != nil { @@ -975,20 +3025,20 @@ func _VolumeServer_DeleteCollection_Handler(srv interface{}, ctx context.Context return interceptor(ctx, in, info, handler) } -func _VolumeServer_AssignVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(AssignVolumeRequest) +func _VolumeServer_AllocateVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(AllocateVolumeRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(VolumeServerServer).AssignVolume(ctx, in) + return srv.(VolumeServerServer).AllocateVolume(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/volume_server_pb.VolumeServer/AssignVolume", + FullMethod: "/volume_server_pb.VolumeServer/AllocateVolume", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(VolumeServerServer).AssignVolume(ctx, req.(*AssignVolumeRequest)) + return srv.(VolumeServerServer).AllocateVolume(ctx, req.(*AllocateVolumeRequest)) } return interceptor(ctx, in, info, handler) } @@ -1011,78 +3061,459 @@ func _VolumeServer_VolumeSyncStatus_Handler(srv interface{}, ctx context.Context return interceptor(ctx, in, info, handler) } -func _VolumeServer_VolumeSyncIndex_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(VolumeSyncIndexRequest) +func _VolumeServer_VolumeIncrementalCopy_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(VolumeIncrementalCopyRequest) + if err := 
stream.RecvMsg(m); err != nil { + return err + } + return srv.(VolumeServerServer).VolumeIncrementalCopy(m, &volumeServerVolumeIncrementalCopyServer{stream}) +} + +type VolumeServer_VolumeIncrementalCopyServer interface { + Send(*VolumeIncrementalCopyResponse) error + grpc.ServerStream +} + +type volumeServerVolumeIncrementalCopyServer struct { + grpc.ServerStream +} + +func (x *volumeServerVolumeIncrementalCopyServer) Send(m *VolumeIncrementalCopyResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _VolumeServer_VolumeMount_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeMountRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(VolumeServerServer).VolumeSyncIndex(ctx, in) + return srv.(VolumeServerServer).VolumeMount(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/volume_server_pb.VolumeServer/VolumeSyncIndex", + FullMethod: "/volume_server_pb.VolumeServer/VolumeMount", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(VolumeServerServer).VolumeSyncIndex(ctx, req.(*VolumeSyncIndexRequest)) + return srv.(VolumeServerServer).VolumeMount(ctx, req.(*VolumeMountRequest)) } return interceptor(ctx, in, info, handler) } -func _VolumeServer_VolumeSyncData_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(VolumeSyncDataRequest) +func _VolumeServer_VolumeUnmount_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeUnmountRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(VolumeServerServer).VolumeSyncData(ctx, in) + return srv.(VolumeServerServer).VolumeUnmount(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/volume_server_pb.VolumeServer/VolumeSyncData", + FullMethod: "/volume_server_pb.VolumeServer/VolumeUnmount", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(VolumeServerServer).VolumeSyncData(ctx, req.(*VolumeSyncDataRequest)) + return srv.(VolumeServerServer).VolumeUnmount(ctx, req.(*VolumeUnmountRequest)) } return interceptor(ctx, in, info, handler) } -func _VolumeServer_VolumeMount_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(VolumeMountRequest) +func _VolumeServer_VolumeDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeDeleteRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(VolumeServerServer).VolumeMount(ctx, in) + return srv.(VolumeServerServer).VolumeDelete(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/volume_server_pb.VolumeServer/VolumeMount", + FullMethod: "/volume_server_pb.VolumeServer/VolumeDelete", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(VolumeServerServer).VolumeMount(ctx, req.(*VolumeMountRequest)) + return srv.(VolumeServerServer).VolumeDelete(ctx, req.(*VolumeDeleteRequest)) } return interceptor(ctx, in, info, handler) } -func _VolumeServer_VolumeUnmount_Handler(srv interface{}, ctx context.Context, 
dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(VolumeUnmountRequest) +func _VolumeServer_VolumeMarkReadonly_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeMarkReadonlyRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(VolumeServerServer).VolumeUnmount(ctx, in) + return srv.(VolumeServerServer).VolumeMarkReadonly(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/volume_server_pb.VolumeServer/VolumeUnmount", + FullMethod: "/volume_server_pb.VolumeServer/VolumeMarkReadonly", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(VolumeServerServer).VolumeUnmount(ctx, req.(*VolumeUnmountRequest)) + return srv.(VolumeServerServer).VolumeMarkReadonly(ctx, req.(*VolumeMarkReadonlyRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeConfigure_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeConfigureRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeConfigure(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeConfigure", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeConfigure(ctx, req.(*VolumeConfigureRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeCopy_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeCopyRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeCopy(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeCopy", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeCopy(ctx, req.(*VolumeCopyRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_ReadVolumeFileStatus_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ReadVolumeFileStatusRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).ReadVolumeFileStatus(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/ReadVolumeFileStatus", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).ReadVolumeFileStatus(ctx, req.(*ReadVolumeFileStatusRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_CopyFile_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(CopyFileRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(VolumeServerServer).CopyFile(m, &volumeServerCopyFileServer{stream}) +} + +type VolumeServer_CopyFileServer interface { + Send(*CopyFileResponse) error + grpc.ServerStream +} + +type volumeServerCopyFileServer struct { + grpc.ServerStream +} + +func (x 
*volumeServerCopyFileServer) Send(m *CopyFileResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _VolumeServer_VolumeTailSender_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(VolumeTailSenderRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(VolumeServerServer).VolumeTailSender(m, &volumeServerVolumeTailSenderServer{stream}) +} + +type VolumeServer_VolumeTailSenderServer interface { + Send(*VolumeTailSenderResponse) error + grpc.ServerStream +} + +type volumeServerVolumeTailSenderServer struct { + grpc.ServerStream +} + +func (x *volumeServerVolumeTailSenderServer) Send(m *VolumeTailSenderResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _VolumeServer_VolumeTailReceiver_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeTailReceiverRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeTailReceiver(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeTailReceiver", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeTailReceiver(ctx, req.(*VolumeTailReceiverRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeEcShardsGenerate_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeEcShardsGenerateRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeEcShardsGenerate(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeEcShardsGenerate", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeEcShardsGenerate(ctx, req.(*VolumeEcShardsGenerateRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeEcShardsRebuild_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeEcShardsRebuildRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeEcShardsRebuild(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeEcShardsRebuild", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeEcShardsRebuild(ctx, req.(*VolumeEcShardsRebuildRequest)) } return interceptor(ctx, in, info, handler) } +func _VolumeServer_VolumeEcShardsCopy_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeEcShardsCopyRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeEcShardsCopy(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeEcShardsCopy", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeEcShardsCopy(ctx, req.(*VolumeEcShardsCopyRequest)) + } + return 
interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeEcShardsDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeEcShardsDeleteRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeEcShardsDelete(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeEcShardsDelete", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeEcShardsDelete(ctx, req.(*VolumeEcShardsDeleteRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeEcShardsMount_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeEcShardsMountRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeEcShardsMount(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeEcShardsMount", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeEcShardsMount(ctx, req.(*VolumeEcShardsMountRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeEcShardsUnmount_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeEcShardsUnmountRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeEcShardsUnmount(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeEcShardsUnmount", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeEcShardsUnmount(ctx, req.(*VolumeEcShardsUnmountRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeEcShardRead_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(VolumeEcShardReadRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(VolumeServerServer).VolumeEcShardRead(m, &volumeServerVolumeEcShardReadServer{stream}) +} + +type VolumeServer_VolumeEcShardReadServer interface { + Send(*VolumeEcShardReadResponse) error + grpc.ServerStream +} + +type volumeServerVolumeEcShardReadServer struct { + grpc.ServerStream +} + +func (x *volumeServerVolumeEcShardReadServer) Send(m *VolumeEcShardReadResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _VolumeServer_VolumeEcBlobDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeEcBlobDeleteRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeEcBlobDelete(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeEcBlobDelete", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeEcBlobDelete(ctx, req.(*VolumeEcBlobDeleteRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func 
_VolumeServer_VolumeEcShardsToVolume_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeEcShardsToVolumeRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeEcShardsToVolume(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeEcShardsToVolume", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeEcShardsToVolume(ctx, req.(*VolumeEcShardsToVolumeRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_VolumeTierMoveDatToRemote_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(VolumeTierMoveDatToRemoteRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(VolumeServerServer).VolumeTierMoveDatToRemote(m, &volumeServerVolumeTierMoveDatToRemoteServer{stream}) +} + +type VolumeServer_VolumeTierMoveDatToRemoteServer interface { + Send(*VolumeTierMoveDatToRemoteResponse) error + grpc.ServerStream +} + +type volumeServerVolumeTierMoveDatToRemoteServer struct { + grpc.ServerStream +} + +func (x *volumeServerVolumeTierMoveDatToRemoteServer) Send(m *VolumeTierMoveDatToRemoteResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _VolumeServer_VolumeTierMoveDatFromRemote_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(VolumeTierMoveDatFromRemoteRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(VolumeServerServer).VolumeTierMoveDatFromRemote(m, &volumeServerVolumeTierMoveDatFromRemoteServer{stream}) +} + +type VolumeServer_VolumeTierMoveDatFromRemoteServer interface { + Send(*VolumeTierMoveDatFromRemoteResponse) error + grpc.ServerStream +} + +type volumeServerVolumeTierMoveDatFromRemoteServer struct { + grpc.ServerStream +} + +func (x *volumeServerVolumeTierMoveDatFromRemoteServer) Send(m *VolumeTierMoveDatFromRemoteResponse) error { + return x.ServerStream.SendMsg(m) +} + +func _VolumeServer_VolumeServerStatus_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(VolumeServerStatusRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(VolumeServerServer).VolumeServerStatus(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/volume_server_pb.VolumeServer/VolumeServerStatus", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(VolumeServerServer).VolumeServerStatus(ctx, req.(*VolumeServerStatusRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _VolumeServer_Query_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(QueryRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(VolumeServerServer).Query(m, &volumeServerQueryServer{stream}) +} + +type VolumeServer_QueryServer interface { + Send(*QueriedStripe) error + grpc.ServerStream +} + +type volumeServerQueryServer struct { + grpc.ServerStream +} + +func (x *volumeServerQueryServer) Send(m *QueriedStripe) error { + return x.ServerStream.SendMsg(m) +} + var _VolumeServer_serviceDesc = grpc.ServiceDesc{ ServiceName: "volume_server_pb.VolumeServer", HandlerType: (*VolumeServerServer)(nil), @@ -1112,22 +3543,14 @@ var 
_VolumeServer_serviceDesc = grpc.ServiceDesc{ Handler: _VolumeServer_DeleteCollection_Handler, }, { - MethodName: "AssignVolume", - Handler: _VolumeServer_AssignVolume_Handler, + MethodName: "AllocateVolume", + Handler: _VolumeServer_AllocateVolume_Handler, }, { MethodName: "VolumeSyncStatus", Handler: _VolumeServer_VolumeSyncStatus_Handler, }, { - MethodName: "VolumeSyncIndex", - Handler: _VolumeServer_VolumeSyncIndex_Handler, - }, - { - MethodName: "VolumeSyncData", - Handler: _VolumeServer_VolumeSyncData_Handler, - }, - { MethodName: "VolumeMount", Handler: _VolumeServer_VolumeMount_Handler, }, @@ -1135,77 +3558,319 @@ var _VolumeServer_serviceDesc = grpc.ServiceDesc{ MethodName: "VolumeUnmount", Handler: _VolumeServer_VolumeUnmount_Handler, }, + { + MethodName: "VolumeDelete", + Handler: _VolumeServer_VolumeDelete_Handler, + }, + { + MethodName: "VolumeMarkReadonly", + Handler: _VolumeServer_VolumeMarkReadonly_Handler, + }, + { + MethodName: "VolumeConfigure", + Handler: _VolumeServer_VolumeConfigure_Handler, + }, + { + MethodName: "VolumeCopy", + Handler: _VolumeServer_VolumeCopy_Handler, + }, + { + MethodName: "ReadVolumeFileStatus", + Handler: _VolumeServer_ReadVolumeFileStatus_Handler, + }, + { + MethodName: "VolumeTailReceiver", + Handler: _VolumeServer_VolumeTailReceiver_Handler, + }, + { + MethodName: "VolumeEcShardsGenerate", + Handler: _VolumeServer_VolumeEcShardsGenerate_Handler, + }, + { + MethodName: "VolumeEcShardsRebuild", + Handler: _VolumeServer_VolumeEcShardsRebuild_Handler, + }, + { + MethodName: "VolumeEcShardsCopy", + Handler: _VolumeServer_VolumeEcShardsCopy_Handler, + }, + { + MethodName: "VolumeEcShardsDelete", + Handler: _VolumeServer_VolumeEcShardsDelete_Handler, + }, + { + MethodName: "VolumeEcShardsMount", + Handler: _VolumeServer_VolumeEcShardsMount_Handler, + }, + { + MethodName: "VolumeEcShardsUnmount", + Handler: _VolumeServer_VolumeEcShardsUnmount_Handler, + }, + { + MethodName: "VolumeEcBlobDelete", + Handler: _VolumeServer_VolumeEcBlobDelete_Handler, + }, + { + MethodName: "VolumeEcShardsToVolume", + Handler: _VolumeServer_VolumeEcShardsToVolume_Handler, + }, + { + MethodName: "VolumeServerStatus", + Handler: _VolumeServer_VolumeServerStatus_Handler, + }, + }, + Streams: []grpc.StreamDesc{ + { + StreamName: "FileGet", + Handler: _VolumeServer_FileGet_Handler, + ServerStreams: true, + }, + { + StreamName: "VolumeIncrementalCopy", + Handler: _VolumeServer_VolumeIncrementalCopy_Handler, + ServerStreams: true, + }, + { + StreamName: "CopyFile", + Handler: _VolumeServer_CopyFile_Handler, + ServerStreams: true, + }, + { + StreamName: "VolumeTailSender", + Handler: _VolumeServer_VolumeTailSender_Handler, + ServerStreams: true, + }, + { + StreamName: "VolumeEcShardRead", + Handler: _VolumeServer_VolumeEcShardRead_Handler, + ServerStreams: true, + }, + { + StreamName: "VolumeTierMoveDatToRemote", + Handler: _VolumeServer_VolumeTierMoveDatToRemote_Handler, + ServerStreams: true, + }, + { + StreamName: "VolumeTierMoveDatFromRemote", + Handler: _VolumeServer_VolumeTierMoveDatFromRemote_Handler, + ServerStreams: true, + }, + { + StreamName: "Query", + Handler: _VolumeServer_Query_Handler, + ServerStreams: true, + }, }, - Streams: []grpc.StreamDesc{}, Metadata: "volume_server.proto", } func init() { proto.RegisterFile("volume_server.proto", fileDescriptor0) } var fileDescriptor0 = []byte{ - // 1013 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x9c, 0x57, 0xdf, 0x72, 0xdb, 0xc4, - 0x17, 0x8e, 0x62, 0x3b, 0x76, 
0x8e, 0xed, 0x5f, 0xfd, 0x5b, 0xa7, 0x89, 0xaa, 0x42, 0x30, 0x02, - 0x5a, 0xa7, 0x0d, 0x61, 0x68, 0x07, 0x28, 0xc3, 0x0d, 0x90, 0x00, 0x93, 0x8b, 0x4e, 0x99, 0xcd, - 0xb4, 0xc3, 0x0c, 0x9d, 0xf1, 0x28, 0xd2, 0x3a, 0xd9, 0x89, 0x2c, 0xa9, 0xda, 0x55, 0x26, 0xe5, - 0x0d, 0x78, 0x04, 0xae, 0xb9, 0xe1, 0x9d, 0x78, 0x19, 0x66, 0xff, 0x48, 0xd1, 0x3f, 0x57, 0x82, - 0xbb, 0xd5, 0xb7, 0xe7, 0x9c, 0x6f, 0xcf, 0xd9, 0xb3, 0xe7, 0xb3, 0x61, 0x7a, 0x1d, 0xfa, 0xc9, - 0x8a, 0x2c, 0x18, 0x89, 0xaf, 0x49, 0x7c, 0x14, 0xc5, 0x21, 0x0f, 0xd1, 0xa4, 0x00, 0x2e, 0xa2, - 0x73, 0xfb, 0x33, 0x40, 0xdf, 0x3b, 0xdc, 0xbd, 0x3c, 0x21, 0x3e, 0xe1, 0x04, 0x93, 0x37, 0x09, - 0x61, 0x1c, 0xdd, 0x83, 0xc1, 0x92, 0xfa, 0x64, 0x41, 0x3d, 0x66, 0x1a, 0xb3, 0xce, 0x7c, 0x1b, - 0xf7, 0xc5, 0xf7, 0xa9, 0xc7, 0xec, 0x17, 0x30, 0x2d, 0x38, 0xb0, 0x28, 0x0c, 0x18, 0x41, 0xcf, - 0xa0, 0x1f, 0x13, 0x96, 0xf8, 0x5c, 0x39, 0x0c, 0x9f, 0xec, 0x1f, 0x95, 0xb9, 0x8e, 0x32, 0x97, - 0xc4, 0xe7, 0x38, 0x35, 0xb7, 0x29, 0x8c, 0xf2, 0x1b, 0x68, 0x0f, 0xfa, 0x9a, 0xdb, 0x34, 0x66, - 0xc6, 0x7c, 0x1b, 0x6f, 0x29, 0x6a, 0xb4, 0x0b, 0x5b, 0x8c, 0x3b, 0x3c, 0x61, 0xe6, 0xe6, 0xcc, - 0x98, 0xf7, 0xb0, 0xfe, 0x42, 0x3b, 0xd0, 0x23, 0x71, 0x1c, 0xc6, 0x66, 0x47, 0x9a, 0xab, 0x0f, - 0x84, 0xa0, 0xcb, 0xe8, 0x6f, 0xc4, 0xec, 0xce, 0x8c, 0xf9, 0x18, 0xcb, 0xb5, 0xdd, 0x87, 0xde, - 0x0f, 0xab, 0x88, 0xbf, 0xb5, 0xbf, 0x02, 0xf3, 0x95, 0xe3, 0x26, 0xc9, 0xea, 0x95, 0x3c, 0xe3, - 0xf1, 0x25, 0x71, 0xaf, 0xd2, 0xdc, 0xef, 0xc3, 0xb6, 0x3c, 0xb9, 0x97, 0x9e, 0x60, 0x8c, 0x07, - 0x0a, 0x38, 0xf5, 0xec, 0x6f, 0xe1, 0x5e, 0x8d, 0xa3, 0xae, 0xc1, 0x47, 0x30, 0xbe, 0x70, 0xe2, - 0x73, 0xe7, 0x82, 0x2c, 0x62, 0x87, 0xd3, 0x50, 0x7a, 0x1b, 0x78, 0xa4, 0x41, 0x2c, 0x30, 0xfb, - 0x57, 0xb0, 0x0a, 0x11, 0xc2, 0x55, 0xe4, 0xb8, 0xbc, 0x0d, 0x39, 0x9a, 0xc1, 0x30, 0x8a, 0x89, - 0xe3, 0xfb, 0xa1, 0xeb, 0x70, 0x22, 0xab, 0xd0, 0xc1, 0x79, 0xc8, 0x7e, 0x1f, 0xee, 0xd7, 0x06, - 0x57, 0x07, 0xb4, 0x9f, 0x95, 0x4e, 0x1f, 0xae, 0x56, 0xb4, 0x15, 0xb5, 0xfd, 0x5e, 0xe5, 0xd4, - 0xd2, 0x53, 0xc7, 0xfd, 0xba, 0xb4, 0xeb, 0x13, 0x27, 0x48, 0xa2, 0x56, 0x81, 0xcb, 0x27, 0x4e, - 0x5d, 0xb3, 0xc8, 0x7b, 0xaa, 0x39, 0x8e, 0x43, 0xdf, 0x27, 0x2e, 0xa7, 0x61, 0x90, 0x86, 0xdd, - 0x07, 0x70, 0x33, 0x50, 0xb7, 0x4a, 0x0e, 0xb1, 0x2d, 0x30, 0xab, 0xae, 0x3a, 0xec, 0x5f, 0x06, - 0x4c, 0xbf, 0x63, 0x8c, 0x5e, 0x04, 0x8a, 0xb6, 0x55, 0xf9, 0x8b, 0x84, 0x9b, 0x65, 0xc2, 0xf2, - 0xf5, 0x74, 0x2a, 0xd7, 0x23, 0x2c, 0x62, 0x12, 0xf9, 0xd4, 0x75, 0x64, 0x88, 0xae, 0x0c, 0x91, - 0x87, 0xd0, 0x04, 0x3a, 0x9c, 0xfb, 0x66, 0x4f, 0xee, 0x88, 0xa5, 0xbd, 0x0b, 0x3b, 0xc5, 0x93, - 0xea, 0x14, 0xbe, 0x84, 0x3d, 0x85, 0x9c, 0xbd, 0x0d, 0xdc, 0x33, 0xf9, 0x12, 0x5a, 0x15, 0xfc, - 0x6f, 0x03, 0xcc, 0xaa, 0xa3, 0xee, 0xe0, 0xa6, 0xf6, 0xfb, 0xb7, 0xa7, 0x47, 0x1f, 0xc0, 0x90, - 0x3b, 0xd4, 0x5f, 0x84, 0xcb, 0x25, 0x23, 0xdc, 0xdc, 0x9a, 0x19, 0xf3, 0x2e, 0x06, 0x01, 0xbd, - 0x90, 0x08, 0x3a, 0x80, 0x89, 0xab, 0xba, 0x74, 0x11, 0x93, 0x6b, 0xca, 0x44, 0xe4, 0xbe, 0x24, - 0xbe, 0xe3, 0xa6, 0xdd, 0xab, 0x60, 0x64, 0xc3, 0x98, 0x7a, 0x37, 0x0b, 0x39, 0x1c, 0xe4, 0xd3, - 0x1e, 0xc8, 0x68, 0x43, 0xea, 0xdd, 0xfc, 0x48, 0x7d, 0x72, 0x26, 0x5e, 0xf8, 0x17, 0xb0, 0x7b, - 0x9b, 0xdc, 0x69, 0xe0, 0x91, 0x9b, 0x56, 0x45, 0xf9, 0x29, 0x5f, 0x4c, 0xed, 0xa6, 0x4b, 0x72, - 0x08, 0x88, 0x0a, 0x40, 0xf1, 0xba, 0x61, 0xc0, 0x49, 0xc0, 0x65, 0x80, 0x11, 0x9e, 0xc8, 0x1d, - 0x41, 0x7e, 0xac, 0x70, 0xfb, 0x0f, 0x03, 0xee, 0xde, 0x46, 0x3a, 0x71, 0xb8, 0xd3, 0xaa, 0xb5, - 0x2c, 0x18, 0x64, 0xd9, 0x6f, 0xaa, 0xbd, 0xf4, 0x5b, 
0x8c, 0x3d, 0x5d, 0xbd, 0x8e, 0xdc, 0xd1, - 0x5f, 0x75, 0x03, 0x4e, 0x90, 0x04, 0x84, 0x78, 0x6a, 0x7a, 0xaa, 0x6b, 0x18, 0x28, 0xe0, 0xd4, - 0xb3, 0xbf, 0xc9, 0xd7, 0x46, 0x1d, 0x4d, 0xe7, 0xf8, 0x21, 0x8c, 0x6a, 0xb2, 0x1b, 0x2e, 0x73, - 0x89, 0x7d, 0x0e, 0x48, 0x39, 0x3f, 0x0f, 0x93, 0xa0, 0xdd, 0xcc, 0xb8, 0x0b, 0xd3, 0x82, 0x8b, - 0x6e, 0xdc, 0xa7, 0xb0, 0xa3, 0xe0, 0x97, 0xc1, 0xaa, 0x75, 0xac, 0xbd, 0xb4, 0xac, 0x99, 0x93, - 0x8e, 0x96, 0x91, 0xbc, 0xa4, 0x3f, 0x8b, 0x19, 0xab, 0x82, 0x89, 0x57, 0x53, 0x84, 0xb5, 0xf9, - 0x2f, 0x00, 0x27, 0x94, 0x5d, 0xa9, 0xb6, 0x17, 0xfd, 0xea, 0xd1, 0x58, 0xcf, 0x0e, 0xb1, 0x14, - 0x88, 0xe3, 0xfb, 0xf2, 0x0e, 0xba, 0x58, 0x2c, 0x45, 0x99, 0x13, 0x46, 0x3c, 0x59, 0xfc, 0x2e, - 0x96, 0x6b, 0x81, 0x2d, 0x63, 0xa2, 0x4a, 0xdf, 0xc5, 0x72, 0x6d, 0xff, 0x69, 0xc0, 0xf6, 0x73, - 0xb2, 0xd2, 0x91, 0xf7, 0x01, 0x2e, 0xc2, 0x38, 0x4c, 0x38, 0x0d, 0x08, 0x93, 0x04, 0x3d, 0x9c, - 0x43, 0xfe, 0x3b, 0x8f, 0xbc, 0x76, 0xe2, 0x2f, 0xe5, 0xed, 0x76, 0xb1, 0x5c, 0x0b, 0xec, 0x92, - 0x38, 0x91, 0x7e, 0x5e, 0x72, 0x2d, 0x54, 0x91, 0x71, 0xc7, 0xbd, 0x92, 0xaf, 0xa9, 0x8b, 0xd5, - 0xc7, 0x93, 0xdf, 0x01, 0x46, 0xba, 0x09, 0xa4, 0x2c, 0xa3, 0xd7, 0x30, 0xcc, 0xc9, 0x39, 0xfa, - 0xb8, 0xaa, 0xda, 0xd5, 0x9f, 0x07, 0xd6, 0x27, 0x0d, 0x56, 0xba, 0xd8, 0x1b, 0x28, 0x80, 0xff, - 0x57, 0xe4, 0x12, 0x3d, 0xaa, 0x7a, 0xaf, 0x13, 0x63, 0xeb, 0x71, 0x2b, 0xdb, 0x8c, 0x8f, 0xc3, - 0xb4, 0x46, 0xff, 0xd0, 0x61, 0x43, 0x94, 0x82, 0x06, 0x5b, 0x9f, 0xb6, 0xb4, 0xce, 0x58, 0xdf, - 0x00, 0xaa, 0x8a, 0x23, 0x7a, 0xdc, 0x18, 0xe6, 0x56, 0x7c, 0xad, 0xc3, 0x76, 0xc6, 0x6b, 0x13, - 0x55, 0xb2, 0xd9, 0x98, 0x68, 0x41, 0x98, 0x1b, 0x13, 0x2d, 0x69, 0xf1, 0x06, 0xba, 0x82, 0x49, - 0x59, 0x52, 0xd1, 0xc1, 0xba, 0xdf, 0x79, 0x15, 0xc5, 0xb6, 0x1e, 0xb5, 0x31, 0xcd, 0xc8, 0x16, - 0x30, 0xca, 0x0b, 0x1f, 0xaa, 0x69, 0xba, 0x1a, 0x09, 0xb7, 0x1e, 0x34, 0x99, 0xe5, 0xb3, 0x29, - 0x0b, 0x61, 0x5d, 0x36, 0x6b, 0x54, 0xb6, 0x2e, 0x9b, 0x75, 0xba, 0x6a, 0x6f, 0xa0, 0x4b, 0xb8, - 0x53, 0x52, 0x18, 0x34, 0x7f, 0x57, 0x80, 0xbc, 0x76, 0x59, 0x07, 0x2d, 0x2c, 0x33, 0x26, 0x02, - 0xff, 0x2b, 0x8e, 0x79, 0xf4, 0xf0, 0x5d, 0xee, 0x39, 0x8d, 0xb2, 0xe6, 0xcd, 0x86, 0x19, 0xcd, - 0x6b, 0x18, 0xe6, 0xa6, 0x7b, 0xdd, 0xe0, 0xa8, 0xea, 0x45, 0xdd, 0xe0, 0xa8, 0x93, 0x88, 0x0d, - 0x74, 0x0e, 0xe3, 0xc2, 0xbc, 0x47, 0x0f, 0xd6, 0x79, 0x16, 0x55, 0xc4, 0x7a, 0xd8, 0x68, 0x97, - 0x72, 0x9c, 0x6f, 0xc9, 0xff, 0x44, 0x4f, 0xff, 0x09, 0x00, 0x00, 0xff, 0xff, 0x60, 0xe6, 0x57, - 0xdf, 0x2a, 0x0d, 0x00, 0x00, + // 3280 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xd4, 0x3b, 0x4d, 0x6f, 0x1c, 0xc7, + 0xb1, 0x1c, 0x2e, 0x3f, 0x76, 0x6b, 0x77, 0x45, 0xaa, 0x49, 0x53, 0xeb, 0x21, 0x29, 0x51, 0x23, + 0x7f, 0x90, 0xb2, 0x45, 0xca, 0xb4, 0xfd, 0xac, 0x27, 0x3f, 0xfb, 0x59, 0xa2, 0x44, 0x59, 0xb6, + 0x48, 0xd9, 0x43, 0x59, 0x7e, 0x2f, 0x36, 0x32, 0x18, 0xce, 0xf4, 0x92, 0x63, 0xce, 0xce, 0x8c, + 0x66, 0x7a, 0x69, 0xad, 0xe0, 0x9c, 0x1c, 0x20, 0x01, 0x82, 0xe4, 0x10, 0xe4, 0x92, 0x4b, 0x80, + 0x20, 0xf7, 0x5c, 0xf3, 0x17, 0xfc, 0x07, 0x02, 0xe4, 0x94, 0x4b, 0xce, 0x39, 0xe4, 0x10, 0x20, + 0x40, 0x2e, 0x41, 0x7f, 0xcd, 0xce, 0x27, 0x77, 0x18, 0x31, 0x08, 0x72, 0x9b, 0xae, 0xae, 0xae, + 0xea, 0xaa, 0xae, 0xaa, 0xae, 0xae, 0xda, 0x85, 0xb9, 0x63, 0xdf, 0xed, 0xf7, 0xb0, 0x11, 0xe1, + 0xf0, 0x18, 0x87, 0xeb, 0x41, 0xe8, 0x13, 0x1f, 0xcd, 0xa6, 0x80, 0x46, 0xb0, 0xaf, 0x6d, 0x00, + 0xba, 0x6d, 0x12, 0xeb, 0xf0, 0x0e, 0x76, 0x31, 0xc1, 0x3a, 0x7e, 0xd2, 0xc7, 0x11, 0x41, 0x2f, + 
0x42, 0xbd, 0xeb, 0xb8, 0xd8, 0x70, 0xec, 0xa8, 0xa3, 0xac, 0xd4, 0x56, 0x1b, 0xfa, 0x34, 0x1d, + 0xdf, 0xb7, 0x23, 0xed, 0x21, 0xcc, 0xa5, 0x16, 0x44, 0x81, 0xef, 0x45, 0x18, 0xdd, 0x80, 0xe9, + 0x10, 0x47, 0x7d, 0x97, 0xf0, 0x05, 0xcd, 0xcd, 0x8b, 0xeb, 0x59, 0x5e, 0xeb, 0xf1, 0x92, 0xbe, + 0x4b, 0x74, 0x89, 0xae, 0x7d, 0xab, 0x40, 0x2b, 0x39, 0x83, 0x2e, 0xc0, 0xb4, 0x60, 0xde, 0x51, + 0x56, 0x94, 0xd5, 0x86, 0x3e, 0xc5, 0x79, 0xa3, 0x05, 0x98, 0x8a, 0x88, 0x49, 0xfa, 0x51, 0x67, + 0x7c, 0x45, 0x59, 0x9d, 0xd4, 0xc5, 0x08, 0xcd, 0xc3, 0x24, 0x0e, 0x43, 0x3f, 0xec, 0xd4, 0x18, + 0x3a, 0x1f, 0x20, 0x04, 0x13, 0x91, 0xf3, 0x0c, 0x77, 0x26, 0x56, 0x94, 0xd5, 0xb6, 0xce, 0xbe, + 0x51, 0x07, 0xa6, 0x8f, 0x71, 0x18, 0x39, 0xbe, 0xd7, 0x99, 0x64, 0x60, 0x39, 0xd4, 0x3e, 0x82, + 0x73, 0xdb, 0x8e, 0x8b, 0xef, 0x61, 0x22, 0x75, 0x50, 0xba, 0x8d, 0x4b, 0xd0, 0x34, 0x2d, 0x0b, + 0x07, 0xc4, 0x38, 0x78, 0xe6, 0x04, 0x6c, 0x2f, 0x75, 0x1d, 0x38, 0xe8, 0xde, 0x33, 0x27, 0xd0, + 0x7e, 0x54, 0x83, 0x99, 0x98, 0x98, 0xd0, 0x0f, 0x82, 0x09, 0xdb, 0x24, 0x26, 0x23, 0xd5, 0xd2, + 0xd9, 0x37, 0x7a, 0x19, 0xce, 0x59, 0xbe, 0x47, 0xb0, 0x47, 0x0c, 0x17, 0x7b, 0x07, 0xe4, 0x90, + 0xd1, 0x6a, 0xeb, 0x6d, 0x01, 0x7d, 0xc0, 0x80, 0xe8, 0x32, 0xb4, 0x24, 0x1a, 0x19, 0x04, 0x58, + 0x48, 0xd9, 0x14, 0xb0, 0x47, 0x83, 0x00, 0xa3, 0x2b, 0xd0, 0x76, 0xcd, 0x88, 0x18, 0x3d, 0xdf, + 0x76, 0xba, 0x0e, 0xb6, 0x99, 0xd0, 0x13, 0x7a, 0x8b, 0x02, 0x77, 0x04, 0x0c, 0xa9, 0xfc, 0x50, + 0x3d, 0xb3, 0x87, 0x99, 0xf4, 0x0d, 0x3d, 0x1e, 0xd3, 0xed, 0x61, 0x62, 0x1e, 0x74, 0xa6, 0x18, + 0x9c, 0x7d, 0xa3, 0x65, 0x00, 0x27, 0x62, 0x32, 0x06, 0xd8, 0xee, 0x4c, 0x33, 0x31, 0x1b, 0x4e, + 0x74, 0x8f, 0x03, 0xd0, 0x87, 0x30, 0x7d, 0x88, 0x4d, 0x1b, 0x87, 0x51, 0xa7, 0xce, 0x4e, 0x7c, + 0x3d, 0x7f, 0xe2, 0x19, 0x2d, 0xac, 0x7f, 0xc8, 0x17, 0xdc, 0xf5, 0x48, 0x38, 0xd0, 0xe5, 0x72, + 0xb4, 0x04, 0x0d, 0x76, 0x64, 0x5b, 0xbe, 0x8d, 0x3b, 0x0d, 0x76, 0xb4, 0x43, 0x80, 0x7a, 0x13, + 0x5a, 0xc9, 0x65, 0x68, 0x16, 0x6a, 0x47, 0x78, 0x20, 0xce, 0x84, 0x7e, 0xd2, 0xf3, 0x3f, 0x36, + 0xdd, 0x3e, 0x66, 0xea, 0x6b, 0xe8, 0x7c, 0x70, 0x73, 0xfc, 0x86, 0xa2, 0x4d, 0xc3, 0xe4, 0xdd, + 0x5e, 0x40, 0x06, 0xda, 0x3b, 0xd0, 0x79, 0x6c, 0x5a, 0xfd, 0x7e, 0xef, 0x31, 0xdb, 0xe2, 0xd6, + 0x21, 0xb6, 0x8e, 0xe4, 0x41, 0x2f, 0x42, 0x43, 0x6c, 0x5c, 0x1c, 0x75, 0x5b, 0xaf, 0x73, 0xc0, + 0x7d, 0x5b, 0xfb, 0x00, 0x5e, 0x2c, 0x58, 0x28, 0x0e, 0xf5, 0x0a, 0xb4, 0x0f, 0xcc, 0x70, 0xdf, + 0x3c, 0xc0, 0x46, 0x68, 0x12, 0xc7, 0x67, 0xab, 0x15, 0xbd, 0x25, 0x80, 0x3a, 0x85, 0x69, 0x5f, + 0x80, 0x9a, 0xa2, 0xe0, 0xf7, 0x02, 0xd3, 0x22, 0x55, 0x98, 0xa3, 0x15, 0x68, 0x06, 0x21, 0x36, + 0x5d, 0xd7, 0xb7, 0x4c, 0xc2, 0xc5, 0xab, 0xe9, 0x49, 0x90, 0xb6, 0x0c, 0x8b, 0x85, 0xc4, 0xf9, + 0x06, 0xb5, 0x1b, 0x99, 0xdd, 0xfb, 0xbd, 0x9e, 0x53, 0x89, 0xb5, 0xb6, 0x94, 0xdb, 0x35, 0x5b, + 0x29, 0xe8, 0xfe, 0x77, 0x66, 0xd6, 0xc5, 0xa6, 0xd7, 0x0f, 0x2a, 0x11, 0xce, 0xee, 0x58, 0x2e, + 0x8d, 0x29, 0x5f, 0xe0, 0xc1, 0x60, 0xcb, 0x77, 0x5d, 0x6c, 0x11, 0xc7, 0xf7, 0x24, 0xd9, 0x8b, + 0x00, 0x56, 0x0c, 0x14, 0xe7, 0x9f, 0x80, 0x68, 0x2a, 0x74, 0xf2, 0x4b, 0x05, 0xd9, 0x3f, 0x2a, + 0xf0, 0xc2, 0x2d, 0xa1, 0x34, 0xce, 0xb8, 0xd2, 0x01, 0xa4, 0x59, 0x8e, 0x67, 0x59, 0x66, 0x0f, + 0xa8, 0x96, 0x3b, 0x20, 0x8a, 0x11, 0xe2, 0xc0, 0x75, 0x2c, 0x93, 0x91, 0x98, 0xe0, 0xbe, 0x9b, + 0x00, 0x51, 0x7b, 0x26, 0xc4, 0x15, 0x1e, 0x49, 0x3f, 0xd1, 0x26, 0x2c, 0xf4, 0x70, 0xcf, 0x0f, + 0x07, 0x46, 0xcf, 0x0c, 0x8c, 0x9e, 0xf9, 0xd4, 0xa0, 0xc1, 0xcb, 0xe8, 0xed, 0x33, 0xf7, 0x6c, + 0xeb, 0x88, 0xcf, 0xee, 
0x98, 0xc1, 0x8e, 0xf9, 0x74, 0xcf, 0x79, 0x86, 0x77, 0xf6, 0xb5, 0x0e, + 0x2c, 0x64, 0xe5, 0x13, 0xa2, 0xff, 0x17, 0x5c, 0xe0, 0x90, 0xbd, 0x81, 0x67, 0xed, 0xb1, 0x88, + 0x59, 0xe9, 0xa0, 0xfe, 0xae, 0x40, 0x27, 0xbf, 0x50, 0x58, 0xfe, 0xf3, 0x6a, 0xed, 0xd4, 0x3a, + 0xb9, 0x04, 0x4d, 0x62, 0x3a, 0xae, 0xe1, 0x77, 0xbb, 0x11, 0x26, 0x4c, 0x11, 0x13, 0x3a, 0x50, + 0xd0, 0x43, 0x06, 0x41, 0x6b, 0x30, 0x6b, 0x71, 0xeb, 0x37, 0x42, 0x7c, 0xec, 0xb0, 0x18, 0x3f, + 0xcd, 0x36, 0x36, 0x63, 0x49, 0xaf, 0xe0, 0x60, 0xa4, 0x41, 0xdb, 0xb1, 0x9f, 0x1a, 0x2c, 0xba, + 0xb3, 0x2b, 0xa2, 0xce, 0xa8, 0x35, 0x1d, 0xfb, 0x29, 0x0d, 0x58, 0x54, 0xa3, 0xda, 0x63, 0x58, + 0xe2, 0xc2, 0xdf, 0xf7, 0xac, 0x10, 0xf7, 0xb0, 0x47, 0x4c, 0x77, 0xcb, 0x0f, 0x06, 0x95, 0xcc, + 0xe6, 0x45, 0xa8, 0x47, 0x8e, 0x67, 0x61, 0xc3, 0xe3, 0x57, 0xd5, 0x84, 0x3e, 0xcd, 0xc6, 0xbb, + 0x91, 0x76, 0x1b, 0x96, 0x4b, 0xe8, 0x0a, 0xcd, 0x5e, 0x86, 0x16, 0xdb, 0x98, 0x08, 0xef, 0xe2, + 0xc2, 0x68, 0x52, 0xd8, 0x16, 0x07, 0x69, 0x6f, 0x00, 0xe2, 0x34, 0x76, 0xfc, 0xbe, 0x57, 0xcd, + 0x9d, 0x5f, 0x80, 0xb9, 0xd4, 0x12, 0x61, 0x1b, 0x6f, 0xc2, 0x3c, 0x07, 0x7f, 0xe6, 0xf5, 0x2a, + 0xd3, 0xba, 0x00, 0x2f, 0x64, 0x16, 0x09, 0x6a, 0x9b, 0x92, 0x49, 0x3a, 0x99, 0x38, 0x91, 0xd8, + 0x82, 0xdc, 0x41, 0x3a, 0x9f, 0x60, 0x91, 0x8b, 0x6f, 0xd8, 0x0c, 0x8f, 0x74, 0x6c, 0xda, 0xbe, + 0xe7, 0x0e, 0x2a, 0x47, 0xae, 0x82, 0x95, 0x82, 0xee, 0xe7, 0xb0, 0x20, 0x23, 0x9a, 0xd7, 0x75, + 0x0e, 0xfa, 0x21, 0xae, 0x1a, 0x89, 0x93, 0x26, 0x3b, 0x9e, 0x33, 0x59, 0x6d, 0x43, 0xba, 0x59, + 0x82, 0xb0, 0x38, 0xd2, 0x38, 0x3f, 0x51, 0x12, 0xf9, 0x89, 0xf6, 0x5b, 0x05, 0xce, 0xcb, 0x15, + 0x15, 0xed, 0xea, 0x94, 0x8e, 0x55, 0x2b, 0x75, 0xac, 0x89, 0xa1, 0x63, 0xad, 0xc2, 0x6c, 0xe4, + 0xf7, 0x43, 0x0b, 0x1b, 0x34, 0x27, 0x31, 0x3c, 0x7a, 0x07, 0x73, 0xbf, 0x3b, 0xc7, 0xe1, 0x77, + 0x4c, 0x62, 0xee, 0xfa, 0x36, 0xd6, 0xfe, 0x57, 0x9a, 0x5d, 0xca, 0x5e, 0xd7, 0xe0, 0x3c, 0x4b, + 0x3d, 0xcc, 0x20, 0xc0, 0x9e, 0x6d, 0x98, 0x84, 0x1a, 0xbd, 0xc2, 0x8c, 0xfe, 0x1c, 0x9d, 0xb8, + 0xc5, 0xe0, 0xb7, 0xc8, 0x6e, 0xa4, 0xfd, 0x62, 0x1c, 0x66, 0xe8, 0x5a, 0xea, 0x64, 0x95, 0xe4, + 0x9d, 0x85, 0x1a, 0x7e, 0x4a, 0x84, 0xa0, 0xf4, 0x13, 0x6d, 0xc0, 0x9c, 0xf0, 0x66, 0xc7, 0xf7, + 0x86, 0x8e, 0x5e, 0xe3, 0x71, 0x71, 0x38, 0x15, 0xfb, 0xfa, 0x25, 0x68, 0x46, 0xc4, 0x0f, 0x64, + 0xdc, 0xe0, 0x79, 0x11, 0x50, 0x90, 0x88, 0x1b, 0x69, 0x9d, 0x4e, 0x16, 0xe8, 0xb4, 0xe5, 0x44, + 0x06, 0xb6, 0x0c, 0xbe, 0x2b, 0x16, 0x79, 0xea, 0x3a, 0x38, 0xd1, 0x5d, 0x8b, 0x6b, 0x03, 0xbd, + 0x0f, 0x4b, 0xce, 0x81, 0xe7, 0x87, 0xd8, 0x10, 0x8a, 0x64, 0xfe, 0xeb, 0xf9, 0xc4, 0xe8, 0xfa, + 0x7d, 0x4f, 0x66, 0x4e, 0x1d, 0x8e, 0xb3, 0xc7, 0x50, 0xa8, 0x06, 0x76, 0x7d, 0xb2, 0x4d, 0xe7, + 0xb5, 0xb7, 0x61, 0x76, 0xa8, 0x95, 0xea, 0x51, 0xe0, 0x5b, 0x45, 0x5a, 0xdc, 0x23, 0xd3, 0x71, + 0xf7, 0xb0, 0x67, 0xe3, 0xf0, 0x39, 0xa3, 0x13, 0xba, 0x0e, 0xf3, 0x8e, 0xed, 0x62, 0x83, 0x38, + 0x3d, 0xec, 0xf7, 0x89, 0x11, 0x61, 0xcb, 0xf7, 0xec, 0x48, 0xea, 0x97, 0xce, 0x3d, 0xe2, 0x53, + 0x7b, 0x7c, 0x46, 0xfb, 0x61, 0x7c, 0x4b, 0x24, 0x77, 0x31, 0xcc, 0x8f, 0x3c, 0x8c, 0x29, 0x41, + 0x9e, 0xea, 0x09, 0x31, 0x5a, 0x1c, 0xc8, 0xb3, 0x3a, 0x7a, 0x42, 0x02, 0x69, 0xdf, 0xb7, 0x07, + 0x6c, 0x47, 0x2d, 0x1d, 0x38, 0xe8, 0xb6, 0x6f, 0x0f, 0x58, 0xb8, 0x8e, 0x0c, 0x66, 0x64, 0xd6, + 0x61, 0xdf, 0x3b, 0x62, 0xbb, 0xa9, 0xeb, 0x4d, 0x27, 0x7a, 0x60, 0x46, 0x64, 0x8b, 0x82, 0xb4, + 0xdf, 0x29, 0x32, 0x5e, 0xd0, 0x6d, 0xe8, 0xd8, 0xc2, 0xce, 0xf1, 0xbf, 0x41, 0x1d, 0x74, 0x85, + 0x30, 0x82, 0x54, 0x2e, 0x2c, 0x1c, 0x0e, 0xf1, 
0x39, 0x71, 0xab, 0xb2, 0x99, 0x61, 0xb8, 0x4a, + 0x6f, 0x5c, 0x84, 0xab, 0x2f, 0xe5, 0x75, 0x71, 0xd7, 0xda, 0x3b, 0x34, 0x43, 0x3b, 0xba, 0x87, + 0x3d, 0x1c, 0x9a, 0xe4, 0x4c, 0xd2, 0x17, 0x6d, 0x05, 0x2e, 0x96, 0x51, 0x17, 0xfc, 0xbf, 0x90, + 0xd7, 0xa0, 0xc4, 0xd0, 0xf1, 0x7e, 0xdf, 0x71, 0xed, 0x33, 0x61, 0xff, 0x71, 0x56, 0xb8, 0x98, + 0xb8, 0xb0, 0x9f, 0xab, 0x70, 0x3e, 0x64, 0x20, 0x62, 0x44, 0x14, 0x21, 0x7e, 0x8f, 0xb6, 0xf5, + 0x19, 0x31, 0xc1, 0x16, 0xd2, 0x77, 0xe9, 0x4f, 0xc6, 0xa5, 0x05, 0x48, 0x6a, 0x67, 0x16, 0x56, + 0x17, 0xa1, 0x31, 0x64, 0x5f, 0x63, 0xec, 0xeb, 0x91, 0xe0, 0x4b, 0xad, 0xd3, 0xf2, 0x83, 0x81, + 0x81, 0x2d, 0x9e, 0x51, 0xb0, 0xa3, 0xae, 0xd3, 0xe7, 0x59, 0x30, 0xb8, 0x6b, 0xb1, 0x84, 0xa2, + 0x7a, 0x8c, 0x4d, 0x50, 0xfb, 0x8a, 0x53, 0x9b, 0x4a, 0x52, 0xfb, 0x8a, 0x51, 0x93, 0x38, 0xc7, + 0x4e, 0x97, 0xe3, 0x4c, 0x0f, 0x71, 0x1e, 0x3b, 0x5d, 0x8a, 0x33, 0xb4, 0xaa, 0xb4, 0x32, 0xc4, + 0xa9, 0x7e, 0x0d, 0x8b, 0xe9, 0xd9, 0xea, 0x17, 0xf6, 0x73, 0x29, 0x4b, 0xbb, 0x98, 0x35, 0xa7, + 0xcc, 0xad, 0x7f, 0x9c, 0xdd, 0x76, 0xe5, 0x0c, 0xe7, 0xf9, 0xf6, 0xb5, 0x9c, 0x55, 0x48, 0x3a, + 0x4d, 0xfa, 0xbf, 0xec, 0xb6, 0x4f, 0x91, 0x2e, 0x9d, 0xcc, 0xf8, 0x52, 0xd6, 0x05, 0xb2, 0x39, + 0xd5, 0x2f, 0xe3, 0xf8, 0x2a, 0x30, 0x68, 0x46, 0x53, 0x39, 0xae, 0x09, 0xbe, 0xa2, 0xae, 0x30, + 0x2d, 0xd8, 0xa2, 0x05, 0x98, 0x12, 0xf7, 0x21, 0x7f, 0xb1, 0x88, 0x51, 0xaa, 0x64, 0x52, 0x13, + 0x25, 0x13, 0x59, 0x0a, 0xa2, 0x6f, 0xee, 0x49, 0x1e, 0x1e, 0xe9, 0xf8, 0x63, 0x3c, 0xd0, 0x76, + 0x33, 0x1e, 0xc7, 0xb7, 0x76, 0x42, 0xc1, 0x83, 0x57, 0x14, 0x6c, 0x76, 0xe6, 0xb6, 0x28, 0x9c, + 0x34, 0x1c, 0x61, 0x04, 0xb6, 0xf6, 0x53, 0x65, 0x48, 0xf0, 0xb6, 0xeb, 0xef, 0x9f, 0xa1, 0x55, + 0x26, 0xa5, 0xa8, 0xa5, 0xa4, 0x48, 0xd6, 0x84, 0x26, 0xd2, 0x35, 0xa1, 0x84, 0x13, 0x25, 0xb7, + 0x53, 0x16, 0x9a, 0x1f, 0xf9, 0x67, 0xf7, 0xb2, 0xcc, 0x87, 0xe6, 0x21, 0x75, 0xc1, 0xff, 0x26, + 0x2c, 0x52, 0x85, 0x73, 0x28, 0x7b, 0xb7, 0x54, 0x7f, 0xdb, 0xfd, 0x79, 0x1c, 0x96, 0x8a, 0x17, + 0x57, 0x79, 0xdf, 0xbd, 0x0b, 0x6a, 0xfc, 0x7e, 0xa2, 0x57, 0x63, 0x44, 0xcc, 0x5e, 0x10, 0x5f, + 0x8e, 0xfc, 0x0e, 0xbd, 0x20, 0x1e, 0x53, 0x8f, 0xe4, 0xbc, 0xbc, 0x21, 0x73, 0x8f, 0xaf, 0x5a, + 0xee, 0xf1, 0x45, 0x19, 0xd8, 0x26, 0x29, 0x63, 0xc0, 0x73, 0xb8, 0x0b, 0xb6, 0x49, 0xca, 0x18, + 0xc4, 0x8b, 0x19, 0x03, 0x6e, 0xb5, 0x4d, 0x81, 0xcf, 0x18, 0x2c, 0x03, 0x88, 0xf4, 0xaa, 0xef, + 0xc9, 0xc7, 0x64, 0x83, 0x27, 0x57, 0x7d, 0xaf, 0x34, 0xcb, 0x9c, 0x2e, 0xcd, 0x32, 0xd3, 0xa7, + 0x59, 0xcf, 0x9d, 0xe6, 0xaf, 0x14, 0x80, 0x3b, 0x4e, 0x74, 0xc4, 0xb5, 0x4c, 0xf3, 0x5a, 0xdb, + 0x91, 0xcf, 0x01, 0xfa, 0x49, 0x21, 0xa6, 0xeb, 0x0a, 0xdd, 0xd1, 0x4f, 0xea, 0x3f, 0xfd, 0x08, + 0xdb, 0x42, 0x3d, 0xec, 0x9b, 0xc2, 0xba, 0x21, 0xc6, 0x42, 0x03, 0xec, 0x9b, 0x66, 0x8a, 0x01, + 0x0e, 0x2d, 0xec, 0x11, 0x83, 0xcd, 0x51, 0x69, 0xc7, 0xf5, 0xa6, 0x80, 0x6d, 0x67, 0x50, 0x18, + 0xc9, 0xa9, 0x14, 0xca, 0x67, 0x11, 0xb6, 0xb5, 0xdf, 0x28, 0xd0, 0xd8, 0xc1, 0x3d, 0xb1, 0xbf, + 0x8b, 0x00, 0x07, 0x7e, 0xe8, 0xf7, 0x89, 0xe3, 0x61, 0x9e, 0xcc, 0x4f, 0xea, 0x09, 0xc8, 0x73, + 0xec, 0x96, 0x46, 0x18, 0xec, 0x76, 0xc5, 0x99, 0xb0, 0x6f, 0x0a, 0x3b, 0xc4, 0x66, 0x20, 0x8e, + 0x81, 0x7d, 0xd3, 0x27, 0x53, 0x44, 0x4c, 0xeb, 0x88, 0xe9, 0x7c, 0x42, 0xe7, 0x03, 0xed, 0x0f, + 0x0a, 0x80, 0x8e, 0x7b, 0x3e, 0x61, 0x26, 0x4b, 0xe5, 0xda, 0x37, 0xad, 0x23, 0xfa, 0xec, 0x60, + 0x85, 0x51, 0xae, 0xcf, 0xa6, 0x80, 0xb1, 0xc2, 0xe8, 0x32, 0x80, 0x44, 0x11, 0x61, 0xb0, 0xa1, + 0x37, 0x04, 0x84, 0x3f, 0x30, 0x64, 0x44, 0x10, 0xb5, 0xc4, 0x61, 0x68, 
0xe4, 0xdb, 0x96, 0xa1, + 0x71, 0x11, 0x1a, 0x59, 0x8b, 0x62, 0x11, 0x85, 0x99, 0xd3, 0x15, 0x68, 0xcb, 0xca, 0x2b, 0xb3, + 0x57, 0x21, 0x4a, 0x4b, 0x02, 0xa9, 0x8d, 0xb2, 0x2a, 0xe7, 0x53, 0x82, 0xbd, 0xd8, 0x94, 0x1a, + 0xfa, 0x10, 0xa0, 0x7d, 0x03, 0x20, 0xeb, 0x02, 0x5d, 0x1f, 0x6d, 0xc2, 0x24, 0x25, 0x2e, 0x6b, + 0xe9, 0x4b, 0xf9, 0xca, 0xea, 0x50, 0x0d, 0x3a, 0x47, 0x4d, 0xc6, 0xb1, 0xf1, 0x54, 0x1c, 0x1b, + 0xfd, 0x2c, 0xd4, 0xbe, 0x53, 0x60, 0x45, 0x64, 0xa1, 0x0e, 0x0e, 0x77, 0xfc, 0x63, 0x9a, 0x91, + 0x3c, 0xf2, 0x39, 0x93, 0x33, 0x09, 0xc0, 0x37, 0xa0, 0x63, 0xe3, 0x88, 0x38, 0x1e, 0x63, 0x68, + 0xc8, 0x43, 0x61, 0xc5, 0x68, 0xbe, 0xa1, 0x85, 0xc4, 0xfc, 0x6d, 0x3e, 0xbd, 0x6b, 0xf6, 0x30, + 0xba, 0x06, 0x73, 0x47, 0x18, 0x07, 0x86, 0xeb, 0x5b, 0xa6, 0x6b, 0x48, 0xd7, 0x16, 0x69, 0xd6, + 0x2c, 0x9d, 0x7a, 0x40, 0x67, 0xee, 0x70, 0xf7, 0xd6, 0x22, 0xb8, 0x7c, 0x82, 0x24, 0x22, 0xbc, + 0x2d, 0x41, 0x23, 0x08, 0x7d, 0x0b, 0x47, 0xd4, 0x66, 0x15, 0x76, 0xdb, 0x0d, 0x01, 0xe8, 0x3a, + 0xcc, 0xc5, 0x83, 0x4f, 0xb8, 0x93, 0x98, 0x07, 0xbc, 0xfc, 0x3a, 0xae, 0x17, 0x4d, 0x69, 0x3f, + 0x57, 0x40, 0xcb, 0x71, 0xdd, 0x0e, 0xfd, 0xde, 0x19, 0x6a, 0x70, 0x03, 0xe6, 0x99, 0x1e, 0x42, + 0x46, 0x72, 0xa8, 0x08, 0xfe, 0x1a, 0x3a, 0x4f, 0xe7, 0x38, 0x37, 0xa9, 0x89, 0x3e, 0x5c, 0x39, + 0x71, 0x4f, 0xff, 0x22, 0x5d, 0x2c, 0xca, 0x4b, 0x9c, 0x3f, 0x70, 0x52, 0xb7, 0x92, 0xf6, 0x6b, + 0x45, 0xde, 0xa9, 0xe9, 0x59, 0xb1, 0x97, 0x5b, 0xd0, 0xb6, 0x9d, 0xe8, 0xc8, 0xe0, 0x8d, 0x9d, + 0x93, 0xec, 0x7f, 0x18, 0x4d, 0xf5, 0x96, 0x1d, 0x7f, 0xe3, 0x08, 0x7d, 0x00, 0x6d, 0x51, 0x3c, + 0x4d, 0xf4, 0x8a, 0x9a, 0x9b, 0x8b, 0x79, 0x12, 0x71, 0xbc, 0xd3, 0x5b, 0x7c, 0x05, 0x1f, 0x69, + 0x7f, 0x6b, 0x41, 0xeb, 0xd3, 0x3e, 0x0e, 0x07, 0x89, 0xc2, 0x73, 0x84, 0xc5, 0x31, 0xc8, 0x7e, + 0x58, 0x02, 0x42, 0x6f, 0x9c, 0x6e, 0xe8, 0xf7, 0x8c, 0xb8, 0x65, 0x36, 0xce, 0x50, 0x9a, 0x14, + 0xb8, 0xcd, 0xdb, 0x66, 0xe8, 0x3d, 0x98, 0xea, 0x3a, 0x2e, 0xc1, 0xbc, 0x49, 0xd5, 0xdc, 0x7c, + 0x39, 0xbf, 0x9f, 0x24, 0xcf, 0xf5, 0x6d, 0x86, 0xac, 0x8b, 0x45, 0x68, 0x1f, 0xe6, 0x1c, 0x2f, + 0x60, 0x4f, 0xd0, 0xd0, 0x31, 0x5d, 0xe7, 0xd9, 0xb0, 0x74, 0xda, 0xdc, 0x7c, 0x63, 0x04, 0xad, + 0xfb, 0x74, 0xe5, 0x5e, 0x72, 0xa1, 0x8e, 0x9c, 0x1c, 0x0c, 0x61, 0x98, 0xf7, 0xfb, 0x24, 0xcf, + 0x64, 0x92, 0x31, 0xd9, 0x1c, 0xc1, 0xe4, 0x21, 0x5b, 0x9a, 0xe6, 0x32, 0xe7, 0xe7, 0x81, 0xea, + 0x2e, 0x4c, 0x71, 0xe1, 0x68, 0x90, 0xef, 0x3a, 0xd8, 0x95, 0xfd, 0x35, 0x3e, 0xa0, 0x71, 0xcc, + 0x0f, 0x70, 0x68, 0x7a, 0x32, 0x5e, 0xcb, 0xe1, 0xb0, 0xcf, 0x53, 0x4b, 0xf4, 0x79, 0xd4, 0xdf, + 0x4f, 0x02, 0xca, 0x4b, 0x28, 0xeb, 0xc1, 0x21, 0x8e, 0x68, 0x0c, 0x4c, 0x5e, 0x10, 0x33, 0x09, + 0x38, 0xbb, 0x24, 0x3e, 0x87, 0x86, 0x15, 0x1d, 0x1b, 0x4c, 0x25, 0xc2, 0x5c, 0x6e, 0x9e, 0x5a, + 0xa5, 0xeb, 0x5b, 0x7b, 0x8f, 0x19, 0x54, 0xaf, 0x5b, 0xd1, 0x31, 0xfb, 0x42, 0xdf, 0x03, 0xf8, + 0x2a, 0xf2, 0x3d, 0x41, 0x99, 0x1f, 0xfc, 0xbb, 0xa7, 0xa7, 0xfc, 0xd1, 0xde, 0xc3, 0x5d, 0x4e, + 0xba, 0x41, 0xc9, 0x71, 0xda, 0x16, 0xb4, 0x03, 0x33, 0x7c, 0xd2, 0xc7, 0x44, 0x90, 0xe7, 0xb6, + 0xf0, 0xfe, 0xe9, 0xc9, 0x7f, 0xc2, 0xc9, 0x70, 0x0e, 0xad, 0x20, 0x31, 0x52, 0xbf, 0x1b, 0x87, + 0xba, 0x94, 0x8b, 0xbe, 0x62, 0x99, 0x85, 0xf3, 0x5a, 0x8e, 0xe1, 0x78, 0x5d, 0x5f, 0x68, 0xf4, + 0x1c, 0x85, 0xf3, 0x72, 0x0e, 0xbb, 0xbe, 0xd6, 0x60, 0x36, 0xc4, 0x96, 0x1f, 0xda, 0x34, 0xd7, + 0x77, 0x7a, 0x0e, 0x35, 0x7b, 0x7e, 0x96, 0x33, 0x1c, 0x7e, 0x47, 0x82, 0xd1, 0xab, 0x30, 0xc3, + 0x8e, 0x3d, 0x81, 0x59, 0x93, 0x34, 0xb1, 0x9b, 0x40, 0x5c, 0x83, 0xd9, 0x27, 0x7d, 0x1a, 0xf8, + 
0xac, 0x43, 0x33, 0x34, 0x2d, 0xe2, 0xc7, 0x55, 0x95, 0x19, 0x06, 0xdf, 0x8a, 0xc1, 0xe8, 0x2d, + 0x58, 0xe0, 0xa8, 0x38, 0xb2, 0xcc, 0x20, 0x5e, 0x81, 0x43, 0xf1, 0xe8, 0x9e, 0x67, 0xb3, 0x77, + 0xd9, 0xe4, 0x96, 0x9c, 0x43, 0x2a, 0xd4, 0x2d, 0xbf, 0xd7, 0xc3, 0x1e, 0x89, 0x44, 0x1b, 0x34, + 0x1e, 0xa3, 0x5b, 0xb0, 0x6c, 0xba, 0xae, 0xff, 0xb5, 0xc1, 0x56, 0xda, 0x46, 0x4e, 0x3a, 0xfe, + 0x04, 0x57, 0x19, 0xd2, 0xa7, 0x0c, 0x47, 0x4f, 0x0b, 0xaa, 0x5e, 0x82, 0x46, 0x7c, 0x8e, 0x34, + 0xe5, 0x49, 0x18, 0x24, 0xfb, 0x56, 0xcf, 0x41, 0x2b, 0x79, 0x12, 0xea, 0x5f, 0x6a, 0x30, 0x57, + 0xe0, 0x54, 0xe8, 0x0b, 0x00, 0x6a, 0xad, 0xdc, 0xb5, 0x84, 0xb9, 0xfe, 0xcf, 0xe9, 0x9d, 0x93, + 0xda, 0x2b, 0x07, 0xeb, 0xd4, 0xfa, 0xf9, 0x27, 0xfa, 0x3e, 0x34, 0x99, 0xc5, 0x0a, 0xea, 0xdc, + 0x64, 0xdf, 0xfb, 0x27, 0xa8, 0x53, 0x59, 0x05, 0x79, 0xe6, 0x03, 0xfc, 0x5b, 0xfd, 0x93, 0x02, + 0x8d, 0x98, 0x31, 0x4d, 0xe0, 0xf8, 0x41, 0xb1, 0xb3, 0x8e, 0x64, 0x02, 0xc7, 0x60, 0xdb, 0x0c, + 0xf4, 0x1f, 0x69, 0x4a, 0xea, 0x3b, 0x00, 0x43, 0xf9, 0x0b, 0x45, 0x50, 0x0a, 0x45, 0xd0, 0xd6, + 0xa0, 0x4d, 0x35, 0xeb, 0x60, 0x7b, 0x8f, 0x84, 0x4e, 0xc0, 0x7e, 0xb0, 0xc0, 0x71, 0x22, 0xf1, + 0x90, 0x96, 0xc3, 0xcd, 0xbf, 0x2e, 0x41, 0x2b, 0x79, 0x93, 0xa2, 0x2f, 0xa1, 0x99, 0xf8, 0x61, + 0x06, 0x7a, 0x29, 0x7f, 0x68, 0xf9, 0x1f, 0x7a, 0xa8, 0x2f, 0x8f, 0xc0, 0x12, 0x6f, 0xcd, 0x31, + 0xa4, 0xc3, 0xb4, 0x68, 0xe6, 0xa3, 0x95, 0x13, 0xfa, 0xfc, 0x9c, 0xea, 0xe5, 0x91, 0xbf, 0x04, + 0xd0, 0xc6, 0xae, 0x2b, 0xc8, 0x83, 0xf3, 0xb9, 0xde, 0x3a, 0xba, 0x9a, 0x5f, 0x5b, 0xd6, 0xb9, + 0x57, 0x5f, 0xab, 0x84, 0x1b, 0xcb, 0x40, 0x60, 0xae, 0xa0, 0x59, 0x8e, 0x5e, 0x1f, 0x41, 0x25, + 0xd5, 0xb0, 0x57, 0xaf, 0x55, 0xc4, 0x8e, 0xb9, 0x3e, 0x01, 0x94, 0xef, 0xa4, 0xa3, 0xd7, 0x46, + 0x92, 0x19, 0x76, 0xea, 0xd5, 0xd7, 0xab, 0x21, 0x97, 0x0a, 0xca, 0x7b, 0xec, 0x23, 0x05, 0x4d, + 0x75, 0xf1, 0x47, 0x0a, 0x9a, 0x69, 0xdc, 0x8f, 0xa1, 0x23, 0x98, 0xcd, 0xf6, 0xdf, 0xd1, 0x5a, + 0xd9, 0xaf, 0x80, 0x72, 0xed, 0x7d, 0xf5, 0x6a, 0x15, 0xd4, 0x98, 0x19, 0x86, 0x73, 0xe9, 0x7e, + 0x37, 0x7a, 0x35, 0xbf, 0xbe, 0xb0, 0xe3, 0xaf, 0xae, 0x8e, 0x46, 0x4c, 0xca, 0x94, 0xed, 0x81, + 0x17, 0xc9, 0x54, 0xd2, 0x60, 0x2f, 0x92, 0xa9, 0xac, 0xa5, 0xae, 0x8d, 0xa1, 0x6f, 0x64, 0x63, + 0x35, 0xd3, 0x1b, 0x46, 0xeb, 0x65, 0x64, 0x8a, 0x9b, 0xd3, 0xea, 0x46, 0x65, 0xfc, 0x84, 0x37, + 0x7e, 0x09, 0xcd, 0x44, 0x8b, 0xb8, 0x28, 0x7e, 0xe4, 0x9b, 0xce, 0x45, 0xf1, 0xa3, 0xa8, 0xcf, + 0x3c, 0x86, 0xf6, 0xa1, 0x9d, 0x6a, 0x1a, 0xa3, 0x57, 0xca, 0x56, 0xa6, 0x6b, 0xab, 0xea, 0xab, + 0x23, 0xf1, 0x62, 0x1e, 0x86, 0x8c, 0x88, 0x22, 0x04, 0x96, 0x6e, 0x2e, 0x1d, 0x03, 0x5f, 0x19, + 0x85, 0x96, 0x72, 0xe5, 0x5c, 0x6b, 0xb9, 0xd0, 0x95, 0xcb, 0x5a, 0xd7, 0x85, 0xae, 0x5c, 0xde, + 0xad, 0x1e, 0x43, 0x87, 0x30, 0x93, 0x69, 0x2b, 0xa3, 0xd5, 0x32, 0x12, 0xd9, 0x96, 0xb6, 0xba, + 0x56, 0x01, 0x33, 0xe6, 0xf4, 0xff, 0xb2, 0x02, 0xc1, 0x4c, 0xee, 0x4a, 0xf9, 0xd2, 0xa1, 0x9d, + 0xbd, 0x74, 0x32, 0x52, 0x4c, 0xfa, 0x6b, 0x98, 0x2f, 0xaa, 0x36, 0xa2, 0x6b, 0x45, 0x75, 0x8d, + 0xd2, 0x92, 0xa6, 0xba, 0x5e, 0x15, 0x3d, 0x66, 0xfc, 0x19, 0xd4, 0x65, 0x6b, 0x15, 0x15, 0x5c, + 0x4a, 0x99, 0x66, 0xb4, 0xaa, 0x9d, 0x84, 0x92, 0x70, 0x95, 0x9e, 0x8c, 0x0a, 0xc3, 0x9e, 0x67, + 0x79, 0x54, 0xc8, 0x75, 0x67, 0xcb, 0xa3, 0x42, 0xbe, 0x85, 0xca, 0xd8, 0xc5, 0x66, 0x97, 0x6c, + 0x11, 0x96, 0x9b, 0x5d, 0x41, 0x07, 0xb4, 0xdc, 0xec, 0x0a, 0xbb, 0x8e, 0x63, 0xe8, 0x07, 0xf2, + 0x67, 0x12, 0xd9, 0xce, 0x20, 0x2a, 0x8d, 0x2d, 0x25, 0x1d, 0x4a, 0xf5, 0x7a, 0xf5, 0x05, 0x31, + 0xfb, 0x67, 0x32, 0x12, 
0x66, 0x3a, 0x83, 0xe5, 0x91, 0xb0, 0xb8, 0x3f, 0xa9, 0x6e, 0x54, 0xc6, + 0xcf, 0x3b, 0x79, 0xb2, 0x75, 0x56, 0xae, 0xed, 0x82, 0x6e, 0x63, 0xb9, 0xb6, 0x0b, 0xbb, 0x71, + 0xcc, 0x3f, 0x8a, 0xda, 0x62, 0x45, 0xfe, 0x71, 0x42, 0xdf, 0x4e, 0x5d, 0xaf, 0x8a, 0x9e, 0x4a, + 0x14, 0xf2, 0x7d, 0x2f, 0x34, 0x72, 0xff, 0xa9, 0x3b, 0xe0, 0x5a, 0x45, 0xec, 0xf2, 0xd3, 0x95, + 0x77, 0xc2, 0x48, 0x01, 0x32, 0x77, 0xc3, 0x46, 0x65, 0xfc, 0x98, 0x77, 0x20, 0x7f, 0x74, 0x93, + 0xe8, 0x59, 0xa1, 0xab, 0x23, 0xe8, 0x24, 0x7a, 0x6e, 0xea, 0x6b, 0x95, 0x70, 0x8b, 0xbc, 0x37, + 0xd9, 0x45, 0x3a, 0xc9, 0x9e, 0x72, 0xad, 0xaf, 0x93, 0xec, 0xa9, 0xa0, 0x31, 0x55, 0xe0, 0xbd, + 0xb2, 0x79, 0x34, 0xda, 0x7b, 0x33, 0x4d, 0xac, 0xd1, 0xde, 0x9b, 0xeb, 0x4b, 0x8d, 0xa1, 0x1f, + 0x0f, 0x7f, 0x8c, 0x91, 0xaf, 0xc1, 0xa2, 0xcd, 0xd2, 0x50, 0x54, 0x5a, 0x7a, 0x56, 0xdf, 0x3c, + 0xd5, 0x9a, 0x84, 0xf2, 0x7f, 0xa6, 0xc8, 0xce, 0x6e, 0x61, 0x11, 0x14, 0xbd, 0x55, 0x81, 0x70, + 0xae, 0x8e, 0xab, 0xbe, 0x7d, 0xca, 0x55, 0x45, 0xd6, 0x90, 0xac, 0x7f, 0x96, 0x5b, 0x43, 0x41, + 0x0d, 0xb5, 0xdc, 0x1a, 0x8a, 0x4a, 0xaa, 0xda, 0x18, 0x7a, 0x00, 0x93, 0xec, 0xb9, 0x8e, 0x2e, + 0x9e, 0xfc, 0x8e, 0x57, 0x2f, 0x15, 0xcf, 0xc7, 0xaf, 0x51, 0x2a, 0xc0, 0xfe, 0x14, 0xfb, 0x27, + 0xc1, 0x9b, 0xff, 0x08, 0x00, 0x00, 0xff, 0xff, 0xc6, 0x4b, 0x03, 0xaa, 0x60, 0x30, 0x00, 0x00, } diff --git a/weed/pb/volume_server_pb/volume_server_helper.go b/weed/pb/volume_server_pb/volume_server_helper.go new file mode 100644 index 000000000..356be27ff --- /dev/null +++ b/weed/pb/volume_server_pb/volume_server_helper.go @@ -0,0 +1,5 @@ +package volume_server_pb + +func (m *RemoteFile) BackendName() string { + return m.BackendType + "." + m.BackendId +} diff --git a/weed/query/json/query_json.go b/weed/query/json/query_json.go new file mode 100644 index 000000000..46f3b1b56 --- /dev/null +++ b/weed/query/json/query_json.go @@ -0,0 +1,107 @@ +package json + +import ( + "strconv" + + "github.com/chrislusf/seaweedfs/weed/query/sqltypes" + "github.com/tidwall/gjson" + "github.com/tidwall/match" +) + +type Query struct { + Field string + Op string + Value string +} + +func QueryJson(jsonLine string, projections []string, query Query) (passedFilter bool, values []sqltypes.Value) { + if filterJson(jsonLine, query) { + passedFilter = true + fields := gjson.GetMany(jsonLine, projections...) 
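+		// gjson.GetMany returns one Result per projection path, in the
+		// same order as the paths; a path missing from the document yields
+		// a zero Result, so values stays aligned with projections.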
+ for _, f := range fields { + values = append(values, sqltypes.MakeTrusted(sqltypes.Type(f.Type), sqltypes.StringToBytes(f.Raw))) + } + return + } + return false, nil +} + +func filterJson(jsonLine string, query Query) bool { + + value := gjson.Get(jsonLine, query.Field) + + // copied from gjson.go queryMatches() function + rpv := query.Value + + if !value.Exists() { + return false + } + if query.Op == "" { + // the query is only looking for existence, such as: + // friends.#(name) + // which makes sure that the array "friends" has an element of + // "name" that exists + return true + } + switch value.Type { + case gjson.String: + switch query.Op { + case "=": + return value.Str == rpv + case "!=": + return value.Str != rpv + case "<": + return value.Str < rpv + case "<=": + return value.Str <= rpv + case ">": + return value.Str > rpv + case ">=": + return value.Str >= rpv + case "%": + return match.Match(value.Str, rpv) + case "!%": + return !match.Match(value.Str, rpv) + } + case gjson.Number: + rpvn, _ := strconv.ParseFloat(rpv, 64) + switch query.Op { + case "=": + return value.Num == rpvn + case "!=": + return value.Num != rpvn + case "<": + return value.Num < rpvn + case "<=": + return value.Num <= rpvn + case ">": + return value.Num > rpvn + case ">=": + return value.Num >= rpvn + } + case gjson.True: + switch query.Op { + case "=": + return rpv == "true" + case "!=": + return rpv != "true" + case ">": + return rpv == "false" + case ">=": + return true + } + case gjson.False: + switch query.Op { + case "=": + return rpv == "false" + case "!=": + return rpv != "false" + case "<": + return rpv == "true" + case "<=": + return true + } + } + return false + +} diff --git a/weed/query/json/query_json_test.go b/weed/query/json/query_json_test.go new file mode 100644 index 000000000..1794bb333 --- /dev/null +++ b/weed/query/json/query_json_test.go @@ -0,0 +1,133 @@ +package json + +import ( + "testing" + + "github.com/tidwall/gjson" +) + +func TestGjson(t *testing.T) { + data := ` + { + "quiz": { + "sport": { + "q1": { + "question": "Which one is correct team name in NBA?", + "options": [ + "New York Bulls", + "Los Angeles Kings", + "Golden State Warriros", + "Huston Rocket" + ], + "answer": "Huston Rocket" + } + }, + "maths": { + "q1": { + "question": "5 + 7 = ?", + "options": [ + "10", + "11", + "12", + "13" + ], + "answer": "12" + }, + "q2": { + "question": "12 - 8 = ?", + "options": [ + "1", + "2", + "3", + "4" + ], + "answer": "4" + } + } + } + } + + { + "fruit": "Apple", + "size": "Large", + "quiz": "Red" + } + +` + + projections := []string{"quiz", "fruit"} + + gjson.ForEachLine(data, func(line gjson.Result) bool { + println(line.Raw) + println("+++++++++++") + results := gjson.GetMany(line.Raw, projections...) 
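+		// gjson.ForEachLine walks every JSON document in the
+		// multi-document input above; returning true from the callback
+		// keeps scanning, false stops it.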
+	for _, result := range results {
+		println(result.Index, result.Type, result.String())
+	}
+	println("-----------")
+	return true
+	})
+
+}
+
+func TestJsonQueryRow(t *testing.T) {
+
+	data := `
+	{
+		"fruit": "Bl\"ue",
+		"size": 6,
+		"quiz": "green"
+	}
+
+`
+	selections := []string{"fruit", "size"}
+
+	isFiltered, values := QueryJson(data, selections, Query{
+		Field: "quiz",
+		Op:    "=",
+		Value: "green",
+	})
+
+	if !isFiltered {
+		t.Errorf("the row should have passed the filter")
+	}
+
+	if values == nil {
+		t.Errorf("values should have been returned")
+	}
+
+	buf := ToJson(nil, selections, values)
+	println(string(buf))
+
+}
+
+func TestJsonQueryNumber(t *testing.T) {
+
+	data := `
+	{
+		"fruit": "Bl\"ue",
+		"size": 6,
+		"quiz": "green"
+	}
+
+`
+	selections := []string{"fruit", "quiz"}
+
+	isFiltered, values := QueryJson(data, selections, Query{
+		Field: "size",
+		Op:    ">=",
+		Value: "6",
+	})
+
+	if !isFiltered {
+		t.Errorf("the row should have passed the filter")
+	}
+
+	if values == nil {
+		t.Errorf("values should have been returned")
+	}
+
+	buf := ToJson(nil, selections, values)
+	println(string(buf))
+
+}
diff --git a/weed/query/json/seralize.go b/weed/query/json/seralize.go
new file mode 100644
index 000000000..9bbddc2ff
--- /dev/null
+++ b/weed/query/json/seralize.go
@@ -0,0 +1,17 @@
+package json
+
+import "github.com/chrislusf/seaweedfs/weed/query/sqltypes"
+
+// ToJson appends a JSON object built from the selected field names and
+// their raw values to buf.
+func ToJson(buf []byte, selections []string, values []sqltypes.Value) []byte {
+	buf = append(buf, '{')
+	for i, value := range values {
+		if i > 0 {
+			buf = append(buf, ',')
+		}
+		// quote the key so the output is valid JSON
+		buf = append(buf, '"')
+		buf = append(buf, selections[i]...)
+		buf = append(buf, '"')
+		buf = append(buf, ':')
+		buf = append(buf, value.Raw()...)
+	}
+	buf = append(buf, '}')
+	return buf
+}
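Not part of the change itself: a minimal usage sketch for the QueryJson/ToJson pair above, assuming the quoted-key form of ToJson shown here.

package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/query/json"
)

func main() {
	line := `{"fruit": "Bl\"ue", "size": 6, "quiz": "green"}`
	// keep the row only if quiz == "green", projecting fruit and size
	passed, values := json.QueryJson(line, []string{"fruit", "size"}, json.Query{
		Field: "quiz",
		Op:    "=",
		Value: "green",
	})
	if passed {
		fmt.Println(string(json.ToJson(nil, []string{"fruit", "size"}, values)))
		// prints: {"fruit":"Bl\"ue","size":6}
	}
}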
diff --git a/weed/query/sqltypes/const.go b/weed/query/sqltypes/const.go
new file mode 100644
index 000000000..f1d0f540e
--- /dev/null
+++ b/weed/query/sqltypes/const.go
@@ -0,0 +1,124 @@
+package sqltypes
+
+// copied from vitess
+
+// Flag allows us to qualify types by their common properties.
+type Flag int32
+
+const (
+	Flag_NONE       Flag = 0
+	Flag_ISINTEGRAL Flag = 256
+	Flag_ISUNSIGNED Flag = 512
+	Flag_ISFLOAT    Flag = 1024
+	Flag_ISQUOTED   Flag = 2048
+	Flag_ISTEXT     Flag = 4096
+	Flag_ISBINARY   Flag = 8192
+)
+
+var Flag_name = map[int32]string{
+	0:    "NONE",
+	256:  "ISINTEGRAL",
+	512:  "ISUNSIGNED",
+	1024: "ISFLOAT",
+	2048: "ISQUOTED",
+	4096: "ISTEXT",
+	8192: "ISBINARY",
+}
+var Flag_value = map[string]int32{
+	"NONE":       0,
+	"ISINTEGRAL": 256,
+	"ISUNSIGNED": 512,
+	"ISFLOAT":    1024,
+	"ISQUOTED":   2048,
+	"ISTEXT":     4096,
+	"ISBINARY":   8192,
+}
+
+// Type defines the various supported data types in bind vars
+// and query results.
+type Type int32
+
+const (
+	// NULL_TYPE specifies a NULL type.
+	Type_NULL_TYPE Type = 0
+	// INT8 specifies a TINYINT type.
+	// Properties: 1, IsNumber.
+	Type_INT8 Type = 257
+	// UINT8 specifies a TINYINT UNSIGNED type.
+	// Properties: 2, IsNumber, IsUnsigned.
+	Type_UINT8 Type = 770
+	// INT16 specifies a SMALLINT type.
+	// Properties: 3, IsNumber.
+	Type_INT16 Type = 259
+	// UINT16 specifies a SMALLINT UNSIGNED type.
+	// Properties: 4, IsNumber, IsUnsigned.
+	Type_UINT16 Type = 772
+	// INT32 specifies an INTEGER type.
+	// Properties: 7, IsNumber.
+	Type_INT32 Type = 263
+	// UINT32 specifies an INTEGER UNSIGNED type.
+	// Properties: 8, IsNumber, IsUnsigned.
+	Type_UINT32 Type = 776
+	// INT64 specifies a BIGINT type.
+	// Properties: 9, IsNumber.
+	Type_INT64 Type = 265
+	// UINT64 specifies a BIGINT UNSIGNED type.
+	// Properties: 10, IsNumber, IsUnsigned.
+	Type_UINT64 Type = 778
+	// FLOAT32 specifies a FLOAT type.
+	// Properties: 11, IsFloat.
+	Type_FLOAT32 Type = 1035
+	// FLOAT64 specifies a DOUBLE or REAL type.
+	// Properties: 12, IsFloat.
+	Type_FLOAT64 Type = 1036
+	// TIMESTAMP specifies a TIMESTAMP type.
+	// Properties: 13, IsQuoted.
+	Type_TIMESTAMP Type = 2061
+	// DATE specifies a DATE type.
+	// Properties: 14, IsQuoted.
+	Type_DATE Type = 2062
+	// TIME specifies a TIME type.
+	// Properties: 15, IsQuoted.
+	Type_TIME Type = 2063
+	// DATETIME specifies a DATETIME type.
+	// Properties: 16, IsQuoted.
+	Type_DATETIME Type = 2064
+	// YEAR specifies a YEAR type.
+	// Properties: 17, IsNumber, IsUnsigned.
+	Type_YEAR Type = 785
+	// DECIMAL specifies a DECIMAL or NUMERIC type.
+	// Properties: 18, None.
+	Type_DECIMAL Type = 18
+	// TEXT specifies a TEXT type.
+	// Properties: 19, IsQuoted, IsText.
+	Type_TEXT Type = 6163
+	// BLOB specifies a BLOB type.
+	// Properties: 20, IsQuoted, IsBinary.
+	Type_BLOB Type = 10260
+	// VARCHAR specifies a VARCHAR type.
+	// Properties: 21, IsQuoted, IsText.
+	Type_VARCHAR Type = 6165
+	// VARBINARY specifies a VARBINARY type.
+	// Properties: 22, IsQuoted, IsBinary.
+	Type_VARBINARY Type = 10262
+	// CHAR specifies a CHAR type.
+	// Properties: 23, IsQuoted, IsText.
+	Type_CHAR Type = 6167
+	// BINARY specifies a BINARY type.
+	// Properties: 24, IsQuoted, IsBinary.
+	Type_BINARY Type = 10264
+	// BIT specifies a BIT type.
+	// Properties: 25, IsQuoted.
+	Type_BIT Type = 2073
+	// JSON specifies a JSON type.
+	// Properties: 30, IsQuoted.
+	Type_JSON Type = 2078
+)
+
+// BindVariable represents a single bind variable in a Query.
+type BindVariable struct {
+	Type  Type   `protobuf:"varint,1,opt,name=type,enum=query.Type" json:"type,omitempty"`
+	Value []byte `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"`
+	// values are set if type is TUPLE.
+	Values []*Value `protobuf:"bytes,3,rep,name=values" json:"values,omitempty"`
+}
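Not part of the change: the numeric Type values above pack a small per-type id into the low bits and the Flag_* bits above it, which is what the bitmask helpers in type.go below rely on. A minimal check of a few of the documented encodings:

package main

import "fmt"

func main() {
	// INT64: id 9 | ISINTEGRAL (256) == 265
	fmt.Println(9|256 == 265)
	// UINT64: id 10 | ISINTEGRAL (256) | ISUNSIGNED (512) == 778
	fmt.Println(10|256|512 == 778)
	// TEXT: id 19 | ISQUOTED (2048) | ISTEXT (4096) == 6163
	fmt.Println(19|2048|4096 == 6163)
}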
diff --git a/weed/query/sqltypes/type.go b/weed/query/sqltypes/type.go
new file mode 100644
index 000000000..f4f3dd471
--- /dev/null
+++ b/weed/query/sqltypes/type.go
@@ -0,0 +1,101 @@
+package sqltypes
+
+// These bit flags can be used to query on the
+// common properties of types.
+const (
+	flagIsIntegral = int(Flag_ISINTEGRAL)
+	flagIsUnsigned = int(Flag_ISUNSIGNED)
+	flagIsFloat    = int(Flag_ISFLOAT)
+	flagIsQuoted   = int(Flag_ISQUOTED)
+	flagIsText     = int(Flag_ISTEXT)
+	flagIsBinary   = int(Flag_ISBINARY)
+)
+
+// IsIntegral returns true if Type is an integral
+// (signed/unsigned) that can be represented using
+// up to 64 binary bits.
+// If you have a Value object, use its member function.
+func IsIntegral(t Type) bool {
+	return int(t)&flagIsIntegral == flagIsIntegral
+}
+
+// IsSigned returns true if Type is a signed integral.
+// If you have a Value object, use its member function.
+func IsSigned(t Type) bool {
+	return int(t)&(flagIsIntegral|flagIsUnsigned) == flagIsIntegral
+}
+
+// IsUnsigned returns true if Type is an unsigned integral.
+// Caution: this is not the same as !IsSigned.
+// If you have a Value object, use its member function.
+func IsUnsigned(t Type) bool {
+	return int(t)&(flagIsIntegral|flagIsUnsigned) == flagIsIntegral|flagIsUnsigned
+}
+
+// IsFloat returns true if Type is a floating point.
+// If you have a Value object, use its member function.
+func IsFloat(t Type) bool {
+	return int(t)&flagIsFloat == flagIsFloat
+}
+
+// IsQuoted returns true if Type is a quoted text or binary.
+// (BIT carries the quoted flag but is explicitly excluded here.)
+// If you have a Value object, use its member function.
+func IsQuoted(t Type) bool {
+	return (int(t)&flagIsQuoted == flagIsQuoted) && t != Bit
+}
+
+// IsText returns true if Type is a text.
+// If you have a Value object, use its member function.
+func IsText(t Type) bool {
+	return int(t)&flagIsText == flagIsText
+}
+
+// IsBinary returns true if Type is a binary.
+// If you have a Value object, use its member function.
+func IsBinary(t Type) bool {
+	return int(t)&flagIsBinary == flagIsBinary
+}
+
+// isNumber returns true if the type is any type of number.
+func isNumber(t Type) bool {
+	return IsIntegral(t) || IsFloat(t) || t == Decimal
+}
+
+// IsTemporal returns true if Value is a time type.
+func IsTemporal(t Type) bool {
+	switch t {
+	case Timestamp, Date, Time, Datetime:
+		return true
+	}
+	return false
+}
+
+// Vitess data types. These are idiomatically
+// named synonyms for the Type values.
+const (
+	Null      = Type_NULL_TYPE
+	Int8      = Type_INT8
+	Uint8     = Type_UINT8
+	Int16     = Type_INT16
+	Uint16    = Type_UINT16
+	Int32     = Type_INT32
+	Uint32    = Type_UINT32
+	Int64     = Type_INT64
+	Uint64    = Type_UINT64
+	Float32   = Type_FLOAT32
+	Float64   = Type_FLOAT64
+	Timestamp = Type_TIMESTAMP
+	Date      = Type_DATE
+	Time      = Type_TIME
+	Datetime  = Type_DATETIME
+	Year      = Type_YEAR
+	Decimal   = Type_DECIMAL
+	Text      = Type_TEXT
+	Blob      = Type_BLOB
+	VarChar   = Type_VARCHAR
+	VarBinary = Type_VARBINARY
+	Char      = Type_CHAR
+	Binary    = Type_BINARY
+	Bit       = Type_BIT
+	TypeJSON  = Type_JSON
+)
diff --git a/weed/query/sqltypes/unsafe.go b/weed/query/sqltypes/unsafe.go
new file mode 100644
index 000000000..e322c92ce
--- /dev/null
+++ b/weed/query/sqltypes/unsafe.go
@@ -0,0 +1,30 @@
+package sqltypes
+
+import (
+	"reflect"
+	"unsafe"
+)
+
+// BytesToString casts a slice to a string without copying
+func BytesToString(b []byte) (s string) {
+	if len(b) == 0 {
+		return ""
+	}
+
+	bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+	sh := reflect.StringHeader{Data: bh.Data, Len: bh.Len}
+
+	return *(*string)(unsafe.Pointer(&sh))
+}
+
+// StringToBytes casts a string to a slice without copying
+func StringToBytes(s string) []byte {
+	if len(s) == 0 {
+		return []byte{}
+	}
+
+	sh := (*reflect.StringHeader)(unsafe.Pointer(&s))
+	bh := reflect.SliceHeader{Data: sh.Data, Len: sh.Len, Cap: sh.Len}
+
+	return *(*[]byte)(unsafe.Pointer(&bh))
+}
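These casts alias memory instead of copying it, so the usual unsafe-package caveats apply. A small sketch of the contract (the function itself is illustrative only, not part of the change):

package sqltypes

// exampleZeroCopy round-trips a string through both casts without allocating.
func exampleZeroCopy() string {
	b := StringToBytes("hello") // aliases the string's bytes: treat as read-only
	return BytesToString(b)     // "hello" again, still without copying
}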
diff --git a/weed/query/sqltypes/value.go b/weed/query/sqltypes/value.go
new file mode 100644
index 000000000..012de2b45
--- /dev/null
+++ b/weed/query/sqltypes/value.go
@@ -0,0 +1,355 @@
+package sqltypes
+
+import (
+	"fmt"
+	"strconv"
+	"time"
+)
+
+var (
+	// NULL represents the NULL value.
+	NULL = Value{}
+	// DontEscape tells you if a character should not be escaped.
+	DontEscape = byte(255)
+	nullstr    = []byte("null")
+)
+
+type Value struct {
+	typ Type
+	val []byte
+}
+
+// NewValue builds a Value using typ and val. If the value and typ
+// don't match, it returns an error. Integral and float values are
+// validated by parsing; quoted types, BIT, and NULL are accepted as-is.
+func NewValue(typ Type, val []byte) (v Value, err error) {
+	switch {
+	case IsSigned(typ):
+		if _, err := strconv.ParseInt(string(val), 0, 64); err != nil {
+			return NULL, err
+		}
+		return MakeTrusted(typ, val), nil
+	case IsUnsigned(typ):
+		if _, err := strconv.ParseUint(string(val), 0, 64); err != nil {
+			return NULL, err
+		}
+		return MakeTrusted(typ, val), nil
+	case IsFloat(typ) || typ == Decimal:
+		if _, err := strconv.ParseFloat(string(val), 64); err != nil {
+			return NULL, err
+		}
+		return MakeTrusted(typ, val), nil
+	case IsQuoted(typ) || typ == Bit || typ == Null:
+		return MakeTrusted(typ, val), nil
+	}
+	// All other types are unsafe or invalid.
+	return NULL, fmt.Errorf("invalid type specified for NewValue: %v", typ)
+}
+
+// MakeTrusted makes a new Value based on the type.
+// This function should only be used if you know the value
+// and type conform to the rules. Every place this function is
+// called, a comment is needed that explains why it's justified.
+// Exceptions: The current package and mysql package do not need
+// comments. Other packages can also use the function to create
+// VarBinary or VarChar values.
+func MakeTrusted(typ Type, val []byte) Value {
+
+	if typ == Null {
+		return NULL
+	}
+
+	return Value{typ: typ, val: val}
+}
+
+// NewInt64 builds an Int64 Value.
+func NewInt64(v int64) Value {
+	return MakeTrusted(Int64, strconv.AppendInt(nil, v, 10))
+}
+
+// NewInt32 builds an Int32 Value.
+func NewInt32(v int32) Value {
+	return MakeTrusted(Int32, strconv.AppendInt(nil, int64(v), 10))
+}
+
+// NewUint64 builds a Uint64 Value.
+func NewUint64(v uint64) Value {
+	return MakeTrusted(Uint64, strconv.AppendUint(nil, v, 10))
+}
+
+// NewFloat32 builds a Float32 Value.
+func NewFloat32(v float32) Value {
+	// bitSize 32 yields the shortest representation that round-trips a float32
+	return MakeTrusted(Float32, strconv.AppendFloat(nil, float64(v), 'f', -1, 32))
+}
+
+// NewFloat64 builds a Float64 Value.
+func NewFloat64(v float64) Value {
+	return MakeTrusted(Float64, strconv.AppendFloat(nil, v, 'g', -1, 64))
+}
+
+// NewVarChar builds a VarChar Value.
+func NewVarChar(v string) Value {
+	return MakeTrusted(VarChar, []byte(v))
+}
+
+// NewVarBinary builds a VarBinary Value.
+// The input is a string because it's the most common use case.
+func NewVarBinary(v string) Value {
+	return MakeTrusted(VarBinary, []byte(v))
+}
+
+// NewIntegral builds an integral type from a string representation.
+// The type will be Int64 or Uint64. Int64 will be preferred where possible.
+func NewIntegral(val string) (n Value, err error) {
+	signed, err := strconv.ParseInt(val, 0, 64)
+	if err == nil {
+		return MakeTrusted(Int64, strconv.AppendInt(nil, signed, 10)), nil
+	}
+	unsigned, err := strconv.ParseUint(val, 0, 64)
+	if err != nil {
+		return Value{}, err
+	}
+	return MakeTrusted(Uint64, strconv.AppendUint(nil, unsigned, 10)), nil
+}
+
+// MakeString makes a VarBinary Value.
+func MakeString(val []byte) Value {
+	return MakeTrusted(VarBinary, val)
+}
+
+// BuildValue builds a value from any go type. sqltypes.Value is
+// also allowed.
+func BuildValue(goval interface{}) (v Value, err error) {
+	// Look for the most common types first.
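+	// Anything that matches no case below (e.g. bool or a struct) is
+	// rejected with the "unexpected type" error rather than guessed at.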
+ switch goval := goval.(type) { + case nil: + // no op + case []byte: + v = MakeTrusted(VarBinary, goval) + case int64: + v = MakeTrusted(Int64, strconv.AppendInt(nil, int64(goval), 10)) + case uint64: + v = MakeTrusted(Uint64, strconv.AppendUint(nil, uint64(goval), 10)) + case float64: + v = MakeTrusted(Float64, strconv.AppendFloat(nil, goval, 'f', -1, 64)) + case int: + v = MakeTrusted(Int64, strconv.AppendInt(nil, int64(goval), 10)) + case int8: + v = MakeTrusted(Int8, strconv.AppendInt(nil, int64(goval), 10)) + case int16: + v = MakeTrusted(Int16, strconv.AppendInt(nil, int64(goval), 10)) + case int32: + v = MakeTrusted(Int32, strconv.AppendInt(nil, int64(goval), 10)) + case uint: + v = MakeTrusted(Uint64, strconv.AppendUint(nil, uint64(goval), 10)) + case uint8: + v = MakeTrusted(Uint8, strconv.AppendUint(nil, uint64(goval), 10)) + case uint16: + v = MakeTrusted(Uint16, strconv.AppendUint(nil, uint64(goval), 10)) + case uint32: + v = MakeTrusted(Uint32, strconv.AppendUint(nil, uint64(goval), 10)) + case float32: + v = MakeTrusted(Float32, strconv.AppendFloat(nil, float64(goval), 'f', -1, 64)) + case string: + v = MakeTrusted(VarBinary, []byte(goval)) + case time.Time: + v = MakeTrusted(Datetime, []byte(goval.Format("2006-01-02 15:04:05"))) + case Value: + v = goval + case *BindVariable: + return ValueFromBytes(goval.Type, goval.Value) + default: + return v, fmt.Errorf("unexpected type %T: %v", goval, goval) + } + return v, nil +} + +// BuildConverted is like BuildValue except that it tries to +// convert a string or []byte to an integral if the target type +// is an integral. We don't perform other implicit conversions +// because they're unsafe. +func BuildConverted(typ Type, goval interface{}) (v Value, err error) { + if IsIntegral(typ) { + switch goval := goval.(type) { + case []byte: + return ValueFromBytes(typ, goval) + case string: + return ValueFromBytes(typ, []byte(goval)) + case Value: + if goval.IsQuoted() { + return ValueFromBytes(typ, goval.Raw()) + } + } + } + return BuildValue(goval) +} + +// ValueFromBytes builds a Value using typ and val. It ensures that val +// matches the requested type. If type is an integral it's converted to +// a canonical form. Otherwise, the original representation is preserved. +func ValueFromBytes(typ Type, val []byte) (v Value, err error) { + switch { + case IsSigned(typ): + signed, err := strconv.ParseInt(string(val), 0, 64) + if err != nil { + return NULL, err + } + v = MakeTrusted(typ, strconv.AppendInt(nil, signed, 10)) + case IsUnsigned(typ): + unsigned, err := strconv.ParseUint(string(val), 0, 64) + if err != nil { + return NULL, err + } + v = MakeTrusted(typ, strconv.AppendUint(nil, unsigned, 10)) + case IsFloat(typ) || typ == Decimal: + _, err := strconv.ParseFloat(string(val), 64) + if err != nil { + return NULL, err + } + // After verification, we preserve the original representation. + fallthrough + default: + v = MakeTrusted(typ, val) + } + return v, nil +} + +// BuildIntegral builds an integral type from a string representation. +// The type will be Int64 or Uint64. Int64 will be preferred where possible. +func BuildIntegral(val string) (n Value, err error) { + signed, err := strconv.ParseInt(val, 0, 64) + if err == nil { + return MakeTrusted(Int64, strconv.AppendInt(nil, signed, 10)), nil + } + unsigned, err := strconv.ParseUint(val, 0, 64) + if err != nil { + return Value{}, err + } + return MakeTrusted(Uint64, strconv.AppendUint(nil, unsigned, 10)), nil +} + +// Type returns the type of Value. 
+func (v Value) Type() Type {
+	return v.typ
+}
+
+// Raw returns the raw bytes. All types are currently implemented as []byte.
+// You should avoid using this function. If you do, you should treat the
+// bytes as read-only.
+func (v Value) Raw() []byte {
+	return v.val
+}
+
+// Len returns the length of the raw bytes.
+func (v Value) Len() int {
+	return len(v.val)
+}
+
+// Values represents the array of Value.
+type Values []Value
+
+// Len returns the total length of all the values' raw bytes.
+func (vs Values) Len() int {
+	len := 0
+	for _, v := range vs {
+		len += v.Len()
+	}
+	return len
+}
+
+// String returns the raw value as a string.
+func (v Value) String() string {
+	return BytesToString(v.val)
+}
+
+// ToNative converts Value to a native go type.
+// This does not work for sqltypes.Tuple. The function
+// panics if there are inconsistencies.
+func (v Value) ToNative() interface{} {
+	var out interface{}
+	var err error
+	switch {
+	case v.typ == Null:
+		// no-op
+	case IsSigned(v.typ):
+		out, err = v.ParseInt64()
+	case IsUnsigned(v.typ):
+		out, err = v.ParseUint64()
+	case IsFloat(v.typ):
+		out, err = v.ParseFloat64()
+	default:
+		out = v.val
+	}
+	if err != nil {
+		panic(err)
+	}
+	return out
+}
+
+// ParseInt64 will parse a Value into an int64. It does
+// not check the type.
+func (v Value) ParseInt64() (val int64, err error) {
+	return strconv.ParseInt(v.String(), 10, 64)
+}
+
+// ParseUint64 will parse a Value into a uint64. It does
+// not check the type.
+func (v Value) ParseUint64() (val uint64, err error) {
+	return strconv.ParseUint(v.String(), 10, 64)
+}
+
+// ParseFloat64 will parse a Value into a float64. It does
+// not check the type.
+func (v Value) ParseFloat64() (val float64, err error) {
+	return strconv.ParseFloat(v.String(), 64)
+}
+
+// IsNull returns true if Value is null.
+func (v Value) IsNull() bool {
+	return v.typ == Null
+}
+
+// IsIntegral returns true if Value is an integral.
+func (v Value) IsIntegral() bool {
+	return IsIntegral(v.typ)
+}
+
+// IsSigned returns true if Value is a signed integral.
+func (v Value) IsSigned() bool {
+	return IsSigned(v.typ)
+}
+
+// IsUnsigned returns true if Value is an unsigned integral.
+func (v Value) IsUnsigned() bool {
+	return IsUnsigned(v.typ)
+}
+
+// IsFloat returns true if Value is a float.
+func (v Value) IsFloat() bool {
+	return IsFloat(v.typ)
+}
+
+// IsQuoted returns true if Value must be SQL-quoted.
+func (v Value) IsQuoted() bool {
+	return IsQuoted(v.typ)
+}
+
+// IsText returns true if Value is a collatable text.
+func (v Value) IsText() bool {
+	return IsText(v.typ)
+}
+
+// IsBinary returns true if Value is binary.
+func (v Value) IsBinary() bool {
+	return IsBinary(v.typ)
+}
+
+// IsTemporal returns true if Value is a time type.
+func (v Value) IsTemporal() bool {
+	return IsTemporal(v.typ)
+}
+
+// ToString returns the value as MySQL would return it as a string.
+func (v Value) ToString() string { + return BytesToString(v.val) +} diff --git a/weed/replication/replicator.go b/weed/replication/replicator.go index 208e0894a..a91c2ddd3 100644 --- a/weed/replication/replicator.go +++ b/weed/replication/replicator.go @@ -1,6 +1,8 @@ package replication import ( + "context" + "fmt" "path/filepath" "strings" @@ -16,10 +18,10 @@ type Replicator struct { source *source.FilerSource } -func NewReplicator(sourceConfig util.Configuration, dataSink sink.ReplicationSink) *Replicator { +func NewReplicator(sourceConfig util.Configuration, configPrefix string, dataSink sink.ReplicationSink) *Replicator { source := &source.FilerSource{} - source.Initialize(sourceConfig) + source.Initialize(sourceConfig, configPrefix) dataSink.SetSourceFiler(source) @@ -29,12 +31,14 @@ func NewReplicator(sourceConfig util.Configuration, dataSink sink.ReplicationSin } } -func (r *Replicator) Replicate(key string, message *filer_pb.EventNotification) error { +func (r *Replicator) Replicate(ctx context.Context, key string, message *filer_pb.EventNotification) error { if !strings.HasPrefix(key, r.source.Dir) { glog.V(4).Infof("skipping %v outside of %v", key, r.source.Dir) return nil } - key = filepath.Join(r.sink.GetSinkToDirectory(), key[len(r.source.Dir):]) + newKey := filepath.ToSlash(filepath.Join(r.sink.GetSinkToDirectory(), key[len(r.source.Dir):])) + glog.V(3).Infof("replicate %s => %s", key, newKey) + key = newKey if message.OldEntry != nil && message.NewEntry == nil { glog.V(4).Infof("deleting %v", key) return r.sink.DeleteEntry(key, message.OldEntry.IsDirectory, message.DeleteChunks) @@ -48,12 +52,17 @@ func (r *Replicator) Replicate(key string, message *filer_pb.EventNotification) return nil } - foundExisting, err := r.sink.UpdateEntry(key, message.OldEntry, message.NewEntry, message.DeleteChunks) + foundExisting, err := r.sink.UpdateEntry(key, message.OldEntry, message.NewParentPath, message.NewEntry, message.DeleteChunks) if foundExisting { glog.V(4).Infof("updated %v", key) return err } + err = r.sink.DeleteEntry(key, message.OldEntry.IsDirectory, false) + if err != nil { + return fmt.Errorf("delete old entry %v: %v", key, err) + } + glog.V(4).Infof("creating missing %v", key) return r.sink.CreateEntry(key, message.NewEntry) } diff --git a/weed/replication/sink/azuresink/azure_sink.go b/weed/replication/sink/azuresink/azure_sink.go index 6adbfd75b..89e04922f 100644 --- a/weed/replication/sink/azuresink/azure_sink.go +++ b/weed/replication/sink/azuresink/azure_sink.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "net/url" + "strings" "github.com/Azure/azure-storage-blob-go/azblob" "github.com/chrislusf/seaweedfs/weed/filer2" @@ -34,12 +35,12 @@ func (g *AzureSink) GetSinkToDirectory() string { return g.dir } -func (g *AzureSink) Initialize(configuration util.Configuration) error { +func (g *AzureSink) Initialize(configuration util.Configuration, prefix string) error { return g.initialize( - configuration.GetString("account_name"), - configuration.GetString("account_key"), - configuration.GetString("container"), - configuration.GetString("directory"), + configuration.GetString(prefix+"account_name"), + configuration.GetString(prefix+"account_key"), + configuration.GetString(prefix+"container"), + configuration.GetString(prefix+"directory"), ) } @@ -71,13 +72,13 @@ func (g *AzureSink) initialize(accountName, accountKey, container, dir string) e func (g *AzureSink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool) error { + key = cleanKey(key) + if isDirectory { key = 
key + "/" } - ctx := context.Background() - - if _, err := g.containerURL.NewBlobURL(key).Delete(ctx, + if _, err := g.containerURL.NewBlobURL(key).Delete(context.Background(), azblob.DeleteSnapshotsOptionInclude, azblob.BlobAccessConditions{}); err != nil { return fmt.Errorf("azure delete %s/%s: %v", g.container, key, err) } @@ -88,6 +89,8 @@ func (g *AzureSink) DeleteEntry(key string, isDirectory, deleteIncludeChunks boo func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error { + key = cleanKey(key) + if entry.IsDirectory { return nil } @@ -95,13 +98,11 @@ func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error { totalSize := filer2.TotalSize(entry.Chunks) chunkViews := filer2.ViewFromChunks(entry.Chunks, 0, int(totalSize)) - ctx := context.Background() - // Create a URL that references a to-be-created blob in your // Azure Storage account's container. appendBlobURL := g.containerURL.NewAppendBlobURL(key) - _, err := appendBlobURL.Create(ctx, azblob.BlobHTTPHeaders{}, azblob.Metadata{}, azblob.BlobAccessConditions{}) + _, err := appendBlobURL.Create(context.Background(), azblob.BlobHTTPHeaders{}, azblob.Metadata{}, azblob.BlobAccessConditions{}) if err != nil { return err } @@ -114,8 +115,8 @@ func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error { } var writeErr error - _, readErr := util.ReadUrlAsStream(fileUrl, chunk.Offset, int(chunk.Size), func(data []byte) { - _, writeErr = appendBlobURL.AppendBlock(ctx, bytes.NewReader(data), azblob.AppendBlobAccessConditions{}, nil) + readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk, chunk.Offset, int(chunk.Size), func(data []byte) { + _, writeErr = appendBlobURL.AppendBlock(context.Background(), bytes.NewReader(data), azblob.AppendBlobAccessConditions{}, nil) }) if readErr != nil { @@ -131,7 +132,15 @@ func (g *AzureSink) CreateEntry(key string, entry *filer_pb.Entry) error { } -func (g *AzureSink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) { +func (g *AzureSink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) { + key = cleanKey(key) // TODO improve efficiency return false, nil } + +func cleanKey(key string) string { + if strings.HasPrefix(key, "/") { + key = key[1:] + } + return key +} diff --git a/weed/replication/sink/b2sink/b2_sink.go b/weed/replication/sink/b2sink/b2_sink.go index ce0e9eb3c..df0653f73 100644 --- a/weed/replication/sink/b2sink/b2_sink.go +++ b/weed/replication/sink/b2sink/b2_sink.go @@ -2,6 +2,8 @@ package B2Sink import ( "context" + "strings" + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" "github.com/chrislusf/seaweedfs/weed/replication/sink" @@ -29,12 +31,12 @@ func (g *B2Sink) GetSinkToDirectory() string { return g.dir } -func (g *B2Sink) Initialize(configuration util.Configuration) error { +func (g *B2Sink) Initialize(configuration util.Configuration, prefix string) error { return g.initialize( - configuration.GetString("account_id"), - configuration.GetString("account_key"), - configuration.GetString("bucket"), - configuration.GetString("directory"), + configuration.GetString(prefix+"b2_account_id"), + configuration.GetString(prefix+"b2_master_application_key"), + configuration.GetString(prefix+"bucket"), + configuration.GetString(prefix+"directory"), ) } @@ -43,10 +45,9 @@ func (g *B2Sink) SetSourceFiler(s 
*source.FilerSource) { } func (g *B2Sink) initialize(accountId, accountKey, bucket, dir string) error { - ctx := context.Background() - client, err := b2.NewClient(ctx, accountId, accountKey) + client, err := b2.NewClient(context.Background(), accountId, accountKey) if err != nil { - return nil + return err } g.client = client @@ -58,25 +59,27 @@ func (g *B2Sink) initialize(accountId, accountKey, bucket, dir string) error { func (g *B2Sink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool) error { + key = cleanKey(key) + if isDirectory { key = key + "/" } - ctx := context.Background() - - bucket, err := g.client.Bucket(ctx, g.bucket) + bucket, err := g.client.Bucket(context.Background(), g.bucket) if err != nil { return err } targetObject := bucket.Object(key) - return targetObject.Delete(ctx) + return targetObject.Delete(context.Background()) } func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error { + key = cleanKey(key) + if entry.IsDirectory { return nil } @@ -84,15 +87,13 @@ func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error { totalSize := filer2.TotalSize(entry.Chunks) chunkViews := filer2.ViewFromChunks(entry.Chunks, 0, int(totalSize)) - ctx := context.Background() - - bucket, err := g.client.Bucket(ctx, g.bucket) + bucket, err := g.client.Bucket(context.Background(), g.bucket) if err != nil { return err } targetObject := bucket.Object(key) - writer := targetObject.NewWriter(ctx) + writer := targetObject.NewWriter(context.Background()) for _, chunk := range chunkViews { @@ -102,7 +103,7 @@ func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error { } var writeErr error - _, readErr := util.ReadUrlAsStream(fileUrl, chunk.Offset, int(chunk.Size), func(data []byte) { + readErr := util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk, chunk.Offset, int(chunk.Size), func(data []byte) { _, err := writer.Write(data) if err != nil { writeErr = err @@ -122,7 +123,17 @@ func (g *B2Sink) CreateEntry(key string, entry *filer_pb.Entry) error { } -func (g *B2Sink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) { +func (g *B2Sink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) { + + key = cleanKey(key) + // TODO improve efficiency return false, nil } + +func cleanKey(key string) string { + if strings.HasPrefix(key, "/") { + key = key[1:] + } + return key +} diff --git a/weed/replication/sink/filersink/fetch_write.go b/weed/replication/sink/filersink/fetch_write.go index c14566723..360a34620 100644 --- a/weed/replication/sink/filersink/fetch_write.go +++ b/weed/replication/sink/filersink/fetch_write.go @@ -6,38 +6,44 @@ import ( "strings" "sync" + "google.golang.org/grpc" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/security" ) -func (fs *FilerSink) replicateChunks(sourceChunks []*filer_pb.FileChunk) (replicatedChunks []*filer_pb.FileChunk, err error) { +func (fs *FilerSink) replicateChunks(sourceChunks []*filer_pb.FileChunk, dir string) (replicatedChunks []*filer_pb.FileChunk, err error) { if len(sourceChunks) == 0 { return } + + replicatedChunks = make([]*filer_pb.FileChunk, len(sourceChunks)) + var wg sync.WaitGroup - for _, 
sourceChunk := range sourceChunks {
+	for chunkIndex, sourceChunk := range sourceChunks {
 		wg.Add(1)
-		go func(chunk *filer_pb.FileChunk) {
+		go func(chunk *filer_pb.FileChunk, index int) {
 			defer wg.Done()
-			replicatedChunk, e := fs.replicateOneChunk(chunk)
+			replicatedChunk, e := fs.replicateOneChunk(chunk, dir)
 			if e != nil {
 				err = e
 			}
-			replicatedChunks = append(replicatedChunks, replicatedChunk)
-		}(sourceChunk)
+			replicatedChunks[index] = replicatedChunk
+		}(sourceChunk, chunkIndex)
 	}
 	wg.Wait()
 
 	return
 }
 
-func (fs *FilerSink) replicateOneChunk(sourceChunk *filer_pb.FileChunk) (*filer_pb.FileChunk, error) {
+func (fs *FilerSink) replicateOneChunk(sourceChunk *filer_pb.FileChunk, dir string) (*filer_pb.FileChunk, error) {
 
-	fileId, err := fs.fetchAndWrite(sourceChunk)
+	fileId, err := fs.fetchAndWrite(sourceChunk, dir)
 	if err != nil {
-		return nil, fmt.Errorf("copy %s: %v", sourceChunk.FileId, err)
+		return nil, fmt.Errorf("copy %s: %v", sourceChunk.GetFileIdString(), err)
 	}
 
 	return &filer_pb.FileChunk{
@@ -46,19 +52,22 @@ func (fs *FilerSink) replicateOneChunk(sourceChunk *filer_pb.FileChunk) (*filer_
 		Size:         sourceChunk.Size,
 		Mtime:        sourceChunk.Mtime,
 		ETag:         sourceChunk.ETag,
-		SourceFileId: sourceChunk.FileId,
+		SourceFileId: sourceChunk.GetFileIdString(),
+		CipherKey:    sourceChunk.CipherKey,
+		IsGzipped:    sourceChunk.IsGzipped,
 	}, nil
 }
 
-func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk) (fileId string, err error) {
+func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk, dir string) (fileId string, err error) {
 
-	filename, header, readCloser, err := fs.filerSource.ReadPart(sourceChunk.FileId)
+	filename, header, readCloser, err := fs.filerSource.ReadPart(sourceChunk.GetFileIdString())
 	if err != nil {
-		return "", fmt.Errorf("read part %s: %v", sourceChunk.FileId, err)
+		return "", fmt.Errorf("read part %s: %v", sourceChunk.GetFileIdString(), err)
 	}
 	defer readCloser.Close()
 
 	var host string
+	var auth security.EncodedJwt
 
 	if err := fs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error {
 
@@ -68,6 +77,7 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk) (fileId stri
 			Collection:  fs.collection,
 			TtlSec:      fs.ttlSec,
 			DataCenter:  fs.dataCenter,
+			ParentPath:  dir,
 		}
 
 		resp, err := client.AssignVolume(context.Background(), request)
@@ -75,8 +85,11 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk) (fileId stri
 			glog.V(0).Infof("assign volume failure %v: %v", request, err)
 			return err
 		}
+		if resp.Error != "" {
+			return fmt.Errorf("assign volume failure %v: %v", request, resp.Error)
+		}
 
-		fileId, host = resp.FileId, resp.Url
+		fileId, host, auth = resp.FileId, resp.Url, security.EncodedJwt(resp.Auth)
 
 		return nil
 	}); err != nil {
@@ -87,8 +100,8 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk) (fileId stri
 
 	glog.V(4).Infof("replicating %s to %s header:%+v", filename, fileUrl, header)
 
-	uploadResult, err := operation.Upload(fileUrl, filename, readCloser,
-		"gzip" == header.Get("Content-Encoding"), header.Get("Content-Type"), nil, "")
+	// fetch the data as is, regardless of whether it is encrypted or not
+	uploadResult, err := operation.Upload(fileUrl, filename, false, readCloser, "gzip" == header.Get("Content-Encoding"), header.Get("Content-Type"), nil, auth)
 	if err != nil {
 		glog.V(0).Infof("upload data %v to %s: %v", filename, fileUrl, err)
 		return "", fmt.Errorf("upload data: %v", err)
@@ -103,15 +116,11 @@ func (fs *FilerSink) fetchAndWrite(sourceChunk *filer_pb.FileChunk) (fileId stri
 
 func (fs *FilerSink)
withFilerClient(fn func(filer_pb.SeaweedFilerClient) error) error { - grpcConnection, err := util.GrpcDial(fs.grpcAddress) - if err != nil { - return fmt.Errorf("fail to dial %s: %v", fs.grpcAddress, err) - } - defer grpcConnection.Close() - - client := filer_pb.NewSeaweedFilerClient(grpcConnection) + return pb.WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error { + client := filer_pb.NewSeaweedFilerClient(grpcConnection) + return fn(client) + }, fs.grpcAddress, fs.grpcDialOption) - return fn(client) } func volumeId(fileId string) string { diff --git a/weed/replication/sink/filersink/filer_sink.go b/weed/replication/sink/filersink/filer_sink.go index e2974511a..838c2c441 100644 --- a/weed/replication/sink/filersink/filer_sink.go +++ b/weed/replication/sink/filersink/filer_sink.go @@ -4,6 +4,10 @@ import ( "context" "fmt" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" @@ -13,13 +17,14 @@ import ( ) type FilerSink struct { - filerSource *source.FilerSource - grpcAddress string - dir string - replication string - collection string - ttlSec int32 - dataCenter string + filerSource *source.FilerSource + grpcAddress string + dir string + replication string + collection string + ttlSec int32 + dataCenter string + grpcDialOption grpc.DialOption } func init() { @@ -34,13 +39,13 @@ func (fs *FilerSink) GetSinkToDirectory() string { return fs.dir } -func (fs *FilerSink) Initialize(configuration util.Configuration) error { +func (fs *FilerSink) Initialize(configuration util.Configuration, prefix string) error { return fs.initialize( - configuration.GetString("grpcAddress"), - configuration.GetString("directory"), - configuration.GetString("replication"), - configuration.GetString("collection"), - configuration.GetInt("ttlSec"), + configuration.GetString(prefix+"grpcAddress"), + configuration.GetString(prefix+"directory"), + configuration.GetString(prefix+"replication"), + configuration.GetString(prefix+"collection"), + configuration.GetInt(prefix+"ttlSec"), ) } @@ -55,6 +60,7 @@ func (fs *FilerSink) initialize(grpcAddress string, dir string, fs.replication = replication fs.collection = collection fs.ttlSec = int32(ttlSec) + fs.grpcDialOption = security.LoadClientTLS(util.GetViper(), "grpc.client") return nil } @@ -66,7 +72,6 @@ func (fs *FilerSink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bo request := &filer_pb.DeleteEntryRequest{ Directory: dir, Name: name, - IsDirectory: isDirectory, IsDeleteData: deleteIncludeChunks, } @@ -86,7 +91,6 @@ func (fs *FilerSink) CreateEntry(key string, entry *filer_pb.Entry) error { return fs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { dir, name := filer2.FullPath(key).DirAndName() - ctx := context.Background() // look up existing entry lookupRequest := &filer_pb.LookupDirectoryEntryRequest{ @@ -94,14 +98,14 @@ func (fs *FilerSink) CreateEntry(key string, entry *filer_pb.Entry) error { Name: name, } glog.V(1).Infof("lookup: %v", lookupRequest) - if resp, err := client.LookupDirectoryEntry(ctx, lookupRequest); err == nil { + if resp, err := filer_pb.LookupEntry(client, lookupRequest); err == nil { if filer2.ETag(resp.Entry.Chunks) == filer2.ETag(entry.Chunks) { glog.V(0).Infof("already replicated %s", key) return nil } } - replicatedChunks, err := fs.replicateChunks(entry.Chunks) + replicatedChunks, err := fs.replicateChunks(entry.Chunks, dir) if err != 
nil { glog.V(0).Infof("replicate entry chunks %s: %v", key, err) @@ -121,7 +125,7 @@ func (fs *FilerSink) CreateEntry(key string, entry *filer_pb.Entry) error { } glog.V(1).Infof("create: %v", request) - if _, err := client.CreateEntry(ctx, request); err != nil { + if err := filer_pb.CreateEntry(client, request); err != nil { glog.V(0).Infof("create entry %s: %v", key, err) return fmt.Errorf("create entry %s: %v", key, err) } @@ -130,9 +134,7 @@ func (fs *FilerSink) CreateEntry(key string, entry *filer_pb.Entry) error { }) } -func (fs *FilerSink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) { - - ctx := context.Background() +func (fs *FilerSink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) { dir, name := filer2.FullPath(key).DirAndName() @@ -146,7 +148,7 @@ func (fs *FilerSink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry, } glog.V(4).Infof("lookup entry: %v", request) - resp, err := client.LookupDirectoryEntry(ctx, request) + resp, err := filer_pb.LookupEntry(client, request) if err != nil { glog.V(0).Infof("lookup %s: %v", key, err) return err @@ -163,7 +165,11 @@ func (fs *FilerSink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry, glog.V(0).Infof("oldEntry %+v, newEntry %+v, existingEntry: %+v", oldEntry, newEntry, existingEntry) - if filer2.ETag(newEntry.Chunks) == filer2.ETag(existingEntry.Chunks) { + if existingEntry.Attributes.Mtime > newEntry.Attributes.Mtime { + // skip if already changed + // this usually happens when the messages are not ordered + glog.V(0).Infof("late updates %s", key) + } else if filer2.ETag(newEntry.Chunks) == filer2.ETag(existingEntry.Chunks) { // skip if no change // this usually happens when retrying the replication glog.V(0).Infof("already replicated %s", key) @@ -174,11 +180,11 @@ func (fs *FilerSink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry, // delete the chunks that are deleted from the source if deleteIncludeChunks { // remove the deleted chunks. 
Actual data deletion happens in filer UpdateEntry FindUnusedFileChunks
-		existingEntry.Chunks = minusChunks(existingEntry.Chunks, deletedChunks)
+		existingEntry.Chunks = filer2.MinusChunks(existingEntry.Chunks, deletedChunks)
 	}
 
 	// replicate the chunks that are new in the source
-	replicatedChunks, err := fs.replicateChunks(newChunks)
+	replicatedChunks, err := fs.replicateChunks(newChunks, newParentPath)
 	if err != nil {
 		return true, fmt.Errorf("replicate %s chunks error: %v", key, err)
 	}
@@ -189,11 +195,11 @@ func (fs *FilerSink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry,
 	return true, fs.withFilerClient(func(client filer_pb.SeaweedFilerClient) error {
 
 		request := &filer_pb.UpdateEntryRequest{
-			Directory: dir,
+			Directory: newParentPath,
 			Entry:     existingEntry,
 		}
 
-		if _, err := client.UpdateEntry(ctx, request); err != nil {
+		if _, err := client.UpdateEntry(context.Background(), request); err != nil {
 			return fmt.Errorf("update existingEntry %s: %v", key, err)
 		}
 
@@ -202,23 +208,7 @@ func (fs *FilerSink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry,
 }
 
 func compareChunks(oldEntry, newEntry *filer_pb.Entry) (deletedChunks, newChunks []*filer_pb.FileChunk) {
-	deletedChunks = minusChunks(oldEntry.Chunks, newEntry.Chunks)
-	newChunks = minusChunks(newEntry.Chunks, oldEntry.Chunks)
-	return
-}
-
-func minusChunks(as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk) {
-	for _, a := range as {
-		found := false
-		for _, b := range bs {
-			if a.FileId == b.FileId {
-				found = true
-				break
-			}
-		}
-		if !found {
-			delta = append(delta, a)
-		}
-	}
+	deletedChunks = filer2.MinusChunks(oldEntry.Chunks, newEntry.Chunks)
+	newChunks = filer2.MinusChunks(newEntry.Chunks, oldEntry.Chunks)
 	return
 }
diff --git a/weed/replication/sink/gcssink/gcs_sink.go b/weed/replication/sink/gcssink/gcs_sink.go
index ae7751c61..694399274 100644
--- a/weed/replication/sink/gcssink/gcs_sink.go
+++ b/weed/replication/sink/gcssink/gcs_sink.go
@@ -3,16 +3,17 @@ package gcssink
 
 import (
 	"context"
 	"fmt"
-	"log"
 	"os"
 
 	"cloud.google.com/go/storage"
+	"google.golang.org/api/option"
+
 	"github.com/chrislusf/seaweedfs/weed/filer2"
+	"github.com/chrislusf/seaweedfs/weed/glog"
 	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
 	"github.com/chrislusf/seaweedfs/weed/replication/sink"
 	"github.com/chrislusf/seaweedfs/weed/replication/source"
 	"github.com/chrislusf/seaweedfs/weed/util"
-	"google.golang.org/api/option"
 )
 
 type GcsSink struct {
@@ -34,11 +35,11 @@ func (g *GcsSink) GetSinkToDirectory() string {
 	return g.dir
 }
 
-func (g *GcsSink) Initialize(configuration util.Configuration) error {
+func (g *GcsSink) Initialize(configuration util.Configuration, prefix string) error {
 	return g.initialize(
-		configuration.GetString("google_application_credentials"),
-		configuration.GetString("bucket"),
-		configuration.GetString("directory"),
+		configuration.GetString(prefix+"google_application_credentials"),
+		configuration.GetString(prefix+"bucket"),
+		configuration.GetString(prefix+"directory"),
 	)
 }
 
@@ -50,18 +51,17 @@ func (g *GcsSink) initialize(google_application_credentials, bucketName, dir str
 	g.bucket = bucketName
 	g.dir = dir
 
-	ctx := context.Background()
 	// Creates a client.
 	if google_application_credentials == "" {
 		var found bool
 		google_application_credentials, found = os.LookupEnv("GOOGLE_APPLICATION_CREDENTIALS")
 		if !found {
-			log.Fatalf("need to specific GOOGLE_APPLICATION_CREDENTIALS env variable or google_application_credentials in replication.toml")
+			glog.Fatalf("need to specify the GOOGLE_APPLICATION_CREDENTIALS env variable or google_application_credentials in replication.toml")
 		}
 	}
 
-	client, err := storage.NewClient(ctx, option.WithCredentialsFile(google_application_credentials))
+	client, err := storage.NewClient(context.Background(), option.WithCredentialsFile(google_application_credentials))
 	if err != nil {
-		log.Fatalf("Failed to create client: %v", err)
+		glog.Fatalf("Failed to create client: %v", err)
 	}
 
 	g.client = client
@@ -92,9 +92,7 @@ func (g *GcsSink) CreateEntry(key string, entry *filer_pb.Entry) error {
 	totalSize := filer2.TotalSize(entry.Chunks)
 	chunkViews := filer2.ViewFromChunks(entry.Chunks, 0, int(totalSize))
 
-	ctx := context.Background()
-
-	wc := g.client.Bucket(g.bucket).Object(key).NewWriter(ctx)
+	wc := g.client.Bucket(g.bucket).Object(key).NewWriter(context.Background())
 
 	for _, chunk := range chunkViews {
 
@@ -103,7 +101,7 @@ func (g *GcsSink) CreateEntry(key string, entry *filer_pb.Entry) error {
 			return err
 		}
 
-		_, err = util.ReadUrlAsStream(fileUrl, chunk.Offset, int(chunk.Size), func(data []byte) {
+		err = util.ReadUrlAsStream(fileUrl, nil, false, chunk.IsFullChunk, chunk.Offset, int(chunk.Size), func(data []byte) {
 			wc.Write(data)
 		})
 
@@ -121,7 +119,7 @@ func (g *GcsSink) CreateEntry(key string, entry *filer_pb.Entry) error {
 
 }
 
-func (g *GcsSink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) {
+func (g *GcsSink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (foundExistingEntry bool, err error) {
	// TODO improve efficiency
	return false, nil
}
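For reference, the updated sink contract that the Azure, B2, filer, GCS, and S3 sinks all implement follows next. A minimal conforming sink, sketched for illustration only (the package name and logging are hypothetical, and registration with the sink registry is omitted):

package nullsink

import (
	"github.com/chrislusf/seaweedfs/weed/glog"
	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
	"github.com/chrislusf/seaweedfs/weed/replication/source"
	"github.com/chrislusf/seaweedfs/weed/util"
)

type NullSink struct{}

func (s *NullSink) GetName() string            { return "null" }
func (s *NullSink) GetSinkToDirectory() string { return "/" }

func (s *NullSink) Initialize(configuration util.Configuration, prefix string) error {
	// configuration keys are read with the per-sink prefix, e.g. prefix+"directory"
	return nil
}

func (s *NullSink) SetSourceFiler(f *source.FilerSource) {}

func (s *NullSink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool) error {
	glog.V(4).Infof("null sink: delete %s", key)
	return nil
}

func (s *NullSink) CreateEntry(key string, entry *filer_pb.Entry) error {
	glog.V(4).Infof("null sink: create %s", key)
	return nil
}

func (s *NullSink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParentPath string, newEntry *filer_pb.Entry, deleteIncludeChunks bool) (bool, error) {
	// returning false tells the replicator to fall back to delete + create
	return false, nil
}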
"github.com/chrislusf/seaweedfs/weed/replication/sink" "github.com/chrislusf/seaweedfs/weed/replication/source" @@ -36,13 +40,16 @@ func (s3sink *S3Sink) GetSinkToDirectory() string { return s3sink.dir } -func (s3sink *S3Sink) Initialize(configuration util.Configuration) error { +func (s3sink *S3Sink) Initialize(configuration util.Configuration, prefix string) error { + glog.V(0).Infof("sink.s3.region: %v", configuration.GetString(prefix+"region")) + glog.V(0).Infof("sink.s3.bucket: %v", configuration.GetString(prefix+"bucket")) + glog.V(0).Infof("sink.s3.directory: %v", configuration.GetString(prefix+"directory")) return s3sink.initialize( - configuration.GetString("aws_access_key_id"), - configuration.GetString("aws_secret_access_key"), - configuration.GetString("region"), - configuration.GetString("bucket"), - configuration.GetString("directory"), + configuration.GetString(prefix+"aws_access_key_id"), + configuration.GetString(prefix+"aws_secret_access_key"), + configuration.GetString(prefix+"region"), + configuration.GetString(prefix+"bucket"), + configuration.GetString(prefix+"directory"), ) } @@ -50,7 +57,7 @@ func (s3sink *S3Sink) SetSourceFiler(s *source.FilerSource) { s3sink.filerSource = s } -func (s3sink *S3Sink) initialize(awsAccessKeyId, aswSecretAccessKey, region, bucket, dir string) error { +func (s3sink *S3Sink) initialize(awsAccessKeyId, awsSecretAccessKey, region, bucket, dir string) error { s3sink.region = region s3sink.bucket = bucket s3sink.dir = dir @@ -58,8 +65,8 @@ func (s3sink *S3Sink) initialize(awsAccessKeyId, aswSecretAccessKey, region, buc config := &aws.Config{ Region: aws.String(s3sink.region), } - if awsAccessKeyId != "" && aswSecretAccessKey != "" { - config.Credentials = credentials.NewStaticCredentials(awsAccessKeyId, aswSecretAccessKey, "") + if awsAccessKeyId != "" && awsSecretAccessKey != "" { + config.Credentials = credentials.NewStaticCredentials(awsAccessKeyId, awsSecretAccessKey, "") } sess, err := session.NewSession(config) @@ -73,6 +80,8 @@ func (s3sink *S3Sink) initialize(awsAccessKeyId, aswSecretAccessKey, region, buc func (s3sink *S3Sink) DeleteEntry(key string, isDirectory, deleteIncludeChunks bool) error { + key = cleanKey(key) + if isDirectory { key = key + "/" } @@ -82,6 +91,7 @@ func (s3sink *S3Sink) DeleteEntry(key string, isDirectory, deleteIncludeChunks b } func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry) error { + key = cleanKey(key) if entry.IsDirectory { return nil @@ -95,19 +105,20 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry) error { totalSize := filer2.TotalSize(entry.Chunks) chunkViews := filer2.ViewFromChunks(entry.Chunks, 0, int(totalSize)) - var parts []*s3.CompletedPart + parts := make([]*s3.CompletedPart, len(chunkViews)) + var wg sync.WaitGroup for chunkIndex, chunk := range chunkViews { partId := chunkIndex + 1 wg.Add(1) - go func(chunk *filer2.ChunkView) { + go func(chunk *filer2.ChunkView, index int) { defer wg.Done() if part, uploadErr := s3sink.uploadPart(key, uploadId, partId, chunk); uploadErr != nil { err = uploadErr } else { - parts = append(parts, part) + parts[index] = part } - }(chunk) + }(chunk, chunkIndex) } wg.Wait() @@ -116,11 +127,19 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry) error { return err } - return s3sink.completeMultipartUpload(key, uploadId, parts) + return s3sink.completeMultipartUpload(context.Background(), key, uploadId, parts) } -func (s3sink *S3Sink) UpdateEntry(key string, oldEntry, newEntry *filer_pb.Entry, 
deleteIncludeChunks bool) (foundExistingEntry bool, err error) {
+	key = cleanKey(key)
 
 	// TODO improve efficiency
 	return false, nil
 }
+
+func cleanKey(key string) string {
+	if strings.HasPrefix(key, "/") {
+		key = key[1:]
+	}
+	return key
+}
diff --git a/weed/replication/sink/s3sink/s3_write.go b/weed/replication/sink/s3sink/s3_write.go
index 8ff722e67..854688b1e 100644
--- a/weed/replication/sink/s3sink/s3_write.go
+++ b/weed/replication/sink/s3sink/s3_write.go
@@ -2,6 +2,7 @@ package S3Sink
 
 import (
 	"bytes"
+	"context"
 	"fmt"
 	"io"
 
@@ -81,7 +82,7 @@ func (s3sink *S3Sink) abortMultipartUpload(key, uploadId string) error {
 }
 
 // To complete multipart upload
-func (s3sink *S3Sink) completeMultipartUpload(key, uploadId string, parts []*s3.CompletedPart) error {
+func (s3sink *S3Sink) completeMultipartUpload(ctx context.Context, key, uploadId string, parts []*s3.CompletedPart) error {
 	input := &s3.CompleteMultipartUploadInput{
 		Bucket:   aws.String(s3sink.bucket),
 		Key:      aws.String(key),
@@ -161,6 +162,6 @@ func (s3sink *S3Sink) buildReadSeeker(chunk *filer2.ChunkView) (io.ReadSeeker, e
 		return nil, err
 	}
 	buf := make([]byte, chunk.Size)
-	util.ReadUrl(fileUrl, chunk.Offset, int(chunk.Size), buf)
+	util.ReadUrl(fileUrl, nil, false, false, chunk.Offset, int(chunk.Size), buf)
 	return bytes.NewReader(buf), nil
 }
diff --git a/weed/replication/source/filer_source.go b/weed/replication/source/filer_source.go
index efe71e706..90bcffdf0 100644
--- a/weed/replication/source/filer_source.go
+++ b/weed/replication/source/filer_source.go
@@ -7,6 +7,11 @@ import (
 	"net/http"
 	"strings"
 
+	"google.golang.org/grpc"
+
+	"github.com/chrislusf/seaweedfs/weed/pb"
+	"github.com/chrislusf/seaweedfs/weed/security"
+
 	"github.com/chrislusf/seaweedfs/weed/glog"
 	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
 	"github.com/chrislusf/seaweedfs/weed/util"
@@ -17,20 +22,22 @@ type ReplicationSource interface {
 }
 
 type FilerSource struct {
-	grpcAddress string
-	Dir         string
+	grpcAddress    string
+	grpcDialOption grpc.DialOption
+	Dir            string
 }
 
-func (fs *FilerSource) Initialize(configuration util.Configuration) error {
+func (fs *FilerSource) Initialize(configuration util.Configuration, prefix string) error {
 	return fs.initialize(
-		configuration.GetString("grpcAddress"),
-		configuration.GetString("directory"),
+		configuration.GetString(prefix+"grpcAddress"),
+		configuration.GetString(prefix+"directory"),
 	)
 }
 
 func (fs *FilerSource) initialize(grpcAddress string, dir string) (err error) {
 	fs.grpcAddress = grpcAddress
 	fs.Dir = dir
+	fs.grpcDialOption = security.LoadClientTLS(util.GetViper(), "grpc.client")
 	return nil
 }
 
@@ -86,15 +93,11 @@ func (fs *FilerSource) ReadPart(part string) (filename string, header http.Heade
 
 func (fs *FilerSource) withFilerClient(fn func(filer_pb.SeaweedFilerClient) error) error {
 
-	grpcConnection, err := util.GrpcDial(fs.grpcAddress)
-	if err != nil {
-		return fmt.Errorf("fail to dial %s: %v", fs.grpcAddress, err)
-	}
-	defer grpcConnection.Close()
-
-	client := filer_pb.NewSeaweedFilerClient(grpcConnection)
+	return pb.WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error {
+		client := filer_pb.NewSeaweedFilerClient(grpcConnection)
+		return fn(client)
+	}, fs.grpcAddress, fs.grpcDialOption)
 
-	return fn(client)
 }
 
 func volumeId(fileId string) string {
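The notification inputs that follow feed the replicator shown earlier: each one turns a queue message back into a (key, EventNotification) pair. A rough consume loop, assuming the NotificationInput interface implied by the NotificationInputs registry (the wiring function itself is hypothetical):

package example

import (
	"context"

	"github.com/chrislusf/seaweedfs/weed/glog"
	"github.com/chrislusf/seaweedfs/weed/replication"
	"github.com/chrislusf/seaweedfs/weed/replication/sub"
)

func runReplication(input sub.NotificationInput, replicator *replication.Replicator) {
	for {
		key, message, err := input.ReceiveMessage()
		if err != nil {
			glog.Errorf("receive message: %v", err)
			continue
		}
		if message == nil {
			// e.g. an empty SQS poll; nothing to replicate
			continue
		}
		if err := replicator.Replicate(context.Background(), key, message); err != nil {
			glog.Errorf("replicate %s: %v", key, err)
		}
	}
}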
b/weed/replication/sub/notification_aws_sqs.go new file mode 100644 index 000000000..06869e619 --- /dev/null +++ b/weed/replication/sub/notification_aws_sqs.go @@ -0,0 +1,111 @@ +package sub + +import ( + "fmt" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/sqs" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/proto" +) + +func init() { + NotificationInputs = append(NotificationInputs, &AwsSqsInput{}) +} + +type AwsSqsInput struct { + svc *sqs.SQS + queueUrl string +} + +func (k *AwsSqsInput) GetName() string { + return "aws_sqs" +} + +func (k *AwsSqsInput) Initialize(configuration util.Configuration, prefix string) error { + glog.V(0).Infof("replication.notification.aws_sqs.region: %v", configuration.GetString(prefix+"region")) + glog.V(0).Infof("replication.notification.aws_sqs.sqs_queue_name: %v", configuration.GetString(prefix+"sqs_queue_name")) + return k.initialize( + configuration.GetString(prefix+"aws_access_key_id"), + configuration.GetString(prefix+"aws_secret_access_key"), + configuration.GetString(prefix+"region"), + configuration.GetString(prefix+"sqs_queue_name"), + ) +} + +func (k *AwsSqsInput) initialize(awsAccessKeyId, awsSecretAccessKey, region, queueName string) (err error) { + + config := &aws.Config{ + Region: aws.String(region), + } + if awsAccessKeyId != "" && awsSecretAccessKey != "" { + config.Credentials = credentials.NewStaticCredentials(awsAccessKeyId, awsSecretAccessKey, "") + } + + sess, err := session.NewSession(config) + if err != nil { + return fmt.Errorf("create aws session: %v", err) + } + k.svc = sqs.New(sess) + + result, err := k.svc.GetQueueUrl(&sqs.GetQueueUrlInput{ + QueueName: aws.String(queueName), + }) + if err != nil { + if aerr, ok := err.(awserr.Error); ok && aerr.Code() == sqs.ErrCodeQueueDoesNotExist { + return fmt.Errorf("unable to find queue %s", queueName) + } + return fmt.Errorf("get queue %s url: %v", queueName, err) + } + + k.queueUrl = *result.QueueUrl + + return nil +} + +func (k *AwsSqsInput) ReceiveMessage() (key string, message *filer_pb.EventNotification, err error) { + + // receive message + result, err := k.svc.ReceiveMessage(&sqs.ReceiveMessageInput{ + AttributeNames: []*string{ + aws.String(sqs.MessageSystemAttributeNameSentTimestamp), + }, + MessageAttributeNames: []*string{ + aws.String(sqs.QueueAttributeNameAll), + }, + QueueUrl: &k.queueUrl, + MaxNumberOfMessages: aws.Int64(1), + VisibilityTimeout: aws.Int64(20), // 20 seconds + WaitTimeSeconds: aws.Int64(20), + }) + if err != nil { + err = fmt.Errorf("receive message from sqs %s: %v", k.queueUrl, err) + return + } + if len(result.Messages) == 0 { + return + } + + // process the message + key = *result.Messages[0].Attributes["key"] + text := *result.Messages[0].Body + message = &filer_pb.EventNotification{} + if err = proto.UnmarshalText(text, message); err != nil { + err = fmt.Errorf("unmarshal message from sqs %s: %v", k.queueUrl, err) + return + } + + // delete the message; a delete failure is only logged, so the message may be redelivered + if _, deleteErr := k.svc.DeleteMessage(&sqs.DeleteMessageInput{ + QueueUrl: &k.queueUrl, + ReceiptHandle: result.Messages[0].ReceiptHandle, + }); deleteErr != nil { + glog.V(1).Infof("delete message from sqs %s: %v", k.queueUrl, deleteErr) + } + + return +} diff --git a/weed/replication/sub/notification_gocdk_pub_sub.go b/weed/replication/sub/notification_gocdk_pub_sub.go new file mode 100644 index 000000000..9726096e5 --- /dev/null +++
b/weed/replication/sub/notification_gocdk_pub_sub.go @@ -0,0 +1,50 @@ +package sub + +import ( + "context" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/proto" + "gocloud.dev/pubsub" + _ "gocloud.dev/pubsub/awssnssqs" + // _ "gocloud.dev/pubsub/azuresb" + _ "gocloud.dev/pubsub/gcppubsub" + _ "gocloud.dev/pubsub/natspubsub" + _ "gocloud.dev/pubsub/rabbitpubsub" +) + +func init() { + NotificationInputs = append(NotificationInputs, &GoCDKPubSubInput{}) +} + +type GoCDKPubSubInput struct { + sub *pubsub.Subscription +} + +func (k *GoCDKPubSubInput) GetName() string { + return "gocdk_pub_sub" +} + +func (k *GoCDKPubSubInput) Initialize(configuration util.Configuration, prefix string) error { + subURL := configuration.GetString(prefix + "sub_url") + glog.V(0).Infof("notification.gocdk_pub_sub.sub_url: %v", subURL) + sub, err := pubsub.OpenSubscription(context.Background(), subURL) + if err != nil { + return err + } + k.sub = sub + return nil +} + +func (k *GoCDKPubSubInput) ReceiveMessage() (key string, message *filer_pb.EventNotification, err error) { + msg, err := k.sub.Receive(context.Background()) + if err != nil { + return "", nil, err + } + key = msg.Metadata["key"] + message = &filer_pb.EventNotification{} + err = proto.Unmarshal(msg.Body, message) + if err != nil { + return "", nil, err + } + msg.Ack() + return key, message, nil +} diff --git a/weed/replication/sub/notification_google_pub_sub.go b/weed/replication/sub/notification_google_pub_sub.go new file mode 100644 index 000000000..a950bb42b --- /dev/null +++ b/weed/replication/sub/notification_google_pub_sub.go @@ -0,0 +1,109 @@ +package sub + +import ( + "context" + "fmt" + "os" + + "cloud.google.com/go/pubsub" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/golang/protobuf/proto" + "google.golang.org/api/option" +) + +func init() { + NotificationInputs = append(NotificationInputs, &GooglePubSubInput{}) +} + +type GooglePubSubInput struct { + sub *pubsub.Subscription + topicName string + messageChan chan *pubsub.Message +} + +func (k *GooglePubSubInput) GetName() string { + return "google_pub_sub" +} + +func (k *GooglePubSubInput) Initialize(configuration util.Configuration, prefix string) error { + glog.V(0).Infof("notification.google_pub_sub.project_id: %v", configuration.GetString(prefix+"project_id")) + glog.V(0).Infof("notification.google_pub_sub.topic: %v", configuration.GetString(prefix+"topic")) + return k.initialize( + configuration.GetString(prefix+"google_application_credentials"), + configuration.GetString(prefix+"project_id"), + configuration.GetString(prefix+"topic"), + ) +} + +func (k *GooglePubSubInput) initialize(google_application_credentials, projectId, topicName string) (err error) { + + ctx := context.Background() + // Creates a client.
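+ // A sketch of the expected setup (the path below is a placeholder): when + // google_application_credentials is empty in filer.toml, the standard + // GOOGLE_APPLICATION_CREDENTIALS environment variable is used instead, e.g. + // export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json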
+ if google_application_credentials == "" { + var found bool + google_application_credentials, found = os.LookupEnv("GOOGLE_APPLICATION_CREDENTIALS") + if !found { + glog.Fatalf("need to specify GOOGLE_APPLICATION_CREDENTIALS env variable or google_application_credentials in filer.toml") + } + } + + client, err := pubsub.NewClient(ctx, projectId, option.WithCredentialsFile(google_application_credentials)) + if err != nil { + glog.Fatalf("Failed to create client: %v", err) + } + + k.topicName = topicName + topic := client.Topic(topicName) + if exists, err := topic.Exists(ctx); err == nil { + if !exists { + topic, err = client.CreateTopic(ctx, topicName) + if err != nil { + glog.Fatalf("Failed to create topic %s: %v", topicName, err) + } + } + } else { + glog.Fatalf("Failed to check topic %s: %v", topicName, err) + } + + subscriptionName := "seaweedfs_sub" + + k.sub = client.Subscription(subscriptionName) + if exists, err := k.sub.Exists(ctx); err == nil { + if !exists { + k.sub, err = client.CreateSubscription(ctx, subscriptionName, pubsub.SubscriptionConfig{Topic: topic}) + if err != nil { + glog.Fatalf("Failed to create subscription %s: %v", subscriptionName, err) + } + } + } else { + glog.Fatalf("Failed to check subscription %s: %v", subscriptionName, err) + } + + k.messageChan = make(chan *pubsub.Message, 1) + + go k.sub.Receive(ctx, func(ctx context.Context, m *pubsub.Message) { + k.messageChan <- m + m.Ack() + }) + + return err +} + +func (k *GooglePubSubInput) ReceiveMessage() (key string, message *filer_pb.EventNotification, err error) { + + m := <-k.messageChan + + // process the message + key = m.Attributes["key"] + message = &filer_pb.EventNotification{} + err = proto.Unmarshal(m.Data, message) + + if err != nil { + err = fmt.Errorf("unmarshal message from google pubsub %s: %v", k.topicName, err) + return + } + + return +} diff --git a/weed/replication/notification_kafka.go b/weed/replication/sub/notification_kafka.go index 3bf917376..fa9cfad9b 100644 --- a/weed/replication/notification_kafka.go +++ b/weed/replication/sub/notification_kafka.go @@ -1,4 +1,4 @@ -package replication +package sub import ( "encoding/json" @@ -28,14 +28,14 @@ func (k *KafkaInput) GetName() string { return "kafka" } -func (k *KafkaInput) Initialize(configuration util.Configuration) error { - glog.V(0).Infof("replication.notification.kafka.hosts: %v\n", configuration.GetStringSlice("hosts")) - glog.V(0).Infof("replication.notification.kafka.topic: %v\n", configuration.GetString("topic")) +func (k *KafkaInput) Initialize(configuration util.Configuration, prefix string) error { + glog.V(0).Infof("replication.notification.kafka.hosts: %v\n", configuration.GetStringSlice(prefix+"hosts")) + glog.V(0).Infof("replication.notification.kafka.topic: %v\n", configuration.GetString(prefix+"topic")) return k.initialize( - configuration.GetStringSlice("hosts"), - configuration.GetString("topic"), - configuration.GetString("offsetFile"), - configuration.GetInt("offsetSaveIntervalSeconds"), + configuration.GetStringSlice(prefix+"hosts"), + configuration.GetString(prefix+"topic"), + configuration.GetString(prefix+"offsetFile"), + configuration.GetInt(prefix+"offsetSaveIntervalSeconds"), ) } diff --git a/weed/replication/notifications.go b/weed/replication/sub/notifications.go index 6ae95d36b..8a2668f98 100644 --- a/weed/replication/notifications.go +++ b/weed/replication/sub/notifications.go @@ -1,4 +1,4 @@ -package replication +package sub import ( "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" @@ -9,7 +9,7 @@ type
NotificationInput interface { // GetName gets the name to locate the configuration in sync.toml file GetName() string // Initialize initializes the file store - Initialize(configuration util.Configuration) error + Initialize(configuration util.Configuration, prefix string) error ReceiveMessage() (key string, message *filer_pb.EventNotification, err error) } diff --git a/weed/s3api/auth_credentials.go b/weed/s3api/auth_credentials.go new file mode 100644 index 000000000..c1e8dff1e --- /dev/null +++ b/weed/s3api/auth_credentials.go @@ -0,0 +1,188 @@ +package s3api + +import ( + "bytes" + "fmt" + "io/ioutil" + "net/http" + + "github.com/golang/protobuf/jsonpb" + "github.com/gorilla/mux" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/iam_pb" +) + +type Action string + +const ( + ACTION_READ = "Read" + ACTION_WRITE = "Write" + ACTION_ADMIN = "Admin" +) + +type Iam interface { + Check(f http.HandlerFunc, actions ...Action) http.HandlerFunc +} + +type IdentityAccessManagement struct { + identities []*Identity + domain string +} + +type Identity struct { + Name string + Credentials []*Credential + Actions []Action +} + +type Credential struct { + AccessKey string + SecretKey string +} + +func NewIdentityAccessManagement(fileName string, domain string) *IdentityAccessManagement { + iam := &IdentityAccessManagement{ + domain: domain, + } + if fileName == "" { + return iam + } + if err := iam.loadS3ApiConfiguration(fileName); err != nil { + glog.Fatalf("fail to load config file %s: %v", fileName, err) + } + return iam +} + +func (iam *IdentityAccessManagement) loadS3ApiConfiguration(fileName string) error { + + s3ApiConfiguration := &iam_pb.S3ApiConfiguration{} + + rawData, readErr := ioutil.ReadFile(fileName) + if readErr != nil { + glog.Warningf("fail to read %s : %v", fileName, readErr) + return fmt.Errorf("fail to read %s : %v", fileName, readErr) + } + + glog.V(1).Infof("loading S3 API configuration from %v", fileName) + if err := jsonpb.Unmarshal(bytes.NewReader(rawData), s3ApiConfiguration); err != nil { + glog.Warningf("unmarshal error: %v", err) + return fmt.Errorf("unmarshal %s error: %v", fileName, err) + } + + for _, ident := range s3ApiConfiguration.Identities { + t := &Identity{ + Name: ident.Name, + Credentials: nil, + Actions: nil, + } + for _, action := range ident.Actions { + t.Actions = append(t.Actions, Action(action)) + } + for _, cred := range ident.Credentials { + t.Credentials = append(t.Credentials, &Credential{ + AccessKey: cred.AccessKey, + SecretKey: cred.SecretKey, + }) + } + iam.identities = append(iam.identities, t) + } + + return nil +} + +func (iam *IdentityAccessManagement) lookupByAccessKey(accessKey string) (identity *Identity, cred *Credential, found bool) { + for _, ident := range iam.identities { + for _, cred := range ident.Credentials { + if cred.AccessKey == accessKey { + return ident, cred, true + } + } + } + return nil, nil, false +} + +func (iam *IdentityAccessManagement) Auth(f http.HandlerFunc, action Action) http.HandlerFunc { + + if len(iam.identities) == 0 { + return f + } + + return func(w http.ResponseWriter, r *http.Request) { + errCode := iam.authRequest(r, action) + if errCode == ErrNone { + f(w, r) + return + } + writeErrorResponse(w, errCode, r.URL) + } +} + +// check whether the request has valid access keys +func (iam *IdentityAccessManagement) authRequest(r *http.Request, action Action) ErrorCode { + var identity *Identity + var s3Err ErrorCode + switch getRequestAuthType(r) { + case
authTypeStreamingSigned: + return ErrNone + case authTypeUnknown: + glog.V(3).Infof("unknown auth type") + return ErrAccessDenied + case authTypePresignedV2, authTypeSignedV2: + glog.V(3).Infof("v2 auth type") + identity, s3Err = iam.isReqAuthenticatedV2(r) + case authTypeSigned, authTypePresigned: + glog.V(3).Infof("v4 auth type") + identity, s3Err = iam.reqSignatureV4Verify(r) + case authTypePostPolicy: + glog.V(3).Infof("post policy auth type") + return ErrNotImplemented + case authTypeJWT: + glog.V(3).Infof("jwt auth type") + return ErrNotImplemented + case authTypeAnonymous: + return ErrAccessDenied + default: + return ErrNotImplemented + } + + if s3Err != ErrNone { + glog.V(3).Infof("auth error: %v", s3Err) + return s3Err + } + + glog.V(3).Infof("user name: %v actions: %v", identity.Name, identity.Actions) + + vars := mux.Vars(r) + bucket := vars["bucket"] + + if !identity.canDo(action, bucket) { + return ErrAccessDenied + } + + return ErrNone + +} + +func (identity *Identity) canDo(action Action, bucket string) bool { + for _, a := range identity.Actions { + if a == "Admin" { + return true + } + } + for _, a := range identity.Actions { + if a == action { + return true + } + } + if bucket == "" { + return false + } + limitedByBucket := string(action) + ":" + bucket + for _, a := range identity.Actions { + if string(a) == limitedByBucket { + return true + } + } + return false +} diff --git a/weed/s3api/auth_credentials_test.go b/weed/s3api/auth_credentials_test.go new file mode 100644 index 000000000..c6f76560c --- /dev/null +++ b/weed/s3api/auth_credentials_test.go @@ -0,0 +1,68 @@ +package s3api + +import ( + "testing" + + "github.com/golang/protobuf/jsonpb" + + "github.com/chrislusf/seaweedfs/weed/pb/iam_pb" +) + +func TestIdentityListFileFormat(t *testing.T) { + + s3ApiConfiguration := &iam_pb.S3ApiConfiguration{} + + identity1 := &iam_pb.Identity{ + Name: "some_name", + Credentials: []*iam_pb.Credential{ + { + AccessKey: "some_access_key1", + SecretKey: "some_secret_key2", + }, + }, + Actions: []string{ + ACTION_ADMIN, + ACTION_READ, + ACTION_WRITE, + }, + } + identity2 := &iam_pb.Identity{ + Name: "some_read_only_user", + Credentials: []*iam_pb.Credential{ + { + AccessKey: "some_access_key1", + SecretKey: "some_secret_key1", + }, + }, + Actions: []string{ + ACTION_READ, + }, + } + identity3 := &iam_pb.Identity{ + Name: "some_normal_user", + Credentials: []*iam_pb.Credential{ + { + AccessKey: "some_access_key2", + SecretKey: "some_secret_key2", + }, + }, + Actions: []string{ + ACTION_READ, + ACTION_WRITE, + }, + } + + s3ApiConfiguration.Identities = append(s3ApiConfiguration.Identities, identity1) + s3ApiConfiguration.Identities = append(s3ApiConfiguration.Identities, identity2) + s3ApiConfiguration.Identities = append(s3ApiConfiguration.Identities, identity3) + + m := jsonpb.Marshaler{ + EmitDefaults: true, + Indent: " ", + } + + text, _ := m.MarshalToString(s3ApiConfiguration) + + println(text) + +} diff --git a/weed/s3api/auth_signature_v2.go b/weed/s3api/auth_signature_v2.go new file mode 100644 index 000000000..151a9ec26 --- /dev/null +++ b/weed/s3api/auth_signature_v2.go @@ -0,0 +1,412 @@ +/* + * The following code tries to reverse engineer the Amazon S3 APIs, + * and is mostly copied from minio implementation. + */ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package s3api + +import ( + "crypto/hmac" + "crypto/sha1" + "crypto/subtle" + "encoding/base64" + "fmt" + "net" + "net/http" + "net/url" + "path" + "sort" + "strconv" + "strings" + "time" +) + +// Whitelist resource list that will be used in query string for signature-V2 calculation. +// The list should be alphabetically sorted +var resourceList = []string{ + "acl", + "delete", + "lifecycle", + "location", + "logging", + "notification", + "partNumber", + "policy", + "requestPayment", + "response-cache-control", + "response-content-disposition", + "response-content-encoding", + "response-content-language", + "response-content-type", + "response-expires", + "torrent", + "uploadId", + "uploads", + "versionId", + "versioning", + "versions", + "website", +} + +// Verify if request has valid AWS Signature Version '2'. +func (iam *IdentityAccessManagement) isReqAuthenticatedV2(r *http.Request) (*Identity, ErrorCode) { + if isRequestSignatureV2(r) { + return iam.doesSignV2Match(r) + } + return iam.doesPresignV2SignatureMatch(r) +} + +// Authorization = "AWS" + " " + AWSAccessKeyId + ":" + Signature; +// Signature = Base64( HMAC-SHA1( YourSecretKey, UTF-8-Encoding-Of( StringToSign ) ) ); +// +// StringToSign = HTTP-Verb + "\n" + +// Content-Md5 + "\n" + +// Content-Type + "\n" + +// Date + "\n" + +// CanonicalizedProtocolHeaders + +// CanonicalizedResource; +// +// CanonicalizedResource = [ "/" + Bucket ] + +// <HTTP-Request-URI, from the protocol name up to the query string> + +// [ subresource, if present. For example "?acl", "?location", "?logging", or "?torrent"]; +// +// CanonicalizedProtocolHeaders = <described below> + +// doesSignV2Match - Verify authorization header with calculated header in accordance with +// - http://docs.aws.amazon.com/AmazonS3/latest/dev/auth-request-sig-v2.html +// returns true if matches, false otherwise. if error is not nil then it is always false + +func validateV2AuthHeader(v2Auth string) (accessKey string, errCode ErrorCode) { + if v2Auth == "" { + return "", ErrAuthHeaderEmpty + } + // Verify if the header algorithm is supported or not. + if !strings.HasPrefix(v2Auth, signV2Algorithm) { + return "", ErrSignatureVersionNotSupported + } + + // below is the V2 Signed Auth header format, splitting on `space` (after the `AWS` string). + // Authorization = "AWS" + " " + AWSAccessKeyId + ":" + Signature + authFields := strings.Split(v2Auth, " ") + if len(authFields) != 2 { + return "", ErrMissingFields + } + + // Then we split on ":", which separates the `AWSAccessKeyId` and `Signature` strings. + keySignFields := strings.Split(strings.TrimSpace(authFields[1]), ":") + if len(keySignFields) != 2 { + return "", ErrMissingFields + } + + return keySignFields[0], ErrNone +} + +func (iam *IdentityAccessManagement) doesSignV2Match(r *http.Request) (*Identity, ErrorCode) { + v2Auth := r.Header.Get("Authorization") + + accessKey, apiError := validateV2AuthHeader(v2Auth) + if apiError != ErrNone { + return nil, apiError + } + + // Access credentials. + // Validate if access key id same.
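+ // For illustration only (hypothetical key and signature), a V2 header looks like: + // Authorization: AWS AKIAIOSFODNN7EXAMPLE:frJIUN8DYpKDtOLCwo//yllqDzg= + // from which validateV2AuthHeader above extracts "AKIAIOSFODNN7EXAMPLE" as the access key.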
+ ident, cred, found := iam.lookupByAccessKey(accessKey) + if !found { + return nil, ErrInvalidAccessKeyID + } + + // r.RequestURI will have raw encoded URI as sent by the client. + tokens := strings.SplitN(r.RequestURI, "?", 2) + encodedResource := tokens[0] + encodedQuery := "" + if len(tokens) == 2 { + encodedQuery = tokens[1] + } + + unescapedQueries, err := unescapeQueries(encodedQuery) + if err != nil { + return nil, ErrInvalidQueryParams + } + + encodedResource, err = getResource(encodedResource, r.Host, iam.domain) + if err != nil { + return nil, ErrInvalidRequest + } + + prefix := fmt.Sprintf("%s %s:", signV2Algorithm, cred.AccessKey) + if !strings.HasPrefix(v2Auth, prefix) { + return nil, ErrSignatureDoesNotMatch + } + v2Auth = v2Auth[len(prefix):] + expectedAuth := signatureV2(cred, r.Method, encodedResource, strings.Join(unescapedQueries, "&"), r.Header) + if !compareSignatureV2(v2Auth, expectedAuth) { + return nil, ErrSignatureDoesNotMatch + } + return ident, ErrNone +} + +// doesPresignV2SignatureMatch - Verify query headers with presigned signature +// - http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html#RESTAuthenticationQueryStringAuth +// returns ErrNone if matches. S3 errors otherwise. +func (iam *IdentityAccessManagement) doesPresignV2SignatureMatch(r *http.Request) (*Identity, ErrorCode) { + + // r.RequestURI will have raw encoded URI as sent by the client. + tokens := strings.SplitN(r.RequestURI, "?", 2) + encodedResource := tokens[0] + encodedQuery := "" + if len(tokens) == 2 { + encodedQuery = tokens[1] + } + + var ( + filteredQueries []string + gotSignature string + expires string + accessKey string + err error + ) + + var unescapedQueries []string + unescapedQueries, err = unescapeQueries(encodedQuery) + if err != nil { + return nil, ErrInvalidQueryParams + } + + // Extract the necessary values from presigned query, construct a list of new filtered queries. + for _, query := range unescapedQueries { + keyval := strings.SplitN(query, "=", 2) + if len(keyval) != 2 { + return nil, ErrInvalidQueryParams + } + switch keyval[0] { + case "AWSAccessKeyId": + accessKey = keyval[1] + case "Signature": + gotSignature = keyval[1] + case "Expires": + expires = keyval[1] + default: + filteredQueries = append(filteredQueries, query) + } + } + + // Invalid values return an error. + if accessKey == "" || gotSignature == "" || expires == "" { + return nil, ErrInvalidQueryParams + } + + // Validate if access key id same. + ident, cred, found := iam.lookupByAccessKey(accessKey) + if !found { + return nil, ErrInvalidAccessKeyID + } + + // Make sure the request has not expired. + expiresInt, err := strconv.ParseInt(expires, 10, 64) + if err != nil { + return nil, ErrMalformedExpires + } + + // Check if the presigned URL has expired. + if expiresInt < time.Now().UTC().Unix() { + return nil, ErrExpiredPresignRequest + } + + encodedResource, err = getResource(encodedResource, r.Host, iam.domain) + if err != nil { + return nil, ErrInvalidRequest + } + + expectedSignature := preSignatureV2(cred, r.Method, encodedResource, strings.Join(filteredQueries, "&"), r.Header, expires) + if !compareSignatureV2(gotSignature, expectedSignature) { + return nil, ErrSignatureDoesNotMatch + } + + return ident, ErrNone +} + +// Unescape the encodedQuery string into a list of unescaped query params; returns an error +// if any value fails to unescape.
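+// For example, the encoded query "key=a%2Fb&uploads" unescapes to ["key=a/b", "uploads"].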
+func unescapeQueries(encodedQuery string) (unescapedQueries []string, err error) { + for _, query := range strings.Split(encodedQuery, "&") { + var unescapedQuery string + unescapedQuery, err = url.QueryUnescape(query) + if err != nil { + return nil, err + } + unescapedQueries = append(unescapedQueries, unescapedQuery) + } + return unescapedQueries, nil +} + +// Returns "/bucketName/objectName" for path-style or virtual-host-style requests. +func getResource(path string, host string, domain string) (string, error) { + if domain == "" { + return path, nil + } + // If virtual-host-style is enabled construct the "resource" properly. + if strings.Contains(host, ":") { + // In bucket.mydomain.com:9000, strip out :9000 + var err error + if host, _, err = net.SplitHostPort(host); err != nil { + return "", err + } + } + if !strings.HasSuffix(host, "."+domain) { + return path, nil + } + bucket := strings.TrimSuffix(host, "."+domain) + return "/" + pathJoin(bucket, path), nil +} + +// pathJoin - like path.Join() but retains trailing "/" of the last element +func pathJoin(elem ...string) string { + trailingSlash := "" + if len(elem) > 0 { + if strings.HasSuffix(elem[len(elem)-1], "/") { + trailingSlash = "/" + } + } + return path.Join(elem...) + trailingSlash +} + +// Return the signature v2 of a given request. +func signatureV2(cred *Credential, method string, encodedResource string, encodedQuery string, headers http.Header) string { + stringToSign := getStringToSignV2(method, encodedResource, encodedQuery, headers, "") + signature := calculateSignatureV2(stringToSign, cred.SecretKey) + return signature +} + +// Return string to sign under two different conditions. +// - if expires string is set then string to sign includes date instead of the Date header. +// - if expires string is empty then string to sign includes date header instead. +func getStringToSignV2(method string, encodedResource, encodedQuery string, headers http.Header, expires string) string { + canonicalHeaders := canonicalizedAmzHeadersV2(headers) + if len(canonicalHeaders) > 0 { + canonicalHeaders += "\n" + } + + date := expires // Date is set to expires date for presign operations. + if date == "" { + // If expires date is empty then request header Date is used. + date = headers.Get("Date") + } + + // From the Amazon docs: + // + // StringToSign = HTTP-Verb + "\n" + + // Content-Md5 + "\n" + + // Content-Type + "\n" + + // Date/Expires + "\n" + + // CanonicalizedProtocolHeaders + + // CanonicalizedResource; + stringToSign := strings.Join([]string{ + method, + headers.Get("Content-MD5"), + headers.Get("Content-Type"), + date, + canonicalHeaders, + }, "\n") + + return stringToSign + canonicalizedResourceV2(encodedResource, encodedQuery) +} + +// Return canonical resource string. +func canonicalizedResourceV2(encodedResource, encodedQuery string) string { + queries := strings.Split(encodedQuery, "&") + keyval := make(map[string]string) + for _, query := range queries { + key := query + val := "" + index := strings.Index(query, "=") + if index != -1 { + key = query[:index] + val = query[index+1:] + } + keyval[key] = val + } + + var canonicalQueries []string + for _, key := range resourceList { + val, ok := keyval[key] + if !ok { + continue + } + if val == "" { + canonicalQueries = append(canonicalQueries, key) + continue + } + canonicalQueries = append(canonicalQueries, key+"="+val) + } + + // The queries will be already sorted as resourceList is sorted, if canonicalQueries + // is empty strings.Join returns empty. 
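+ // For illustration (hypothetical names): encodedResource "/mybucket/photo.jpg" with + // encodedQuery "uploadId=abc&foo=bar" keeps only the whitelisted "uploadId" and + // yields "/mybucket/photo.jpg?uploadId=abc".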
+ canonicalQuery := strings.Join(canonicalQueries, "&") + if canonicalQuery != "" { + return encodedResource + "?" + canonicalQuery + } + return encodedResource +} + +// Return canonical headers. +func canonicalizedAmzHeadersV2(headers http.Header) string { + var keys []string + keyval := make(map[string]string) + for key := range headers { + lkey := strings.ToLower(key) + if !strings.HasPrefix(lkey, "x-amz-") { + continue + } + keys = append(keys, lkey) + keyval[lkey] = strings.Join(headers[key], ",") + } + sort.Strings(keys) + var canonicalHeaders []string + for _, key := range keys { + canonicalHeaders = append(canonicalHeaders, key+":"+keyval[key]) + } + return strings.Join(canonicalHeaders, "\n") +} + +func calculateSignatureV2(stringToSign string, secret string) string { + hm := hmac.New(sha1.New, []byte(secret)) + hm.Write([]byte(stringToSign)) + return base64.StdEncoding.EncodeToString(hm.Sum(nil)) +} + +// compareSignatureV2 returns true if and only if both signatures +// are equal. The signatures are expected to be base64 encoded strings +// according to the AWS S3 signature V2 spec. +func compareSignatureV2(sig1, sig2 string) bool { + // Decoding the signature strings to their binary byte-sequence representations is required, + // as the Base64 encoding of a value is not unique: + // For example "aGVsbG8=" and "aGVsbG8=\r" will result in the same byte slice. + signature1, err := base64.StdEncoding.DecodeString(sig1) + if err != nil { + return false + } + signature2, err := base64.StdEncoding.DecodeString(sig2) + if err != nil { + return false + } + return subtle.ConstantTimeCompare(signature1, signature2) == 1 +} + +// Return signature-v2 for the presigned request. +func preSignatureV2(cred *Credential, method string, encodedResource string, encodedQuery string, headers http.Header, expires string) string { + stringToSign := getStringToSignV2(method, encodedResource, encodedQuery, headers, expires) + return calculateSignatureV2(stringToSign, cred.SecretKey) +} diff --git a/weed/s3api/auth_signature_v4.go b/weed/s3api/auth_signature_v4.go new file mode 100644 index 000000000..cdfd8be1d --- /dev/null +++ b/weed/s3api/auth_signature_v4.go @@ -0,0 +1,720 @@ +/* + * The following code tries to reverse engineer the Amazon S3 APIs, + * and is mostly copied from minio implementation. + */ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package s3api + +import ( + "bytes" + "crypto/hmac" + "crypto/sha256" + "crypto/subtle" + "encoding/hex" + "net/http" + "net/url" + "regexp" + "sort" + "strconv" + "strings" + "time" + "unicode/utf8" +) + +func (iam *IdentityAccessManagement) reqSignatureV4Verify(r *http.Request) (*Identity, ErrorCode) { + sha256sum := getContentSha256Cksum(r) + switch { + case isRequestSignatureV4(r): + return iam.doesSignatureMatch(sha256sum, r) + case isRequestPresignedSignatureV4(r): + return iam.doesPresignedSignatureMatch(sha256sum, r) + } + return nil, ErrAccessDenied +} + +// Streaming AWS Signature Version '4' constants.
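+// A chunked upload signals itself with a fixed header value rather than a payload digest, e.g. + // x-amz-content-sha256: STREAMING-AWS4-HMAC-SHA256-PAYLOAD (the streamingContentSHA256 constant below).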
+const ( + emptySHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + streamingContentSHA256 = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" + signV4ChunkedAlgorithm = "AWS4-HMAC-SHA256-PAYLOAD" + + // http Header "x-amz-content-sha256" == "UNSIGNED-PAYLOAD" indicates that the + // client did not calculate sha256 of the payload. + unsignedPayload = "UNSIGNED-PAYLOAD" +) + +// Returns SHA256 for calculating canonical-request. +func getContentSha256Cksum(r *http.Request) string { + var ( + defaultSha256Cksum string + v []string + ok bool + ) + + // For a presigned request we look at the query param for sha256. + if isRequestPresignedSignatureV4(r) { + // X-Amz-Content-Sha256, if not set in presigned requests, checksum + // will default to 'UNSIGNED-PAYLOAD'. + defaultSha256Cksum = unsignedPayload + v, ok = r.URL.Query()["X-Amz-Content-Sha256"] + if !ok { + v, ok = r.Header["X-Amz-Content-Sha256"] + } + } else { + // X-Amz-Content-Sha256, if not set in signed requests, checksum + // will default to sha256([]byte("")). + defaultSha256Cksum = emptySHA256 + v, ok = r.Header["X-Amz-Content-Sha256"] + } + + // We found 'X-Amz-Content-Sha256' return the captured value. + if ok { + return v[0] + } + + // We couldn't find 'X-Amz-Content-Sha256'. + return defaultSha256Cksum +} + +// Verify authorization header - http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html +func (iam *IdentityAccessManagement) doesSignatureMatch(hashedPayload string, r *http.Request) (*Identity, ErrorCode) { + + // Copy request. + req := *r + + // Save authorization header. + v4Auth := req.Header.Get("Authorization") + + // Parse signature version '4' header. + signV4Values, err := parseSignV4(v4Auth) + if err != ErrNone { + return nil, err + } + + // Extract all the signed headers along with its values. + extractedSignedHeaders, errCode := extractSignedHeaders(signV4Values.SignedHeaders, r) + if errCode != ErrNone { + return nil, errCode + } + + // Verify if the access key id matches. + identity, cred, found := iam.lookupByAccessKey(signV4Values.Credential.accessKey) + if !found { + return nil, ErrInvalidAccessKeyID + } + + // Extract date, if not present throw error. + var date string + if date = req.Header.Get(http.CanonicalHeaderKey("X-Amz-Date")); date == "" { + if date = r.Header.Get("Date"); date == "" { + return nil, ErrMissingDateHeader + } + } + // Parse date header. + t, e := time.Parse(iso8601Format, date) + if e != nil { + return nil, ErrMalformedDate + } + + // Query string. + queryStr := req.URL.Query().Encode() + + // Get canonical request. + canonicalRequest := getCanonicalRequest(extractedSignedHeaders, hashedPayload, queryStr, req.URL.Path, req.Method) + + // Get string to sign from canonical request. + stringToSign := getStringToSign(canonicalRequest, t, signV4Values.Credential.getScope()) + + // Get hmac signing key. + signingKey := getSigningKey(cred.SecretKey, signV4Values.Credential.scope.date, signV4Values.Credential.scope.region) + + // Calculate signature. + newSignature := getSignature(signingKey, stringToSign) + + // Verify if signature match. + if !compareSignatureV4(newSignature, signV4Values.Signature) { + return nil, ErrSignatureDoesNotMatch + } + + // Return error none. + return identity, ErrNone +} + +// credentialHeader data type represents structured form of Credential +// string from authorization header. 
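+// For illustration (hypothetical values), the credential element + // Credential=AKIAIOSFODNN7EXAMPLE/20130524/us-east-1/s3/aws4_request + // parses into accessKey "AKIAIOSFODNN7EXAMPLE" and a scope of date 20130524, + // region "us-east-1", service "s3", and request "aws4_request".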
+type credentialHeader struct { + accessKey string + scope struct { + date time.Time + region string + service string + request string + } +} + +// signValues data type represents structured form of AWS Signature V4 header. +type signValues struct { + Credential credentialHeader + SignedHeaders []string + Signature string +} + +// Return scope string. +func (c credentialHeader) getScope() string { + return strings.Join([]string{ + c.scope.date.Format(yyyymmdd), + c.scope.region, + c.scope.service, + c.scope.request, + }, "/") +} + +// Authorization: algorithm Credential=accessKeyID/credScope, \ +// SignedHeaders=signedHeaders, Signature=signature +// +func parseSignV4(v4Auth string) (sv signValues, aec ErrorCode) { + // Replace all spaced strings, some clients can send spaced + // parameters and some won't. So we pro-actively remove any spaces + // to make parsing easier. + v4Auth = strings.Replace(v4Auth, " ", "", -1) + if v4Auth == "" { + return sv, ErrAuthHeaderEmpty + } + + // Verify if the header algorithm is supported or not. + if !strings.HasPrefix(v4Auth, signV4Algorithm) { + return sv, ErrSignatureVersionNotSupported + } + + // Strip off the Algorithm prefix. + v4Auth = strings.TrimPrefix(v4Auth, signV4Algorithm) + authFields := strings.Split(strings.TrimSpace(v4Auth), ",") + if len(authFields) != 3 { + return sv, ErrMissingFields + } + + // Initialize signature version '4' structured header. + signV4Values := signValues{} + + var err ErrorCode + // Save credential values. + signV4Values.Credential, err = parseCredentialHeader(authFields[0]) + if err != ErrNone { + return sv, err + } + + // Save signed headers. + signV4Values.SignedHeaders, err = parseSignedHeader(authFields[1]) + if err != ErrNone { + return sv, err + } + + // Save signature. + signV4Values.Signature, err = parseSignature(authFields[2]) + if err != ErrNone { + return sv, err + } + + // Return the structure here. + return signV4Values, ErrNone +} + +// parse credentialHeader string into its structured form. +func parseCredentialHeader(credElement string) (ch credentialHeader, aec ErrorCode) { + creds := strings.Split(strings.TrimSpace(credElement), "=") + if len(creds) != 2 { + return ch, ErrMissingFields + } + if creds[0] != "Credential" { + return ch, ErrMissingCredTag + } + credElements := strings.Split(strings.TrimSpace(creds[1]), "/") + if len(credElements) != 5 { + return ch, ErrCredMalformed + } + // Save access key id. + cred := credentialHeader{ + accessKey: credElements[0], + } + var e error + cred.scope.date, e = time.Parse(yyyymmdd, credElements[1]) + if e != nil { + return ch, ErrMalformedCredentialDate + } + + cred.scope.region = credElements[2] + cred.scope.service = credElements[3] // "s3" + cred.scope.request = credElements[4] // "aws4_request" + return cred, ErrNone +} + +// Parse slice of signed headers from signed headers tag. +func parseSignedHeader(signedHdrElement string) ([]string, ErrorCode) { + signedHdrFields := strings.Split(strings.TrimSpace(signedHdrElement), "=") + if len(signedHdrFields) != 2 { + return nil, ErrMissingFields + } + if signedHdrFields[0] != "SignedHeaders" { + return nil, ErrMissingSignHeadersTag + } + if signedHdrFields[1] == "" { + return nil, ErrMissingFields + } + signedHeaders := strings.Split(signedHdrFields[1], ";") + return signedHeaders, ErrNone +} + +// Parse signature from signature tag.
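+// For example, "Signature=fe5f80f77d5fa3beca038a248ff027d0445342fe2855ddc963176630326f1024" + // (a hypothetical value) yields the 64-character hex signature after the "=".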
+func parseSignature(signElement string) (string, ErrorCode) { + signFields := strings.Split(strings.TrimSpace(signElement), "=") + if len(signFields) != 2 { + return "", ErrMissingFields + } + if signFields[0] != "Signature" { + return "", ErrMissingSignTag + } + if signFields[1] == "" { + return "", ErrMissingFields + } + signature := signFields[1] + return signature, ErrNone +} + +// check query headers with presigned signature +// - http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html +func (iam *IdentityAccessManagement) doesPresignedSignatureMatch(hashedPayload string, r *http.Request) (*Identity, ErrorCode) { + + // Copy request + req := *r + + // Parse request query string. + pSignValues, err := parsePreSignV4(req.URL.Query()) + if err != ErrNone { + return nil, err + } + + // Verify if the access key id matches. + identity, cred, found := iam.lookupByAccessKey(pSignValues.Credential.accessKey) + if !found { + return nil, ErrInvalidAccessKeyID + } + + // Extract all the signed headers along with its values. + extractedSignedHeaders, errCode := extractSignedHeaders(pSignValues.SignedHeaders, r) + if errCode != ErrNone { + return nil, errCode + } + // Construct new query. + query := make(url.Values) + if req.URL.Query().Get("X-Amz-Content-Sha256") != "" { + query.Set("X-Amz-Content-Sha256", hashedPayload) + } + + query.Set("X-Amz-Algorithm", signV4Algorithm) + + now := time.Now().UTC() + + // If the host which signed the request is slightly ahead in time (by less than 15 minutes) the + // request should still be allowed. + if pSignValues.Date.After(now.Add(15 * time.Minute)) { + return nil, ErrRequestNotReadyYet + } + + if now.Sub(pSignValues.Date) > pSignValues.Expires { + return nil, ErrExpiredPresignRequest + } + + // Save the date and expires. + t := pSignValues.Date + expireSeconds := int(pSignValues.Expires / time.Second) + + // Construct the query. + query.Set("X-Amz-Date", t.Format(iso8601Format)) + query.Set("X-Amz-Expires", strconv.Itoa(expireSeconds)) + query.Set("X-Amz-SignedHeaders", getSignedHeaders(extractedSignedHeaders)) + query.Set("X-Amz-Credential", cred.AccessKey+"/"+getScope(t, pSignValues.Credential.scope.region)) + + // Save other headers available in the request parameters. + for k, v := range req.URL.Query() { + + // Handle the metadata in presigned put query string + if strings.Contains(strings.ToLower(k), "x-amz-meta-") { + query.Set(k, v[0]) + } + + if strings.HasPrefix(strings.ToLower(k), "x-amz") { + continue + } + query[k] = v + } + + // Get the encoded query. + encodedQuery := query.Encode() + + // Verify if date query is same. + if req.URL.Query().Get("X-Amz-Date") != query.Get("X-Amz-Date") { + return nil, ErrSignatureDoesNotMatch + } + // Verify if expires query is same. + if req.URL.Query().Get("X-Amz-Expires") != query.Get("X-Amz-Expires") { + return nil, ErrSignatureDoesNotMatch + } + // Verify if signed headers query is same. + if req.URL.Query().Get("X-Amz-SignedHeaders") != query.Get("X-Amz-SignedHeaders") { + return nil, ErrSignatureDoesNotMatch + } + // Verify if credential query is same. + if req.URL.Query().Get("X-Amz-Credential") != query.Get("X-Amz-Credential") { + return nil, ErrSignatureDoesNotMatch + } + // Verify if sha256 payload query is same. + if req.URL.Query().Get("X-Amz-Content-Sha256") != "" { + if req.URL.Query().Get("X-Amz-Content-Sha256") != query.Get("X-Amz-Content-Sha256") { + return nil, ErrContentSHA256Mismatch + } + } + + // Finally, verify that the signature matches.
+ + // Get canonical request. + presignedCanonicalReq := getCanonicalRequest(extractedSignedHeaders, hashedPayload, encodedQuery, req.URL.Path, req.Method) + + // Get string to sign from canonical request. + presignedStringToSign := getStringToSign(presignedCanonicalReq, t, pSignValues.Credential.getScope()) + + // Get hmac presigned signing key. + presignedSigningKey := getSigningKey(cred.SecretKey, pSignValues.Credential.scope.date, pSignValues.Credential.scope.region) + + // Get new signature. + newSignature := getSignature(presignedSigningKey, presignedStringToSign) + + // Verify signature. + if !compareSignatureV4(req.URL.Query().Get("X-Amz-Signature"), newSignature) { + return nil, ErrSignatureDoesNotMatch + } + return identity, ErrNone +} + +func contains(list []string, elem string) bool { + for _, t := range list { + if t == elem { + return true + } + } + return false +} + +// preSignValues data type represents structured form of AWS Signature V4 query string. +type preSignValues struct { + signValues + Date time.Time + Expires time.Duration +} + +// Parses signature version '4' query string of the following form. +// +// querystring = X-Amz-Algorithm=algorithm +// querystring += &X-Amz-Credential= urlencode(accessKey + '/' + credential_scope) +// querystring += &X-Amz-Date=date +// querystring += &X-Amz-Expires=timeout interval +// querystring += &X-Amz-SignedHeaders=signed_headers +// querystring += &X-Amz-Signature=signature +// +// verifies if any of the necessary query params are missing in the presigned request. +func doesV4PresignParamsExist(query url.Values) ErrorCode { + v4PresignQueryParams := []string{"X-Amz-Algorithm", "X-Amz-Credential", "X-Amz-Signature", "X-Amz-Date", "X-Amz-SignedHeaders", "X-Amz-Expires"} + for _, v4PresignQueryParam := range v4PresignQueryParams { + if _, ok := query[v4PresignQueryParam]; !ok { + return ErrInvalidQueryParams + } + } + return ErrNone +} + +// Parses all the presigned signature values into separate elements. +func parsePreSignV4(query url.Values) (psv preSignValues, aec ErrorCode) { + var err ErrorCode + // verify whether the required query params exist. + err = doesV4PresignParamsExist(query) + if err != ErrNone { + return psv, err + } + + // Verify if the query algorithm is supported or not. + if query.Get("X-Amz-Algorithm") != signV4Algorithm { + return psv, ErrInvalidQuerySignatureAlgo + } + + // Initialize signature version '4' structured header. + preSignV4Values := preSignValues{} + + // Save credential. + preSignV4Values.Credential, err = parseCredentialHeader("Credential=" + query.Get("X-Amz-Credential")) + if err != ErrNone { + return psv, err + } + + var e error + // Save date in native time.Time. + preSignV4Values.Date, e = time.Parse(iso8601Format, query.Get("X-Amz-Date")) + if e != nil { + return psv, ErrMalformedPresignedDate + } + + // Save expires in native time.Duration. + preSignV4Values.Expires, e = time.ParseDuration(query.Get("X-Amz-Expires") + "s") + if e != nil { + return psv, ErrMalformedExpires + } + + if preSignV4Values.Expires < 0 { + return psv, ErrNegativeExpires + } + + // Check if Expiry time is less than 7 days (value in seconds). + if preSignV4Values.Expires.Seconds() > 604800 { + return psv, ErrMaximumExpires + } + + // Save signed headers. + preSignV4Values.SignedHeaders, err = parseSignedHeader("SignedHeaders=" + query.Get("X-Amz-SignedHeaders")) + if err != ErrNone { + return psv, err + } + + // Save signature.
+ preSignV4Values.Signature, err = parseSignature("Signature=" + query.Get("X-Amz-Signature")) + if err != ErrNone { + return psv, err + } + + // Return structured form of signature query string. + return preSignV4Values, ErrNone +} + +// extractSignedHeaders extracts signed headers from Authorization header +func extractSignedHeaders(signedHeaders []string, r *http.Request) (http.Header, ErrorCode) { + reqHeaders := r.Header + // find whether "host" is part of list of signed headers. + // if not return ErrUnsignedHeaders. "host" is mandatory. + if !contains(signedHeaders, "host") { + return nil, ErrUnsignedHeaders + } + extractedSignedHeaders := make(http.Header) + for _, header := range signedHeaders { + // `host` will not be found in the headers; it can be found in r.Host, + // but it is always necessary that the list of signed headers contains host. + val, ok := reqHeaders[http.CanonicalHeaderKey(header)] + if ok { + for _, enc := range val { + extractedSignedHeaders.Add(header, enc) + } + continue + } + switch header { + case "expect": + // Golang http server strips off 'Expect' header, if the + // client sent this as part of signed headers we need to + // handle otherwise we would see a signature mismatch. + // `aws-cli` sets this as part of signed headers. + // + // According to + // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.20 + // Expect header is always of form: + // + // Expect = "Expect" ":" 1#expectation + // expectation = "100-continue" | expectation-extension + // + // So it is safe to assume that '100-continue' is what would + // be sent; for the time being keep this workaround. + // Adding a *TODO* to remove this later when Golang server + // doesn't filter out the 'Expect' header. + extractedSignedHeaders.Set(header, "100-continue") + case "host": + // Go http server removes "host" from Request.Header + extractedSignedHeaders.Set(header, r.Host) + case "transfer-encoding": + for _, enc := range r.TransferEncoding { + extractedSignedHeaders.Add(header, enc) + } + case "content-length": + // Signature-V4 spec excludes Content-Length from signed headers list for signature calculation. + // But some clients deviate from this rule. Hence we consider Content-Length for signature + // calculation to be compatible with such clients. + extractedSignedHeaders.Set(header, strconv.FormatInt(r.ContentLength, 10)) + default: + return nil, ErrUnsignedHeaders + } + } + return extractedSignedHeaders, ErrNone +} + +// getSignedHeaders generates an alphabetically sorted, semicolon-separated list of lowercase request header names +func getSignedHeaders(signedHeaders http.Header) string { + var headers []string + for k := range signedHeaders { + headers = append(headers, strings.ToLower(k)) + } + sort.Strings(headers) + return strings.Join(headers, ";") +} + +// getScope generates a string of a specific date, an AWS region, and a service.
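+// For example, getScope(t, "us-east-1") with t on 2013-05-24 returns + // "20130524/us-east-1/s3/aws4_request".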
+func getScope(t time.Time, region string) string { + scope := strings.Join([]string{ + t.Format(yyyymmdd), + region, + "s3", + "aws4_request", + }, "/") + return scope +} + +// getCanonicalRequest generates a canonical request of style +// +// canonicalRequest = +// <HTTPMethod>\n +// <CanonicalURI>\n +// <CanonicalQueryString>\n +// <CanonicalHeaders>\n +// <SignedHeaders>\n +// <HashedPayload> +// +func getCanonicalRequest(extractedSignedHeaders http.Header, payload, queryStr, urlPath, method string) string { + rawQuery := strings.Replace(queryStr, "+", "%20", -1) + encodedPath := encodePath(urlPath) + canonicalRequest := strings.Join([]string{ + method, + encodedPath, + rawQuery, + getCanonicalHeaders(extractedSignedHeaders), + getSignedHeaders(extractedSignedHeaders), + payload, + }, "\n") + return canonicalRequest +} + +// getStringToSign generates the string to sign from the canonical request, timestamp, and scope. +func getStringToSign(canonicalRequest string, t time.Time, scope string) string { + stringToSign := signV4Algorithm + "\n" + t.Format(iso8601Format) + "\n" + stringToSign = stringToSign + scope + "\n" + canonicalRequestBytes := sha256.Sum256([]byte(canonicalRequest)) + stringToSign = stringToSign + hex.EncodeToString(canonicalRequestBytes[:]) + return stringToSign +} + +// sumHMAC calculates the HMAC-SHA256 of data with the given key. +func sumHMAC(key []byte, data []byte) []byte { + hash := hmac.New(sha256.New, key) + hash.Write(data) + return hash.Sum(nil) +} + +// getSigningKey derives the HMAC signing key used to calculate the final signature. +func getSigningKey(secretKey string, t time.Time, region string) []byte { + date := sumHMAC([]byte("AWS4"+secretKey), []byte(t.Format(yyyymmdd))) + regionBytes := sumHMAC(date, []byte(region)) + service := sumHMAC(regionBytes, []byte("s3")) + signingKey := sumHMAC(service, []byte("aws4_request")) + return signingKey +} + +// getSignature returns the final signature in hexadecimal form.
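+// A sketch of the whole chain, with hypothetical names: + // signingKey := getSigningKey(secretKey, date, "us-east-1") // HMAC chain: "AWS4"+secret -> date -> region -> "s3" -> "aws4_request" + // sig := getSignature(signingKey, stringToSign) // hex(HMAC-SHA256(signingKey, stringToSign))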
+func getSignature(signingKey []byte, stringToSign string) string { + return hex.EncodeToString(sumHMAC(signingKey, []byte(stringToSign))) +} + +// getCanonicalHeaders generates a list of request headers with their values +func getCanonicalHeaders(signedHeaders http.Header) string { + var headers []string + vals := make(http.Header) + for k, vv := range signedHeaders { + headers = append(headers, strings.ToLower(k)) + vals[strings.ToLower(k)] = vv + } + sort.Strings(headers) + + var buf bytes.Buffer + for _, k := range headers { + buf.WriteString(k) + buf.WriteByte(':') + for idx, v := range vals[k] { + if idx > 0 { + buf.WriteByte(',') + } + buf.WriteString(signV4TrimAll(v)) + } + buf.WriteByte('\n') + } + return buf.String() +} + +// Trim leading and trailing spaces and replace sequential spaces with one space, following Trimall() +// in http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html +func signV4TrimAll(input string) string { + // Compress adjacent spaces (a space is determined by + // unicode.IsSpace() internally here) to one space and return + return strings.Join(strings.Fields(input), " ") +} + +// if object matches reserved string, no need to encode them +var reservedObjectNames = regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$") + +// encodePath encodes a string from its UTF-8 byte representation to percent-encoded hex escape sequences +// +// This is necessary since regular url.Parse() and url.Encode() functions do not support UTF-8: +// non-English characters cannot be parsed due to the way url.Encode() is written +// +// This function, on the other hand, is a direct replacement for the url.Encode() technique and supports +// pretty much every UTF-8 character. +func encodePath(pathName string) string { + if reservedObjectNames.MatchString(pathName) { + return pathName + } + var encodedPathname string + for _, s := range pathName { + if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark) + encodedPathname = encodedPathname + string(s) + continue + } + switch s { + case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark) + encodedPathname = encodedPathname + string(s) + continue + default: + length := utf8.RuneLen(s) + if length < 0 { + // if utf8 cannot convert return the same string as is + return pathName + } + u := make([]byte, length) + utf8.EncodeRune(u, s) + for _, r := range u { + hexStr := hex.EncodeToString([]byte{r}) + encodedPathname = encodedPathname + "%" + strings.ToUpper(hexStr) + } + } + } + return encodedPathname +} + +// compareSignatureV4 returns true if and only if both signatures +// are equal. The signatures are expected to be HEX encoded strings +// according to the AWS S3 signature V4 spec. +func compareSignatureV4(sig1, sig2 string) bool { + // The constant-time compare over []byte(str) works because the hex encoding + // of a value is unique for a given byte sequence. See also compareSignatureV2.
+ return subtle.ConstantTimeCompare([]byte(sig1), []byte(sig2)) == 1 +} diff --git a/weed/s3api/auto_signature_v4_test.go b/weed/s3api/auto_signature_v4_test.go new file mode 100644 index 000000000..036b5c052 --- /dev/null +++ b/weed/s3api/auto_signature_v4_test.go @@ -0,0 +1,418 @@ +package s3api + +import ( + "bytes" + "crypto/md5" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "errors" + "fmt" + "io" + "io/ioutil" + "net/http" + "net/url" + "sort" + "strconv" + "strings" + "testing" + "time" + "unicode/utf8" +) + +// TestIsRequestPresignedSignatureV4 - Test validates the logic for presign signature version v4 detection. +func TestIsRequestPresignedSignatureV4(t *testing.T) { + testCases := []struct { + inputQueryKey string + inputQueryValue string + expectedResult bool + }{ + // Test case - 1. + // Test case with no query key set. + {"", "", false}, + // Test case - 2. + // Test case with query key "X-Amz-Credential" set. + {"X-Amz-Credential", "", true}, + // Test case - 3. + {"X-Amz-Content-Sha256", "", false}, + } + + for i, testCase := range testCases { + // creating an input HTTP request. + // Only the query parameters are relevant for this particular test. + inputReq, err := http.NewRequest("GET", "http://example.com", nil) + if err != nil { + t.Fatalf("Error initializing input HTTP request: %v", err) + } + q := inputReq.URL.Query() + q.Add(testCase.inputQueryKey, testCase.inputQueryValue) + inputReq.URL.RawQuery = q.Encode() + + actualResult := isRequestPresignedSignatureV4(inputReq) + if testCase.expectedResult != actualResult { + t.Errorf("Test %d: Expected the result to `%v`, but instead got `%v`", i+1, testCase.expectedResult, actualResult) + } + } +} + +// Tests the request authentication function, checking the s3 errors it replies with. +func TestIsReqAuthenticated(t *testing.T) { + iam := NewIdentityAccessManagement("", "") + iam.identities = []*Identity{ + { + Name: "someone", + Credentials: []*Credential{ + { + AccessKey: "access_key_1", + SecretKey: "secret_key_1", + }, + }, + Actions: nil, + }, + } + + // List of test cases for validating http request authentication. + testCases := []struct { + req *http.Request + s3Error ErrorCode + }{ + // When request is unsigned, access denied is returned. + {mustNewRequest("GET", "http://127.0.0.1:9000", 0, nil, t), ErrAccessDenied}, + // When request is properly signed, error is none. + {mustNewSignedRequest("GET", "http://127.0.0.1:9000", 0, nil, t), ErrNone}, + } + + // Validates all testcases.
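+ // (Assuming the package builds, this table can be exercised with: + // go test ./weed/s3api -run TestIsReqAuthenticated)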
+ for i, testCase := range testCases { + if _, s3Error := iam.reqSignatureV4Verify(testCase.req); s3Error != testCase.s3Error { + ioutil.ReadAll(testCase.req.Body) + t.Fatalf("Test %d: Unexpected S3 error: want %d - got %d", i, testCase.s3Error, s3Error) + } + } +} + +func TestCheckAdminRequestAuthType(t *testing.T) { + iam := NewIdentityAccessManagement("", "") + iam.identities = []*Identity{ + { + Name: "someone", + Credentials: []*Credential{ + { + AccessKey: "access_key_1", + SecretKey: "secret_key_1", + }, + }, + Actions: nil, + }, + } + + testCases := []struct { + Request *http.Request + ErrCode ErrorCode + }{ + {Request: mustNewRequest("GET", "http://127.0.0.1:9000", 0, nil, t), ErrCode: ErrAccessDenied}, + {Request: mustNewSignedRequest("GET", "http://127.0.0.1:9000", 0, nil, t), ErrCode: ErrNone}, + {Request: mustNewPresignedRequest("GET", "http://127.0.0.1:9000", 0, nil, t), ErrCode: ErrNone}, + } + for i, testCase := range testCases { + if _, s3Error := iam.reqSignatureV4Verify(testCase.Request); s3Error != testCase.ErrCode { + t.Errorf("Test %d: Unexpected s3error returned wanted %d, got %d", i, testCase.ErrCode, s3Error) + } + } +} + +// Provides a fully populated http request instance, fails otherwise. +func mustNewRequest(method string, urlStr string, contentLength int64, body io.ReadSeeker, t *testing.T) *http.Request { + req, err := newTestRequest(method, urlStr, contentLength, body) + if err != nil { + t.Fatalf("Unable to initialize new http request %s", err) + } + return req +} + +// This is similar to mustNewRequest but additionally the request +// is signed with AWS Signature V4, fails if not able to do so. +func mustNewSignedRequest(method string, urlStr string, contentLength int64, body io.ReadSeeker, t *testing.T) *http.Request { + req := mustNewRequest(method, urlStr, contentLength, body, t) + cred := &Credential{"access_key_1", "secret_key_1"} + if err := signRequestV4(req, cred.AccessKey, cred.SecretKey); err != nil { + t.Fatalf("Unable to initialize new signed http request %s", err) + } + return req +} + +// This is similar to mustNewRequest but additionally the request +// is presigned with AWS Signature V4, fails if not able to do so. +func mustNewPresignedRequest(method string, urlStr string, contentLength int64, body io.ReadSeeker, t *testing.T) *http.Request { + req := mustNewRequest(method, urlStr, contentLength, body, t) + cred := &Credential{"access_key_1", "secret_key_1"} + if err := preSignV4(req, cred.AccessKey, cred.SecretKey, int64(10*time.Minute.Seconds())); err != nil { + t.Fatalf("Unable to initialize new presigned http request %s", err) + } + return req +} + +// Returns new HTTP request object. +func newTestRequest(method, urlStr string, contentLength int64, body io.ReadSeeker) (*http.Request, error) { + if method == "" { + method = "POST" + } + + // Save for subsequent use + var hashedPayload string + var md5Base64 string + switch { + case body == nil: + hashedPayload = getSHA256Hash([]byte{}) + default: + payloadBytes, err := ioutil.ReadAll(body) + if err != nil { + return nil, err + } + hashedPayload = getSHA256Hash(payloadBytes) + md5Base64 = getMD5HashBase64(payloadBytes) + } + // Seek back to beginning.
+
+// Returns new HTTP request object.
+func newTestRequest(method, urlStr string, contentLength int64, body io.ReadSeeker) (*http.Request, error) {
+	if method == "" {
+		method = "POST"
+	}
+
+	// Save for subsequent use
+	var hashedPayload string
+	var md5Base64 string
+	switch {
+	case body == nil:
+		hashedPayload = getSHA256Hash([]byte{})
+	default:
+		payloadBytes, err := ioutil.ReadAll(body)
+		if err != nil {
+			return nil, err
+		}
+		hashedPayload = getSHA256Hash(payloadBytes)
+		md5Base64 = getMD5HashBase64(payloadBytes)
+	}
+	// Seek back to beginning.
+	if body != nil {
+		body.Seek(0, 0)
+	} else {
+		body = bytes.NewReader([]byte(""))
+	}
+	req, err := http.NewRequest(method, urlStr, body)
+	if err != nil {
+		return nil, err
+	}
+	if md5Base64 != "" {
+		req.Header.Set("Content-Md5", md5Base64)
+	}
+	req.Header.Set("x-amz-content-sha256", hashedPayload)
+
+	// Add Content-Length
+	req.ContentLength = contentLength
+
+	return req, nil
+}
+
+// getSHA256Hash returns SHA-256 hash in hex encoding of given data.
+func getSHA256Hash(data []byte) string {
+	return hex.EncodeToString(getSHA256Sum(data))
+}
+
+// getMD5HashBase64 returns MD5 hash in base64 encoding of given data.
+func getMD5HashBase64(data []byte) string {
+	return base64.StdEncoding.EncodeToString(getMD5Sum(data))
+}
+
+// getSHA256Sum returns SHA-256 sum of given data.
+func getSHA256Sum(data []byte) []byte {
+	hash := sha256.New()
+	hash.Write(data)
+	return hash.Sum(nil)
+}
+
+// getMD5Sum returns MD5 sum of given data.
+func getMD5Sum(data []byte) []byte {
+	hash := md5.New()
+	hash.Write(data)
+	return hash.Sum(nil)
+}
+
+// getMD5Hash returns MD5 hash in hex encoding of given data.
+func getMD5Hash(data []byte) string {
+	return hex.EncodeToString(getMD5Sum(data))
+}
+
+var ignoredHeaders = map[string]bool{
+	"Authorization":  true,
+	"Content-Type":   true,
+	"Content-Length": true,
+	"User-Agent":     true,
+}
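signRequestV4 below turns the canonical request into an Authorization header via the standard SigV4 HMAC chain. A self-contained sketch of just the key-derivation step, with illustrative inputs and a local copy of the sumHMAC helper the package relies on:

```go
package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// sumHMAC mirrors the helper the signing code uses: one HMAC-SHA256 round.
func sumHMAC(key []byte, data []byte) []byte {
	h := hmac.New(sha256.New, key)
	h.Write(data)
	return h.Sum(nil)
}

func main() {
	// Illustrative inputs; real values come from the credential scope.
	secretKey, date, region := "secret_key_1", "20200101", "us-east-1"

	// SigV4 key derivation: each HMAC output keys the next round.
	kDate := sumHMAC([]byte("AWS4"+secretKey), []byte(date))
	kRegion := sumHMAC(kDate, []byte(region))
	kService := sumHMAC(kRegion, []byte("s3"))
	signingKey := sumHMAC(kService, []byte("aws4_request"))

	fmt.Println(hex.EncodeToString(signingKey))
}
```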
+
+// Sign given request using Signature V4.
+func signRequestV4(req *http.Request, accessKey, secretKey string) error {
+	// Get hashed payload.
+	hashedPayload := req.Header.Get("x-amz-content-sha256")
+	if hashedPayload == "" {
+		return fmt.Errorf("Invalid hashed payload")
+	}
+
+	currTime := time.Now()
+
+	// Set x-amz-date.
+	req.Header.Set("x-amz-date", currTime.Format(iso8601Format))
+
+	// Get header map.
+	headerMap := make(map[string][]string)
+	for k, vv := range req.Header {
+		// If request header key is not in ignored headers, then add it.
+		if _, ok := ignoredHeaders[http.CanonicalHeaderKey(k)]; !ok {
+			headerMap[strings.ToLower(k)] = vv
+		}
+	}
+
+	// Get header keys.
+	headers := []string{"host"}
+	for k := range headerMap {
+		headers = append(headers, k)
+	}
+	sort.Strings(headers)
+
+	region := "us-east-1"
+
+	// Get canonical headers.
+	var buf bytes.Buffer
+	for _, k := range headers {
+		buf.WriteString(k)
+		buf.WriteByte(':')
+		switch {
+		case k == "host":
+			buf.WriteString(req.URL.Host)
+			fallthrough
+		default:
+			for idx, v := range headerMap[k] {
+				if idx > 0 {
+					buf.WriteByte(',')
+				}
+				buf.WriteString(v)
+			}
+			buf.WriteByte('\n')
+		}
+	}
+	canonicalHeaders := buf.String()
+
+	// Get signed headers.
+	signedHeaders := strings.Join(headers, ";")
+
+	// Get canonical query string.
+	req.URL.RawQuery = strings.Replace(req.URL.Query().Encode(), "+", "%20", -1)
+
+	// Get canonical URI.
+	canonicalURI := EncodePath(req.URL.Path)
+
+	// Get canonical request.
+	// canonicalRequest =
+	//	<HTTPMethod>\n
+	//	<CanonicalURI>\n
+	//	<CanonicalQueryString>\n
+	//	<CanonicalHeaders>\n
+	//	<SignedHeaders>\n
+	//	<HashedPayload>
+	//
+	canonicalRequest := strings.Join([]string{
+		req.Method,
+		canonicalURI,
+		req.URL.RawQuery,
+		canonicalHeaders,
+		signedHeaders,
+		hashedPayload,
+	}, "\n")
+
+	// Get scope.
+	scope := strings.Join([]string{
+		currTime.Format(yyyymmdd),
+		region,
+		"s3",
+		"aws4_request",
+	}, "/")
+
+	stringToSign := "AWS4-HMAC-SHA256" + "\n" + currTime.Format(iso8601Format) + "\n"
+	stringToSign = stringToSign + scope + "\n"
+	stringToSign = stringToSign + getSHA256Hash([]byte(canonicalRequest))
+
+	date := sumHMAC([]byte("AWS4"+secretKey), []byte(currTime.Format(yyyymmdd)))
+	regionHMAC := sumHMAC(date, []byte(region))
+	service := sumHMAC(regionHMAC, []byte("s3"))
+	signingKey := sumHMAC(service, []byte("aws4_request"))
+
+	signature := hex.EncodeToString(sumHMAC(signingKey, []byte(stringToSign)))
+
+	// final Authorization header
+	parts := []string{
+		"AWS4-HMAC-SHA256" + " Credential=" + accessKey + "/" + scope,
+		"SignedHeaders=" + signedHeaders,
+		"Signature=" + signature,
+	}
+	auth := strings.Join(parts, ", ")
+	req.Header.Set("Authorization", auth)
+
+	return nil
+}
+
+// preSignV4 presigns the request, in accordance with
+// http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html.
+func preSignV4(req *http.Request, accessKeyID, secretAccessKey string, expires int64) error {
+	// Presign is not needed for anonymous credentials.
+	if accessKeyID == "" || secretAccessKey == "" {
+		return errors.New("Presign cannot be generated without access and secret keys")
+	}
+
+	region := "us-east-1"
+	date := time.Now().UTC()
+	scope := getScope(date, region)
+	credential := fmt.Sprintf("%s/%s", accessKeyID, scope)
+
+	// Set URL query.
+	query := req.URL.Query()
+	query.Set("X-Amz-Algorithm", signV4Algorithm)
+	query.Set("X-Amz-Date", date.Format(iso8601Format))
+	query.Set("X-Amz-Expires", strconv.FormatInt(expires, 10))
+	query.Set("X-Amz-SignedHeaders", "host")
+	query.Set("X-Amz-Credential", credential)
+	query.Set("X-Amz-Content-Sha256", unsignedPayload)
+
+	// "host" is the only header required to be signed for Presigned URLs.
+	extractedSignedHeaders := make(http.Header)
+	extractedSignedHeaders.Set("host", req.Host)
+
+	queryStr := strings.Replace(query.Encode(), "+", "%20", -1)
+	canonicalRequest := getCanonicalRequest(extractedSignedHeaders, unsignedPayload, queryStr, req.URL.Path, req.Method)
+	stringToSign := getStringToSign(canonicalRequest, date, scope)
+	signingKey := getSigningKey(secretAccessKey, date, region)
+	signature := getSignature(signingKey, stringToSign)
+
+	req.URL.RawQuery = query.Encode()
+
+	// Add signature header to RawQuery.
+	req.URL.RawQuery += "&X-Amz-Signature=" + url.QueryEscape(signature)
+
+	// The presigned URL is now fully constructed in req.URL.
+	return nil
+}
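A hedged usage sketch for preSignV4 above, assuming a caller inside the s3api package and illustrative credentials; the function mutates the request in place, so only req.URL matters afterwards:

```go
// examplePresign is a hypothetical caller: presign a GET for ten minutes
// (600 seconds) and return the resulting URL for a client to use directly.
func examplePresign() (string, error) {
	req, err := http.NewRequest("GET", "http://127.0.0.1:9000/bucket/object", nil)
	if err != nil {
		return "", err
	}
	// Only the "host" header is covered by the signature, per preSignV4.
	if err := preSignV4(req, "access_key_1", "secret_key_1", 600); err != nil {
		return "", err
	}
	return req.URL.String(), nil
}
```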
+
+// EncodePath encodes the strings from UTF-8 byte representations to HTML hex escape sequences.
+//
+// This is necessary since the regular url.Parse() and url.Encode() functions do not support UTF-8;
+// non-English characters cannot be parsed due to the nature in which url.Encode() is written.
+//
+// This function, on the other hand, is a direct replacement for the url.Encode() technique and
+// supports pretty much every UTF-8 character.
+func EncodePath(pathName string) string {
+	if reservedObjectNames.MatchString(pathName) {
+		return pathName
+	}
+	var encodedPathname string
+	for _, s := range pathName {
+		if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark)
+			encodedPathname = encodedPathname + string(s)
+			continue
+		}
+		switch s {
+		case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark)
+			encodedPathname = encodedPathname + string(s)
+			continue
+		default:
+			len := utf8.RuneLen(s)
+			if len < 0 {
+				// if utf8 cannot convert, return the same string as is
+				return pathName
+			}
+			u := make([]byte, len)
+			utf8.EncodeRune(u, s)
+			for _, r := range u {
+				hex := hex.EncodeToString([]byte{r})
+				encodedPathname = encodedPathname + "%" + strings.ToUpper(hex)
+			}
+		}
+	}
+	return encodedPathname
+}
diff --git a/weed/s3api/chunked_reader_v4.go b/weed/s3api/chunked_reader_v4.go
index 061fd4a92..76c4394c2 100644
--- a/weed/s3api/chunked_reader_v4.go
+++ b/weed/s3api/chunked_reader_v4.go
@@ -21,17 +21,115 @@ package s3api
 
 import (
 	"bufio"
 	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
 	"errors"
-	"github.com/dustin/go-humanize"
+	"hash"
 	"io"
 	"net/http"
-)
+	"time"
 
-// Streaming AWS Signature Version '4' constants.
-const (
-	streamingContentSHA256 = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD"
+	"github.com/dustin/go-humanize"
 )
 
+// getChunkSignature - get chunk signature.
+func getChunkSignature(secretKey string, seedSignature string, region string, date time.Time, hashedChunk string) string {
+
+	// Calculate string to sign.
+	stringToSign := signV4ChunkedAlgorithm + "\n" +
+		date.Format(iso8601Format) + "\n" +
+		getScope(date, region) + "\n" +
+		seedSignature + "\n" +
+		emptySHA256 + "\n" +
+		hashedChunk
+
+	// Get hmac signing key.
+	signingKey := getSigningKey(secretKey, date, region)
+
+	// Calculate signature.
+	newSignature := getSignature(signingKey, stringToSign)
+
+	return newSignature
+}
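Each streaming chunk is signed against the signature of the chunk before it, starting from the seed computed by calculateSeedSignature below. A sketch of that chain, reusing getChunkSignature above with illustrative inputs (this assumes the package's sha256/hex/time imports):

```go
// exampleChunkChain illustrates STREAMING-AWS4-HMAC-SHA256-PAYLOAD chaining:
// the seed signature authenticates chunk 1, whose signature seeds chunk 2,
// and so on. Inputs are illustrative, not taken from a real request.
func exampleChunkChain(secretKey, seedSignature, region string, date time.Time, chunks [][]byte) []string {
	var signatures []string
	prev := seedSignature
	for _, chunk := range chunks {
		sum := sha256.Sum256(chunk)
		sig := getChunkSignature(secretKey, prev, region, date, hex.EncodeToString(sum[:]))
		signatures = append(signatures, sig)
		prev = sig // each signature seeds the next chunk
	}
	return signatures
}
```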
+
+// calculateSeedSignature - Calculate seed signature in accordance with
+// - http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-streaming.html
+// It returns the seed signature, or an error if the signature mismatches or
+// any other error occurs while parsing and validating.
+func (iam *IdentityAccessManagement) calculateSeedSignature(r *http.Request) (cred *Credential, signature string, region string, date time.Time, errCode ErrorCode) {
+
+	// Copy request.
+	req := *r
+
+	// Save authorization header.
+	v4Auth := req.Header.Get("Authorization")
+
+	// Parse signature version '4' header.
+	signV4Values, errCode := parseSignV4(v4Auth)
+	if errCode != ErrNone {
+		return nil, "", "", time.Time{}, errCode
+	}
+
+	// Payload streaming.
+	payload := streamingContentSHA256
+
+	// Payload for STREAMING signature should be 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD'
+	if payload != req.Header.Get("X-Amz-Content-Sha256") {
+		return nil, "", "", time.Time{}, ErrContentSHA256Mismatch
+	}
+
+	// Extract all the signed headers along with their values.
+	extractedSignedHeaders, errCode := extractSignedHeaders(signV4Values.SignedHeaders, r)
+	if errCode != ErrNone {
+		return nil, "", "", time.Time{}, errCode
+	}
+	// Verify if the access key id matches.
+	_, cred, found := iam.lookupByAccessKey(signV4Values.Credential.accessKey)
+	if !found {
+		return nil, "", "", time.Time{}, ErrInvalidAccessKeyID
+	}
+
+	// Verify if region is valid.
+	region = signV4Values.Credential.scope.region
+
+	// Extract date, if not present throw error.
+	var dateStr string
+	if dateStr = req.Header.Get(http.CanonicalHeaderKey("x-amz-date")); dateStr == "" {
+		if dateStr = r.Header.Get("Date"); dateStr == "" {
+			return nil, "", "", time.Time{}, ErrMissingDateHeader
+		}
+	}
+	// Parse date header.
+	var err error
+	date, err = time.Parse(iso8601Format, dateStr)
+	if err != nil {
+		return nil, "", "", time.Time{}, ErrMalformedDate
+	}
+
+	// Query string.
+	queryStr := req.URL.Query().Encode()
+
+	// Get canonical request.
+	canonicalRequest := getCanonicalRequest(extractedSignedHeaders, payload, queryStr, req.URL.Path, req.Method)
+
+	// Get string to sign from canonical request.
+	stringToSign := getStringToSign(canonicalRequest, date, signV4Values.Credential.getScope())
+
+	// Get hmac signing key.
+	signingKey := getSigningKey(cred.SecretKey, signV4Values.Credential.scope.date, region)
+
+	// Calculate signature.
+	newSignature := getSignature(signingKey, stringToSign)
+
+	// Verify if signature matches.
+	if !compareSignatureV4(newSignature, signV4Values.Signature) {
+		return nil, "", "", time.Time{}, ErrSignatureDoesNotMatch
+	}
+
+	// Return calculated signature.
+	return cred, newSignature, region, date, ErrNone
+}
+
 const maxLineLength = 4 * humanize.KiByte // assumed <= bufio.defaultBufSize 4KiB
 
 // lineTooLong is generated as chunk header is bigger than 4KiB.
@@ -43,22 +141,36 @@ var errMalformedEncoding = errors.New("malformed chunked encoding")
 
 // newSignV4ChunkedReader returns a new s3ChunkedReader that translates the data read from r
 // out of HTTP "chunked" format before returning it.
 // The s3ChunkedReader returns io.EOF when the final 0-length chunk is read.
-func newSignV4ChunkedReader(req *http.Request) io.ReadCloser {
-	return &s3ChunkedReader{
-		reader: bufio.NewReader(req.Body),
-		state:  readChunkHeader,
+func (iam *IdentityAccessManagement) newSignV4ChunkedReader(req *http.Request) (io.ReadCloser, ErrorCode) {
+	ident, seedSignature, region, seedDate, errCode := iam.calculateSeedSignature(req)
+	if errCode != ErrNone {
+		return nil, errCode
 	}
+	return &s3ChunkedReader{
+		cred:              ident,
+		reader:            bufio.NewReader(req.Body),
+		seedSignature:     seedSignature,
+		seedDate:          seedDate,
+		region:            region,
+		chunkSHA256Writer: sha256.New(),
+		state:             readChunkHeader,
+	}, ErrNone
 }
 
 // Represents the overall state that is required for decoding a
 // AWS Signature V4 chunked reader.
 type s3ChunkedReader struct {
-	reader         *bufio.Reader
-	state          chunkState
-	lastChunk      bool
-	chunkSignature string
-	n              uint64 // Unread bytes in chunk
-	err            error
+	cred              *Credential
+	reader            *bufio.Reader
+	seedSignature     string
+	seedDate          time.Time
+	region            string
+	state             chunkState
+	lastChunk         bool
+	chunkSignature    string
+	chunkSHA256Writer hash.Hash // Calculates sha256 of chunk data.
+	n                 uint64    // Unread bytes in chunk
+	err               error
 }
 
 // Read chunk reads the chunk token signature portion.
@@ -157,6 +269,9 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
 			return 0, cr.err
 		}
 
+		// Calculate sha256.
+		cr.chunkSHA256Writer.Write(rbuf[:n0])
+
 		// Update the bytes read into request buffer so far.
 		n += n0
 		buf = buf[n0:]
@@ -169,6 +284,19 @@ func (cr *s3ChunkedReader) Read(buf []byte) (n int, err error) {
 			continue
 		}
 	case verifyChunk:
+		// Calculate the hashed chunk.
+		hashedChunk := hex.EncodeToString(cr.chunkSHA256Writer.Sum(nil))
+		// Calculate the chunk signature.
+ newSignature := getChunkSignature(cr.cred.SecretKey, cr.seedSignature, cr.region, cr.seedDate, hashedChunk) + if !compareSignatureV4(cr.chunkSignature, newSignature) { + // Chunk signature doesn't match we return signature does not match. + cr.err = errors.New("chunk signature does not match") + return 0, cr.err + } + // Newly calculated signature becomes the seed for the next chunk + // this follows the chaining. + cr.seedSignature = newSignature + cr.chunkSHA256Writer.Reset() if cr.lastChunk { cr.state = eofChunk } else { diff --git a/weed/s3api/custom_types.go b/weed/s3api/custom_types.go new file mode 100644 index 000000000..569dfc3ac --- /dev/null +++ b/weed/s3api/custom_types.go @@ -0,0 +1,3 @@ +package s3api + +const s3TimeFormat = "2006-01-02T15:04:05.999Z07:00" diff --git a/weed/s3api/filer_multipart.go b/weed/s3api/filer_multipart.go index 73be496d9..792127771 100644 --- a/weed/s3api/filer_multipart.go +++ b/weed/s3api/filer_multipart.go @@ -1,6 +1,7 @@ package s3api import ( + "encoding/xml" "fmt" "path/filepath" "strconv" @@ -9,18 +10,20 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/s3" + "github.com/google/uuid" + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/satori/go.uuid" ) type InitiateMultipartUploadResult struct { + XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ InitiateMultipartUploadResult"` s3.CreateMultipartUploadOutput } func (s3a *S3ApiServer) createMultipartUpload(input *s3.CreateMultipartUploadInput) (output *InitiateMultipartUploadResult, code ErrorCode) { - uploadId, _ := uuid.NewV4() + uploadId, _ := uuid.NewRandom() uploadIdString := uploadId.String() if err := s3a.mkdir(s3a.genUploadsFolder(*input.Bucket), uploadIdString, func(entry *filer_pb.Entry) { @@ -34,9 +37,9 @@ func (s3a *S3ApiServer) createMultipartUpload(input *s3.CreateMultipartUploadInp } output = &InitiateMultipartUploadResult{ - s3.CreateMultipartUploadOutput{ + CreateMultipartUploadOutput: s3.CreateMultipartUploadOutput{ Bucket: input.Bucket, - Key: input.Key, + Key: objectKey(input.Key), UploadId: aws.String(uploadIdString), }, } @@ -45,6 +48,7 @@ func (s3a *S3ApiServer) createMultipartUpload(input *s3.CreateMultipartUploadInp } type CompleteMultipartUploadResult struct { + XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ CompleteMultipartUploadResult"` s3.CompleteMultipartUploadOutput } @@ -65,7 +69,7 @@ func (s3a *S3ApiServer) completeMultipartUpload(input *s3.CompleteMultipartUploa if strings.HasSuffix(entry.Name, ".part") && !entry.IsDirectory { for _, chunk := range entry.Chunks { p := &filer_pb.FileChunk{ - FileId: chunk.FileId, + FileId: chunk.GetFileIdString(), Offset: offset, Size: chunk.Size, Mtime: chunk.Mtime, @@ -87,6 +91,11 @@ func (s3a *S3ApiServer) completeMultipartUpload(input *s3.CompleteMultipartUploa } dirName = fmt.Sprintf("%s/%s/%s", s3a.option.BucketsPath, *input.Bucket, dirName) + // remove suffix '/' + if strings.HasSuffix(dirName, "/") { + dirName = dirName[:len(dirName)-1] + } + err = s3a.mkFile(dirName, entryName, finalParts) if err != nil { @@ -95,10 +104,11 @@ func (s3a *S3ApiServer) completeMultipartUpload(input *s3.CompleteMultipartUploa } output = &CompleteMultipartUploadResult{ - s3.CompleteMultipartUploadOutput{ - Bucket: input.Bucket, - ETag: aws.String("\"" + filer2.ETag(finalParts) + "\""), - Key: input.Key, + CompleteMultipartUploadOutput: s3.CompleteMultipartUploadOutput{ + Location: 
aws.String(fmt.Sprintf("http://%s%s/%s", s3a.option.Filer, dirName, entryName)), + Bucket: input.Bucket, + ETag: aws.String("\"" + filer2.ETag(finalParts) + "\""), + Key: objectKey(input.Key), }, } @@ -128,13 +138,14 @@ func (s3a *S3ApiServer) abortMultipartUpload(input *s3.AbortMultipartUploadInput } type ListMultipartUploadsResult struct { + XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListMultipartUploadsResult"` s3.ListMultipartUploadsOutput } func (s3a *S3ApiServer) listMultipartUploads(input *s3.ListMultipartUploadsInput) (output *ListMultipartUploadsResult, code ErrorCode) { output = &ListMultipartUploadsResult{ - s3.ListMultipartUploadsOutput{ + ListMultipartUploadsOutput: s3.ListMultipartUploadsOutput{ Bucket: input.Bucket, Delimiter: input.Delimiter, EncodingType: input.EncodingType, @@ -154,7 +165,7 @@ func (s3a *S3ApiServer) listMultipartUploads(input *s3.ListMultipartUploadsInput if entry.Extended != nil { key := entry.Extended["key"] output.Uploads = append(output.Uploads, &s3.MultipartUpload{ - Key: aws.String(string(key)), + Key: objectKey(aws.String(string(key))), UploadId: aws.String(entry.Name), }) } @@ -164,22 +175,22 @@ func (s3a *S3ApiServer) listMultipartUploads(input *s3.ListMultipartUploadsInput } type ListPartsResult struct { + XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListPartsResult"` s3.ListPartsOutput } func (s3a *S3ApiServer) listObjectParts(input *s3.ListPartsInput) (output *ListPartsResult, code ErrorCode) { output = &ListPartsResult{ - s3.ListPartsOutput{ + ListPartsOutput: s3.ListPartsOutput{ Bucket: input.Bucket, - Key: input.Key, + Key: objectKey(input.Key), UploadId: input.UploadId, MaxParts: input.MaxParts, // the maximum number of parts to return. PartNumberMarker: input.PartNumberMarker, // the part number starts after this, exclusive }, } - entries, err := s3a.list(s3a.genUploadsFolder(*input.Bucket)+"/"+*input.UploadId, - "", fmt.Sprintf("%04d.part", *input.PartNumberMarker), false, int(*input.MaxParts)) + entries, err := s3a.list(s3a.genUploadsFolder(*input.Bucket)+"/"+*input.UploadId, "", fmt.Sprintf("%04d.part", *input.PartNumberMarker), false, int(*input.MaxParts)) if err != nil { glog.Errorf("listObjectParts %s %s error: %v", *input.Bucket, *input.UploadId, err) return nil, ErrNoSuchUpload diff --git a/weed/s3api/filer_multipart_test.go b/weed/s3api/filer_multipart_test.go new file mode 100644 index 000000000..835665dd6 --- /dev/null +++ b/weed/s3api/filer_multipart_test.go @@ -0,0 +1,26 @@ +package s3api + +import ( + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3" + "testing" +) + +func TestInitiateMultipartUploadResult(t *testing.T) { + + expected := `<?xml version="1.0" encoding="UTF-8"?> +<InitiateMultipartUploadResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Bucket>example-bucket</Bucket><Key>example-object</Key><UploadId>VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA</UploadId></InitiateMultipartUploadResult>` + response := &InitiateMultipartUploadResult{ + CreateMultipartUploadOutput: s3.CreateMultipartUploadOutput{ + Bucket: aws.String("example-bucket"), + Key: aws.String("example-object"), + UploadId: aws.String("VXBsb2FkIElEIGZvciA2aWWpbmcncyBteS1tb3ZpZS5tMnRzIHVwbG9hZA"), + }, + } + + encoded := string(encodeResponse(response)) + if encoded != expected { + t.Errorf("unexpected output: %s\nexpecting:%s", encoded, expected) + } + +} diff --git a/weed/s3api/filer_util.go b/weed/s3api/filer_util.go index 6dd93d50f..ec1eedcb4 100644 --- 
a/weed/s3api/filer_util.go +++ b/weed/s3api/filer_util.go @@ -3,7 +3,9 @@ package s3api import ( "context" "fmt" + "io" "os" + "strings" "time" "github.com/chrislusf/seaweedfs/weed/glog" @@ -35,7 +37,8 @@ func (s3a *S3ApiServer) mkdir(parentDirectoryPath string, dirName string, fn fun } glog.V(1).Infof("mkdir: %v", request) - if _, err := client.CreateEntry(context.Background(), request); err != nil { + if err := filer_pb.CreateEntry(client, request); err != nil { + glog.V(0).Infof("mkdir %v: %v", request, err) return fmt.Errorf("mkdir %s/%s: %v", parentDirectoryPath, dirName, err) } @@ -65,7 +68,8 @@ func (s3a *S3ApiServer) mkFile(parentDirectoryPath string, fileName string, chun } glog.V(1).Infof("create file: %s/%s", parentDirectoryPath, fileName) - if _, err := client.CreateEntry(context.Background(), request); err != nil { + if err := filer_pb.CreateEntry(client, request); err != nil { + glog.V(0).Infof("create file %v:%v", request, err) return fmt.Errorf("create file %s/%s: %v", parentDirectoryPath, fileName, err) } @@ -86,12 +90,25 @@ func (s3a *S3ApiServer) list(parentDirectoryPath, prefix, startFrom string, incl } glog.V(4).Infof("read directory: %v", request) - resp, err := client.ListEntries(context.Background(), request) + stream, err := client.ListEntries(context.Background(), request) if err != nil { + glog.V(0).Infof("read directory %v: %v", request, err) return fmt.Errorf("list dir %v: %v", parentDirectoryPath, err) } - entries = resp.Entries + for { + resp, recvErr := stream.Recv() + if recvErr != nil { + if recvErr == io.EOF { + break + } else { + return recvErr + } + } + + entries = append(entries, resp.Entry) + + } return nil }) @@ -100,22 +117,20 @@ func (s3a *S3ApiServer) list(parentDirectoryPath, prefix, startFrom string, incl } -func (s3a *S3ApiServer) rm(parentDirectoryPath string, entryName string, isDirectory, isDeleteData, isRecursive bool) error { +func (s3a *S3ApiServer) rm(parentDirectoryPath, entryName string, isDirectory, isDeleteData, isRecursive bool) error { return s3a.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { - ctx := context.Background() - request := &filer_pb.DeleteEntryRequest{ Directory: parentDirectoryPath, Name: entryName, - IsDirectory: isDirectory, IsDeleteData: isDeleteData, IsRecursive: isRecursive, } glog.V(1).Infof("delete entry %v/%v: %v", parentDirectoryPath, entryName, request) - if _, err := client.DeleteEntry(ctx, request); err != nil { + if _, err := client.DeleteEntry(context.Background(), request); err != nil { + glog.V(0).Infof("delete entry %v: %v", request, err) return fmt.Errorf("delete entry %s/%s: %v", parentDirectoryPath, entryName, err) } @@ -124,20 +139,76 @@ func (s3a *S3ApiServer) rm(parentDirectoryPath string, entryName string, isDirec } +func (s3a *S3ApiServer) streamRemove(quiet bool, fn func() (finished bool, parentDirectoryPath string, entryName string, isDeleteData, isRecursive bool), respFn func(err string)) error { + + return s3a.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + stream, err := client.StreamDeleteEntries(context.Background()) + if err != nil { + glog.V(0).Infof("stream delete entry: %v", err) + return fmt.Errorf("stream delete entry: %v", err) + } + + waitc := make(chan struct{}) + go func() { + for { + resp, err := stream.Recv() + if err == io.EOF { + // read done. 
+ close(waitc) + return + } + if err != nil { + glog.V(0).Infof("streamRemove: %v", err) + return + } + respFn(resp.Error) + } + }() + + for { + finished, parentDirectoryPath, entryName, isDeleteData, isRecursive := fn() + if finished { + break + } + err = stream.Send(&filer_pb.DeleteEntryRequest{ + Directory: parentDirectoryPath, + Name: entryName, + IsDeleteData: isDeleteData, + IsRecursive: isRecursive, + IgnoreRecursiveError: quiet, + }) + if err != nil { + glog.V(0).Infof("streamRemove: %v", err) + break + } + + } + stream.CloseSend() + <-waitc + return err + + }) + +} + func (s3a *S3ApiServer) exists(parentDirectoryPath string, entryName string, isDirectory bool) (exists bool, err error) { err = s3a.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { - ctx := context.Background() - request := &filer_pb.LookupDirectoryEntryRequest{ Directory: parentDirectoryPath, Name: entryName, } glog.V(4).Infof("exists entry %v/%v: %v", parentDirectoryPath, entryName, request) - resp, err := client.LookupDirectoryEntry(ctx, request) + resp, err := filer_pb.LookupEntry(client, request) if err != nil { + if err == filer_pb.ErrNotFound { + exists = false + return nil + } + glog.V(0).Infof("exists entry %v: %v", request, err) return fmt.Errorf("exists entry %s/%s: %v", parentDirectoryPath, entryName, err) } @@ -148,3 +219,11 @@ func (s3a *S3ApiServer) exists(parentDirectoryPath string, entryName string, isD return } + +func objectKey(key *string) *string { + if strings.HasPrefix(*key, "/") { + t := (*key)[1:] + return &t + } + return key +} diff --git a/weed/s3api/s3api_auth.go b/weed/s3api/s3api_auth.go index b680fe1e1..bf5cf5fab 100644 --- a/weed/s3api/s3api_auth.go +++ b/weed/s3api/s3api_auth.go @@ -9,6 +9,8 @@ import ( const ( signV4Algorithm = "AWS4-HMAC-SHA256" signV2Algorithm = "AWS" + iso8601Format = "20060102T150405Z" + yyyymmdd = "20060102" ) // Verify if request has JWT. @@ -23,8 +25,8 @@ func isRequestSignatureV4(r *http.Request) bool { // Verify if request has AWS Signature Version '2'. func isRequestSignatureV2(r *http.Request) bool { - return (!strings.HasPrefix(r.Header.Get("Authorization"), signV4Algorithm) && - strings.HasPrefix(r.Header.Get("Authorization"), signV2Algorithm)) + return !strings.HasPrefix(r.Header.Get("Authorization"), signV4Algorithm) && + strings.HasPrefix(r.Header.Get("Authorization"), signV2Algorithm) } // Verify if request has AWS PreSign Version '4'. 
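The s3api_auth.go helpers above distinguish signature versions purely by the Authorization header prefix: V4 headers start with the full algorithm name, V2 headers with "AWS" but not the V4 prefix. A small, self-contained illustration of that dispatch (the constants mirror the ones just added):

```go
package main

import (
	"fmt"
	"strings"
)

const (
	signV4Algorithm = "AWS4-HMAC-SHA256"
	signV2Algorithm = "AWS"
)

// detect mirrors isRequestSignatureV4/isRequestSignatureV2 above.
func detect(authHeader string) string {
	switch {
	case strings.HasPrefix(authHeader, signV4Algorithm):
		return "v4"
	case strings.HasPrefix(authHeader, signV2Algorithm):
		return "v2"
	default:
		return "none"
	}
}

func main() {
	fmt.Println(detect("AWS4-HMAC-SHA256 Credential=key/20200101/us-east-1/s3/aws4_request, SignedHeaders=host, Signature=abc")) // v4
	fmt.Println(detect("AWS key:signature"))                                                                                     // v2
	fmt.Println(detect(""))                                                                                                      // none
}
```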
diff --git a/weed/s3api/s3api_bucket_handlers.go b/weed/s3api/s3api_bucket_handlers.go index df9abd451..3e5089bed 100644 --- a/weed/s3api/s3api_bucket_handlers.go +++ b/weed/s3api/s3api_bucket_handlers.go @@ -2,14 +2,19 @@ package s3api import ( "context" + "encoding/xml" "fmt" + "math" "net/http" "os" "time" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/gorilla/mux" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/gorilla/mux" ) var ( @@ -17,37 +22,39 @@ var ( OS_GID = uint32(os.Getgid()) ) +type ListAllMyBucketsResult struct { + XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListAllMyBucketsResult"` + Owner *s3.Owner + Buckets []*s3.Bucket `xml:"Buckets>Bucket"` +} + func (s3a *S3ApiServer) ListBucketsHandler(w http.ResponseWriter, r *http.Request) { - var response ListAllMyBucketsResponse + var response ListAllMyBucketsResult - entries, err := s3a.list(s3a.option.BucketsPath, "", "", false, 0) + entries, err := s3a.list(s3a.option.BucketsPath, "", "", false, math.MaxInt32) if err != nil { writeErrorResponse(w, ErrInternalError, r.URL) return } - var buckets []ListAllMyBucketsEntry + var buckets []*s3.Bucket for _, entry := range entries { if entry.IsDirectory { - buckets = append(buckets, ListAllMyBucketsEntry{ - Name: entry.Name, - CreationDate: time.Unix(entry.Attributes.Crtime, 0), + buckets = append(buckets, &s3.Bucket{ + Name: aws.String(entry.Name), + CreationDate: aws.Time(time.Unix(entry.Attributes.Crtime, 0)), }) } } - response = ListAllMyBucketsResponse{ - ListAllMyBucketsResponse: ListAllMyBucketsResult{ - Owner: CanonicalUser{ - ID: "", - DisplayName: "", - }, - Buckets: ListAllMyBucketsList{ - Bucket: buckets, - }, + response = ListAllMyBucketsResult{ + Owner: &s3.Owner{ + ID: aws.String(""), + DisplayName: aws.String(""), }, + Buckets: buckets, } writeSuccessResponseXML(w, encodeResponse(response)) @@ -74,15 +81,13 @@ func (s3a *S3ApiServer) DeleteBucketHandler(w http.ResponseWriter, r *http.Reque err := s3a.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { - ctx := context.Background() - // delete collection deleteCollectionRequest := &filer_pb.DeleteCollectionRequest{ Collection: bucket, } glog.V(1).Infof("delete collection: %v", deleteCollectionRequest) - if _, err := client.DeleteCollection(ctx, deleteCollectionRequest); err != nil { + if _, err := client.DeleteCollection(context.Background(), deleteCollectionRequest); err != nil { return fmt.Errorf("delete collection %s: %v", bucket, err) } @@ -112,7 +117,10 @@ func (s3a *S3ApiServer) HeadBucketHandler(w http.ResponseWriter, r *http.Request } glog.V(1).Infof("lookup bucket: %v", request) - if _, err := client.LookupDirectoryEntry(context.Background(), request); err != nil { + if _, err := filer_pb.LookupEntry(client, request); err != nil { + if err == filer_pb.ErrNotFound { + return filer_pb.ErrNotFound + } return fmt.Errorf("lookup bucket %s/%s: %v", s3a.option.BucketsPath, bucket, err) } diff --git a/weed/s3api/s3api_bucket_handlers_test.go b/weed/s3api/s3api_bucket_handlers_test.go new file mode 100644 index 000000000..7ab04830b --- /dev/null +++ b/weed/s3api/s3api_bucket_handlers_test.go @@ -0,0 +1,39 @@ +package s3api + +import ( + "testing" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3" +) + +func TestListBucketsHandler(t *testing.T) { + + expected := `<?xml version="1.0" encoding="UTF-8"?> +<ListAllMyBucketsResult 
xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><DisplayName></DisplayName><ID></ID></Owner><Buckets><Bucket><CreationDate>2011-04-09T12:34:49Z</CreationDate><Name>test1</Name></Bucket><Bucket><CreationDate>2011-02-09T12:34:49Z</CreationDate><Name>test2</Name></Bucket></Buckets></ListAllMyBucketsResult>` + var response ListAllMyBucketsResult + + var buckets []*s3.Bucket + buckets = append(buckets, &s3.Bucket{ + Name: aws.String("test1"), + CreationDate: aws.Time(time.Date(2011, 4, 9, 12, 34, 49, 0, time.UTC)), + }) + buckets = append(buckets, &s3.Bucket{ + Name: aws.String("test2"), + CreationDate: aws.Time(time.Date(2011, 2, 9, 12, 34, 49, 0, time.UTC)), + }) + + response = ListAllMyBucketsResult{ + Owner: &s3.Owner{ + ID: aws.String(""), + DisplayName: aws.String(""), + }, + Buckets: buckets, + } + + encoded := string(encodeResponse(response)) + if encoded != expected { + t.Errorf("unexpected output: %s\nexpecting:%s", encoded, expected) + } +} diff --git a/weed/s3api/s3api_errors.go b/weed/s3api/s3api_errors.go index 7ba55ed28..3f97c73cb 100644 --- a/weed/s3api/s3api_errors.go +++ b/weed/s3api/s3api_errors.go @@ -27,6 +27,7 @@ type ErrorCode int // Error codes, see full list at http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html const ( ErrNone ErrorCode = iota + ErrAccessDenied ErrMethodNotAllowed ErrBucketNotEmpty ErrBucketAlreadyExists @@ -41,12 +42,43 @@ const ( ErrInvalidPartNumberMarker ErrInvalidPart ErrInternalError + ErrInvalidCopyDest + ErrInvalidCopySource + ErrAuthHeaderEmpty + ErrSignatureVersionNotSupported + ErrMissingFields + ErrMissingCredTag + ErrCredMalformed + ErrMalformedXML + ErrMalformedDate + ErrMalformedPresignedDate + ErrMalformedCredentialDate + ErrMissingSignHeadersTag + ErrMissingSignTag + ErrUnsignedHeaders + ErrInvalidQueryParams + ErrInvalidQuerySignatureAlgo + ErrExpiredPresignRequest + ErrMalformedExpires + ErrNegativeExpires + ErrMaximumExpires + ErrSignatureDoesNotMatch + ErrContentSHA256Mismatch + ErrInvalidAccessKeyID + ErrRequestNotReadyYet + ErrMissingDateHeader + ErrInvalidRequest ErrNotImplemented ) // error code to APIError structure, these fields carry respective // descriptions for all the error responses. var errorCodeResponse = map[ErrorCode]APIError{ + ErrAccessDenied: { + Code: "AccessDenied", + Description: "Access Denied.", + HTTPStatusCode: http.StatusForbidden, + }, ErrMethodNotAllowed: { Code: "MethodNotAllowed", Description: "The specified method is not allowed against this resource.", @@ -118,6 +150,139 @@ var errorCodeResponse = map[ErrorCode]APIError{ Description: "One or more of the specified parts could not be found. 
The part may not have been uploaded, or the specified entity tag may not match the part's entity tag.", HTTPStatusCode: http.StatusBadRequest, }, + + ErrInvalidCopyDest: { + Code: "InvalidRequest", + Description: "This copy request is illegal because it is trying to copy an object to itself without changing the object's metadata, storage class, website redirect location or encryption attributes.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrInvalidCopySource: { + Code: "InvalidArgument", + Description: "Copy Source must mention the source bucket and key: sourcebucket/sourcekey.", + HTTPStatusCode: http.StatusBadRequest, + }, + + ErrMalformedXML: { + Code: "MalformedXML", + Description: "The XML you provided was not well-formed or did not validate against our published schema.", + HTTPStatusCode: http.StatusBadRequest, + }, + + ErrAuthHeaderEmpty: { + Code: "InvalidArgument", + Description: "Authorization header is invalid -- one and only one ' ' (space) required.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrSignatureVersionNotSupported: { + Code: "InvalidRequest", + Description: "The authorization mechanism you have provided is not supported. Please use AWS4-HMAC-SHA256.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrMissingFields: { + Code: "MissingFields", + Description: "Missing fields in request.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrMissingCredTag: { + Code: "InvalidRequest", + Description: "Missing Credential field for this request.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrCredMalformed: { + Code: "AuthorizationQueryParametersError", + Description: "Error parsing the X-Amz-Credential parameter; the Credential is mal-formed; expecting \"<YOUR-AKID>/YYYYMMDD/REGION/SERVICE/aws4_request\".", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrMalformedDate: { + Code: "MalformedDate", + Description: "Invalid date format header, expected to be in ISO8601, RFC1123 or RFC1123Z time format.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrMalformedPresignedDate: { + Code: "AuthorizationQueryParametersError", + Description: "X-Amz-Date must be in the ISO8601 Long Format \"yyyyMMdd'T'HHmmss'Z'\"", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrMissingSignHeadersTag: { + Code: "InvalidArgument", + Description: "Signature header missing SignedHeaders field.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrMissingSignTag: { + Code: "AccessDenied", + Description: "Signature header missing Signature field.", + HTTPStatusCode: http.StatusBadRequest, + }, + + ErrUnsignedHeaders: { + Code: "AccessDenied", + Description: "There were headers present in the request which were not signed", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrInvalidQueryParams: { + Code: "AuthorizationQueryParametersError", + Description: "Query-string authentication version 4 requires the X-Amz-Algorithm, X-Amz-Credential, X-Amz-Signature, X-Amz-Date, X-Amz-SignedHeaders, and X-Amz-Expires parameters.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrInvalidQuerySignatureAlgo: { + Code: "AuthorizationQueryParametersError", + Description: "X-Amz-Algorithm only supports \"AWS4-HMAC-SHA256\".", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrExpiredPresignRequest: { + Code: "AccessDenied", + Description: "Request has expired", + HTTPStatusCode: http.StatusForbidden, + }, + ErrMalformedExpires: { + Code: "AuthorizationQueryParametersError", + Description: "X-Amz-Expires should be a number", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrNegativeExpires: { + 
Code: "AuthorizationQueryParametersError", + Description: "X-Amz-Expires must be non-negative", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrMaximumExpires: { + Code: "AuthorizationQueryParametersError", + Description: "X-Amz-Expires must be less than a week (in seconds); that is, the given X-Amz-Expires must be less than 604800 seconds", + HTTPStatusCode: http.StatusBadRequest, + }, + + ErrInvalidAccessKeyID: { + Code: "InvalidAccessKeyId", + Description: "The access key ID you provided does not exist in our records.", + HTTPStatusCode: http.StatusForbidden, + }, + + ErrRequestNotReadyYet: { + Code: "AccessDenied", + Description: "Request is not valid yet", + HTTPStatusCode: http.StatusForbidden, + }, + + ErrSignatureDoesNotMatch: { + Code: "SignatureDoesNotMatch", + Description: "The request signature we calculated does not match the signature you provided. Check your key and signing method.", + HTTPStatusCode: http.StatusForbidden, + }, + + ErrContentSHA256Mismatch: { + Code: "XAmzContentSHA256Mismatch", + Description: "The provided 'x-amz-content-sha256' header does not match what was computed.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrMissingDateHeader: { + Code: "AccessDenied", + Description: "AWS authentication requires a valid Date or x-amz-date header", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrInvalidRequest: { + Code: "InvalidRequest", + Description: "Invalid Request", + HTTPStatusCode: http.StatusBadRequest, + }, ErrNotImplemented: { Code: "NotImplemented", Description: "A header you provided implies functionality that is not implemented", diff --git a/weed/s3api/s3api_handlers.go b/weed/s3api/s3api_handlers.go index 281fc9000..d7212d5e3 100644 --- a/weed/s3api/s3api_handlers.go +++ b/weed/s3api/s3api_handlers.go @@ -5,12 +5,15 @@ import ( "encoding/base64" "encoding/xml" "fmt" - "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/chrislusf/seaweedfs/weed/util" "net/http" "net/url" "time" + + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" ) type mimeType string @@ -37,15 +40,11 @@ func encodeResponse(response interface{}) []byte { func (s3a *S3ApiServer) withFilerClient(fn func(filer_pb.SeaweedFilerClient) error) error { - grpcConnection, err := util.GrpcDial(s3a.option.FilerGrpcAddress) - if err != nil { - return fmt.Errorf("fail to dial %s: %v", s3a.option.FilerGrpcAddress, err) - } - defer grpcConnection.Close() - - client := filer_pb.NewSeaweedFilerClient(grpcConnection) + return pb.WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error { + client := filer_pb.NewSeaweedFilerClient(grpcConnection) + return fn(client) + }, s3a.option.FilerGrpcAddress, s3a.option.GrpcDialOption) - return fn(client) } // If none of the http routes match respond with MethodNotAllowed @@ -77,6 +76,7 @@ func writeResponse(w http.ResponseWriter, statusCode int, response []byte, mType } w.WriteHeader(statusCode) if response != nil { + glog.V(4).Infof("status %d %s: %s", statusCode, mType, string(response)) w.Write(response) w.(http.Flusher).Flush() } diff --git a/weed/s3api/s3api_object_copy_handlers.go b/weed/s3api/s3api_object_copy_handlers.go new file mode 100644 index 000000000..b8fb3f6a4 --- /dev/null +++ b/weed/s3api/s3api_object_copy_handlers.go @@ -0,0 +1,151 @@ +package s3api + +import ( + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + "time" + + "github.com/gorilla/mux" 
+ + "github.com/chrislusf/seaweedfs/weed/util" +) + +func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request) { + + vars := mux.Vars(r) + dstBucket := vars["bucket"] + dstObject := getObject(vars) + + // Copy source path. + cpSrcPath, err := url.QueryUnescape(r.Header.Get("X-Amz-Copy-Source")) + if err != nil { + // Save unescaped string as is. + cpSrcPath = r.Header.Get("X-Amz-Copy-Source") + } + + srcBucket, srcObject := pathToBucketAndObject(cpSrcPath) + // If source object is empty or bucket is empty, reply back invalid copy source. + if srcObject == "" || srcBucket == "" { + writeErrorResponse(w, ErrInvalidCopySource, r.URL) + return + } + + if srcBucket == dstBucket && srcObject == dstObject { + writeErrorResponse(w, ErrInvalidCopySource, r.URL) + return + } + + dstUrl := fmt.Sprintf("http://%s%s/%s%s?collection=%s", + s3a.option.Filer, s3a.option.BucketsPath, dstBucket, dstObject, dstBucket) + srcUrl := fmt.Sprintf("http://%s%s/%s%s", + s3a.option.Filer, s3a.option.BucketsPath, srcBucket, srcObject) + + _, _, dataReader, err := util.DownloadFile(srcUrl) + if err != nil { + writeErrorResponse(w, ErrInvalidCopySource, r.URL) + return + } + defer dataReader.Close() + + etag, errCode := s3a.putToFiler(r, dstUrl, dataReader) + + if errCode != ErrNone { + writeErrorResponse(w, errCode, r.URL) + return + } + + setEtag(w, etag) + + response := CopyObjectResult{ + ETag: etag, + LastModified: time.Now(), + } + + writeSuccessResponseXML(w, encodeResponse(response)) + +} + +func pathToBucketAndObject(path string) (bucket, object string) { + path = strings.TrimPrefix(path, "/") + parts := strings.SplitN(path, "/", 2) + if len(parts) == 2 { + return parts[0], "/" + parts[1] + } + return parts[0], "/" +} + +type CopyPartResult struct { + LastModified time.Time `xml:"LastModified"` + ETag string `xml:"ETag"` +} + +func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Request) { + // https://docs.aws.amazon.com/AmazonS3/latest/dev/CopyingObjctsUsingRESTMPUapi.html + // https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPartCopy.html + vars := mux.Vars(r) + dstBucket := vars["bucket"] + // dstObject := getObject(vars) + + // Copy source path. + cpSrcPath, err := url.QueryUnescape(r.Header.Get("X-Amz-Copy-Source")) + if err != nil { + // Save unescaped string as is. + cpSrcPath = r.Header.Get("X-Amz-Copy-Source") + } + + srcBucket, srcObject := pathToBucketAndObject(cpSrcPath) + // If source object is empty or bucket is empty, reply back invalid copy source. 
+ if srcObject == "" || srcBucket == "" { + writeErrorResponse(w, ErrInvalidCopySource, r.URL) + return + } + + uploadID := r.URL.Query().Get("uploadId") + partIDString := r.URL.Query().Get("partNumber") + + partID, err := strconv.Atoi(partIDString) + if err != nil { + writeErrorResponse(w, ErrInvalidPart, r.URL) + return + } + + // check partID with maximum part ID for multipart objects + if partID > globalMaxPartID { + writeErrorResponse(w, ErrInvalidMaxParts, r.URL) + return + } + + rangeHeader := r.Header.Get("x-amz-copy-source-range") + + dstUrl := fmt.Sprintf("http://%s%s/%s/%04d.part?collection=%s", + s3a.option.Filer, s3a.genUploadsFolder(dstBucket), uploadID, partID-1, dstBucket) + srcUrl := fmt.Sprintf("http://%s%s/%s%s", + s3a.option.Filer, s3a.option.BucketsPath, srcBucket, srcObject) + + dataReader, err := util.ReadUrlAsReaderCloser(srcUrl, rangeHeader) + if err != nil { + writeErrorResponse(w, ErrInvalidCopySource, r.URL) + return + } + defer dataReader.Close() + + etag, errCode := s3a.putToFiler(r, dstUrl, dataReader) + + if errCode != ErrNone { + writeErrorResponse(w, errCode, r.URL) + return + } + + setEtag(w, etag) + + response := CopyPartResult{ + ETag: etag, + LastModified: time.Now(), + } + + writeSuccessResponseXML(w, encodeResponse(response)) + +} diff --git a/weed/s3api/s3api_object_handlers.go b/weed/s3api/s3api_object_handlers.go index 93816c7ee..9d03cdbe3 100644 --- a/weed/s3api/s3api_object_handlers.go +++ b/weed/s3api/s3api_object_handlers.go @@ -3,15 +3,18 @@ package s3api import ( "crypto/md5" "encoding/json" + "encoding/xml" "fmt" "io" "io/ioutil" "net/http" "strings" + "github.com/gorilla/mux" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/server" - "github.com/gorilla/mux" + "github.com/chrislusf/seaweedfs/weed/util" ) var ( @@ -40,12 +43,17 @@ func (s3a *S3ApiServer) PutObjectHandler(w http.ResponseWriter, r *http.Request) rAuthType := getRequestAuthType(r) dataReader := r.Body + var s3ErrCode ErrorCode if rAuthType == authTypeStreamingSigned { - dataReader = newSignV4ChunkedReader(r) + dataReader, s3ErrCode = s3a.iam.newSignV4ChunkedReader(r) } + if s3ErrCode != ErrNone { + writeErrorResponse(w, s3ErrCode, r.URL) + return + } + defer dataReader.Close() - uploadUrl := fmt.Sprintf("http://%s%s/%s%s?collection=%s", - s3a.option.Filer, s3a.option.BucketsPath, bucket, object, bucket) + uploadUrl := fmt.Sprintf("http://%s%s/%s%s", s3a.option.Filer, s3a.option.BucketsPath, bucket, object) etag, errCode := s3a.putToFiler(r, uploadUrl, dataReader) @@ -63,6 +71,7 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request) vars := mux.Vars(r) bucket := vars["bucket"] + object := getObject(vars) if strings.HasSuffix(r.URL.Path, "/") { writeErrorResponse(w, ErrNotImplemented, r.URL) @@ -70,9 +79,9 @@ func (s3a *S3ApiServer) GetObjectHandler(w http.ResponseWriter, r *http.Request) } destUrl := fmt.Sprintf("http://%s%s/%s%s", - s3a.option.Filer, s3a.option.BucketsPath, bucket, r.RequestURI) + s3a.option.Filer, s3a.option.BucketsPath, bucket, object) - s3a.proxyToFiler(w, r, destUrl, passThroghResponse) + s3a.proxyToFiler(w, r, destUrl, passThroughResponse) } @@ -80,11 +89,12 @@ func (s3a *S3ApiServer) HeadObjectHandler(w http.ResponseWriter, r *http.Request vars := mux.Vars(r) bucket := vars["bucket"] + object := getObject(vars) destUrl := fmt.Sprintf("http://%s%s/%s%s", - s3a.option.Filer, s3a.option.BucketsPath, bucket, r.RequestURI) + s3a.option.Filer, s3a.option.BucketsPath, bucket, object) - 
s3a.proxyToFiler(w, r, destUrl, passThroghResponse) + s3a.proxyToFiler(w, r, destUrl, passThroughResponse) } @@ -92,9 +102,10 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque vars := mux.Vars(r) bucket := vars["bucket"] + object := getObject(vars) destUrl := fmt.Sprintf("http://%s%s/%s%s", - s3a.option.Filer, s3a.option.BucketsPath, bucket, r.RequestURI) + s3a.option.Filer, s3a.option.BucketsPath, bucket, object) s3a.proxyToFiler(w, r, destUrl, func(proxyResonse *http.Response, w http.ResponseWriter) { for k, v := range proxyResonse.Header { @@ -105,10 +116,97 @@ func (s3a *S3ApiServer) DeleteObjectHandler(w http.ResponseWriter, r *http.Reque } +/// ObjectIdentifier carries key name for the object to delete. +type ObjectIdentifier struct { + ObjectName string `xml:"Key"` +} + +// DeleteObjectsRequest - xml carrying the object key names which needs to be deleted. +type DeleteObjectsRequest struct { + // Element to enable quiet mode for the request + Quiet bool + // List of objects to be deleted + Objects []ObjectIdentifier `xml:"Object"` +} + +// DeleteError structure. +type DeleteError struct { + Code string + Message string + Key string +} + +// DeleteObjectsResponse container for multiple object deletes. +type DeleteObjectsResponse struct { + XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ DeleteResult" json:"-"` + + // Collection of all deleted objects + DeletedObjects []ObjectIdentifier `xml:"Deleted,omitempty"` + + // Collection of errors deleting certain objects. + Errors []DeleteError `xml:"Error,omitempty"` +} + // DeleteMultipleObjectsHandler - Delete multiple objects func (s3a *S3ApiServer) DeleteMultipleObjectsHandler(w http.ResponseWriter, r *http.Request) { - // TODO - writeErrorResponse(w, ErrNotImplemented, r.URL) + + vars := mux.Vars(r) + bucket := vars["bucket"] + + deleteXMLBytes, err := ioutil.ReadAll(r.Body) + if err != nil { + writeErrorResponse(w, ErrInternalError, r.URL) + return + } + + deleteObjects := &DeleteObjectsRequest{} + if err := xml.Unmarshal(deleteXMLBytes, deleteObjects); err != nil { + writeErrorResponse(w, ErrMalformedXML, r.URL) + return + } + + var index int + + var deletedObjects []ObjectIdentifier + var deleteErrors []DeleteError + s3a.streamRemove(deleteObjects.Quiet, func() (finished bool, parentDirectoryPath string, entryName string, isDeleteData, isRecursive bool) { + if index >= len(deleteObjects.Objects) { + finished = true + return + } + + object := deleteObjects.Objects[index] + + lastSeparator := strings.LastIndex(object.ObjectName, "/") + parentDirectoryPath, entryName, isDeleteData, isRecursive = "/", object.ObjectName, true, false + if lastSeparator > 0 && lastSeparator+1 < len(object.ObjectName) { + entryName = object.ObjectName[lastSeparator+1:] + parentDirectoryPath = "/" + object.ObjectName[:lastSeparator] + } + parentDirectoryPath = fmt.Sprintf("%s/%s%s", s3a.option.BucketsPath, bucket, parentDirectoryPath) + return + }, func(err string) { + object := deleteObjects.Objects[index] + if err == "" { + deletedObjects = append(deletedObjects, object) + } else { + deleteErrors = append(deleteErrors, DeleteError{ + Code: "", + Message: err, + Key: object.ObjectName, + }) + } + index++ + }) + + deleteResp := DeleteObjectsResponse{} + if !deleteObjects.Quiet { + deleteResp.DeletedObjects = deletedObjects + } + deleteResp.Errors = deleteErrors + + writeSuccessResponseXML(w, encodeResponse(deleteResp)) + } func (s3a *S3ApiServer) proxyToFiler(w http.ResponseWriter, r *http.Request, destUrl 
string, responseFn func(proxyResonse *http.Response, w http.ResponseWriter)) { @@ -139,11 +237,12 @@ func (s3a *S3ApiServer) proxyToFiler(w http.ResponseWriter, r *http.Request, des writeErrorResponse(w, ErrInternalError, r.URL) return } - defer resp.Body.Close() + defer util.CloseResponse(resp) responseFn(resp, w) + } -func passThroghResponse(proxyResonse *http.Response, w http.ResponseWriter) { +func passThroughResponse(proxyResonse *http.Response, w http.ResponseWriter) { for k, v := range proxyResonse.Header { w.Header()[k] = v } @@ -151,10 +250,10 @@ func passThroghResponse(proxyResonse *http.Response, w http.ResponseWriter) { io.Copy(w, proxyResonse.Body) } -func (s3a *S3ApiServer) putToFiler(r *http.Request, uploadUrl string, dataReader io.ReadCloser) (etag string, code ErrorCode) { +func (s3a *S3ApiServer) putToFiler(r *http.Request, uploadUrl string, dataReader io.Reader) (etag string, code ErrorCode) { hash := md5.New() - var body io.Reader = io.TeeReader(dataReader, hash) + var body = io.TeeReader(dataReader, hash) proxyReq, err := http.NewRequest("PUT", uploadUrl, body) @@ -174,8 +273,6 @@ func (s3a *S3ApiServer) putToFiler(r *http.Request, uploadUrl string, dataReader resp, postErr := client.Do(proxyReq) - dataReader.Close() - if postErr != nil { glog.Errorf("post to filer: %v", postErr) return "", ErrInternalError diff --git a/weed/s3api/s3api_object_multipart_handlers.go b/weed/s3api/s3api_object_multipart_handlers.go index 267d126c5..3282e4176 100644 --- a/weed/s3api/s3api_object_multipart_handlers.go +++ b/weed/s3api/s3api_object_multipart_handlers.go @@ -2,20 +2,21 @@ package s3api import ( "fmt" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/s3" - "github.com/gorilla/mux" "net/http" "net/url" "strconv" "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/gorilla/mux" ) const ( - maxObjectList = 1000 // Limit number of objects in a listObjectsResponse. - maxUploadsList = 1000 // Limit number of uploads in a listUploadsResponse. - maxPartsList = 1000 // Limit number of parts in a listPartsResponse. - globalMaxPartID = 10000 + maxObjectListSizeLimit = 10000 // Limit number of objects in a listObjectsResponse. + maxUploadsList = 10000 // Limit number of uploads in a listUploadsResponse. + maxPartsList = 10000 // Limit number of parts in a listPartsResponse. + globalMaxPartID = 100000 ) // NewMultipartUploadHandler - New multipart upload. 
@@ -27,7 +28,7 @@ func (s3a *S3ApiServer) NewMultipartUploadHandler(w http.ResponseWriter, r *http response, errCode := s3a.createMultipartUpload(&s3.CreateMultipartUploadInput{ Bucket: aws.String(bucket), - Key: aws.String(object), + Key: objectKey(aws.String(object)), }) if errCode != ErrNone { @@ -52,7 +53,7 @@ func (s3a *S3ApiServer) CompleteMultipartUploadHandler(w http.ResponseWriter, r response, errCode := s3a.completeMultipartUpload(&s3.CompleteMultipartUploadInput{ Bucket: aws.String(bucket), - Key: aws.String(object), + Key: objectKey(aws.String(object)), UploadId: aws.String(uploadID), }) @@ -78,7 +79,7 @@ func (s3a *S3ApiServer) AbortMultipartUploadHandler(w http.ResponseWriter, r *ht response, errCode := s3a.abortMultipartUpload(&s3.AbortMultipartUploadInput{ Bucket: aws.String(bucket), - Key: aws.String(object), + Key: objectKey(aws.String(object)), UploadId: aws.String(uploadID), }) @@ -150,7 +151,7 @@ func (s3a *S3ApiServer) ListObjectPartsHandler(w http.ResponseWriter, r *http.Re response, errCode := s3a.listObjectParts(&s3.ListPartsInput{ Bucket: aws.String(bucket), - Key: aws.String(object), + Key: objectKey(aws.String(object)), MaxParts: aws.Int64(int64(maxParts)), PartNumberMarker: aws.Int64(int64(partNumberMarker)), UploadId: aws.String(uploadID), @@ -192,13 +193,19 @@ func (s3a *S3ApiServer) PutObjectPartHandler(w http.ResponseWriter, r *http.Requ return } + var s3ErrCode ErrorCode dataReader := r.Body if rAuthType == authTypeStreamingSigned { - dataReader = newSignV4ChunkedReader(r) + dataReader, s3ErrCode = s3a.iam.newSignV4ChunkedReader(r) + } + if s3ErrCode != ErrNone { + writeErrorResponse(w, s3ErrCode, r.URL) + return } + defer dataReader.Close() - uploadUrl := fmt.Sprintf("http://%s%s/%s/%04d.part", - s3a.option.Filer, s3a.genUploadsFolder(bucket), uploadID, partID-1) + uploadUrl := fmt.Sprintf("http://%s%s/%s/%04d.part?collection=%s", + s3a.option.Filer, s3a.genUploadsFolder(bucket), uploadID, partID-1, bucket) etag, errCode := s3a.putToFiler(r, uploadUrl, dataReader) diff --git a/weed/s3api/s3api_objects_list_handlers.go b/weed/s3api/s3api_objects_list_handlers.go index d751a3b1d..5006df6a0 100644 --- a/weed/s3api/s3api_objects_list_handlers.go +++ b/weed/s3api/s3api_objects_list_handlers.go @@ -3,22 +3,19 @@ package s3api import ( "context" "fmt" + "io" "net/http" "net/url" "path/filepath" "strconv" + "strings" "time" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/s3" + "github.com/gorilla/mux" + "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/gorilla/mux" -) - -const ( - maxObjectListSizeLimit = 1000 // Limit number of objects in a listObjectsResponse. 
) func (s3a *S3ApiServer) ListObjectsV2Handler(w http.ResponseWriter, r *http.Request) { @@ -85,10 +82,13 @@ func (s3a *S3ApiServer) ListObjectsV1Handler(w http.ResponseWriter, r *http.Requ writeSuccessResponseXML(w, encodeResponse(response)) } -func (s3a *S3ApiServer) listFilerEntries(bucket, originalPrefix string, maxKeys int, marker string) (response *s3.ListObjectsOutput, err error) { +func (s3a *S3ApiServer) listFilerEntries(bucket, originalPrefix string, maxKeys int, marker string) (response ListBucketResult, err error) { // convert full path prefix into directory name and prefix for entry name dir, prefix := filepath.Split(originalPrefix) + if strings.HasPrefix(dir, "/") { + dir = dir[1:] + } // check filer err = s3a.withFilerClient(func(client filer_pb.SeaweedFilerClient) error { @@ -101,17 +101,28 @@ func (s3a *S3ApiServer) listFilerEntries(bucket, originalPrefix string, maxKeys InclusiveStartFrom: false, } - resp, err := client.ListEntries(context.Background(), request) + stream, err := client.ListEntries(context.Background(), request) if err != nil { return fmt.Errorf("list buckets: %v", err) } - var contents []*s3.Object - var commonPrefixes []*s3.CommonPrefix + var contents []ListEntry + var commonPrefixes []PrefixEntry var counter int var lastEntryName string var isTruncated bool - for _, entry := range resp.Entries { + + for { + resp, recvErr := stream.Recv() + if recvErr != nil { + if recvErr == io.EOF { + break + } else { + return recvErr + } + } + + entry := resp.Entry counter++ if counter > maxKeys { isTruncated = true @@ -119,37 +130,40 @@ func (s3a *S3ApiServer) listFilerEntries(bucket, originalPrefix string, maxKeys } lastEntryName = entry.Name if entry.IsDirectory { - commonPrefixes = append(commonPrefixes, &s3.CommonPrefix{ - Prefix: aws.String(fmt.Sprintf("%s%s/", dir, entry.Name)), - }) + if entry.Name != ".uploads" { + commonPrefixes = append(commonPrefixes, PrefixEntry{ + Prefix: fmt.Sprintf("%s%s/", dir, entry.Name), + }) + } } else { - contents = append(contents, &s3.Object{ - Key: aws.String(fmt.Sprintf("%s%s", dir, entry.Name)), - LastModified: aws.Time(time.Unix(entry.Attributes.Mtime, 0)), - ETag: aws.String("\"" + filer2.ETag(entry.Chunks) + "\""), - Size: aws.Int64(int64(filer2.TotalSize(entry.Chunks))), - Owner: &s3.Owner{ - ID: aws.String("bcaf161ca5fb16fd081034f"), - DisplayName: aws.String("webfile"), + contents = append(contents, ListEntry{ + Key: fmt.Sprintf("%s%s", dir, entry.Name), + LastModified: time.Unix(entry.Attributes.Mtime, 0), + ETag: "\"" + filer2.ETag(entry.Chunks) + "\"", + Size: int64(filer2.TotalSize(entry.Chunks)), + Owner: CanonicalUser{ + ID: fmt.Sprintf("%x", entry.Attributes.Uid), + DisplayName: entry.Attributes.UserName, }, - StorageClass: aws.String("STANDARD"), + StorageClass: "STANDARD", }) } + } - response = &s3.ListObjectsOutput{ - Name: aws.String(bucket), - Prefix: aws.String(originalPrefix), - Marker: aws.String(marker), - NextMarker: aws.String(lastEntryName), - MaxKeys: aws.Int64(int64(maxKeys)), - Delimiter: aws.String("/"), - IsTruncated: aws.Bool(isTruncated), + response = ListBucketResult{ + Name: bucket, + Prefix: originalPrefix, + Marker: marker, + NextMarker: lastEntryName, + MaxKeys: maxKeys, + Delimiter: "/", + IsTruncated: isTruncated, Contents: contents, CommonPrefixes: commonPrefixes, } - glog.V(4).Infof("read directory: %v, found: %v", request, counter) + glog.V(4).Infof("read directory: %v, found: %v, %+v", request, counter, response) return nil }) diff --git 
a/weed/s3api/s3api_objects_list_handlers_test.go b/weed/s3api/s3api_objects_list_handlers_test.go new file mode 100644 index 000000000..7b87b32fb --- /dev/null +++ b/weed/s3api/s3api_objects_list_handlers_test.go @@ -0,0 +1,38 @@ +package s3api + +import ( + "testing" + "time" +) + +func TestListObjectsHandler(t *testing.T) { + + // https://docs.aws.amazon.com/AmazonS3/latest/API/v2-RESTBucketGET.html + + expected := `<?xml version="1.0" encoding="UTF-8"?> +<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Name>test_container</Name><Prefix></Prefix><Marker></Marker><MaxKeys>1000</MaxKeys><IsTruncated>false</IsTruncated><Contents><Key>1.zip</Key><ETag>"4397da7a7649e8085de9916c240e8166"</ETag><Size>1234567</Size><Owner><ID>65a011niqo39cdf8ec533ec3d1ccaafsa932</ID></Owner><StorageClass>STANDARD</StorageClass><LastModified>2011-04-09T12:34:49Z</LastModified></Contents></ListBucketResult>` + + response := ListBucketResult{ + Name: "test_container", + Prefix: "", + Marker: "", + NextMarker: "", + MaxKeys: 1000, + IsTruncated: false, + Contents: []ListEntry{{ + Key: "1.zip", + LastModified: time.Date(2011, 4, 9, 12, 34, 49, 0, time.UTC), + ETag: "\"4397da7a7649e8085de9916c240e8166\"", + Size: 1234567, + Owner: CanonicalUser{ + ID: "65a011niqo39cdf8ec533ec3d1ccaafsa932", + }, + StorageClass: "STANDARD", + }}, + } + + encoded := string(encodeResponse(response)) + if encoded != expected { + t.Errorf("unexpected output: %s\nexpecting:%s", encoded, expected) + } +} diff --git a/weed/s3api/s3api_server.go b/weed/s3api/s3api_server.go index db798a546..773094a5f 100644 --- a/weed/s3api/s3api_server.go +++ b/weed/s3api/s3api_server.go @@ -1,30 +1,30 @@ package s3api import ( - _ "github.com/chrislusf/seaweedfs/weed/filer2/cassandra" - _ "github.com/chrislusf/seaweedfs/weed/filer2/leveldb" - _ "github.com/chrislusf/seaweedfs/weed/filer2/memdb" - _ "github.com/chrislusf/seaweedfs/weed/filer2/mysql" - _ "github.com/chrislusf/seaweedfs/weed/filer2/postgres" - _ "github.com/chrislusf/seaweedfs/weed/filer2/redis" - "github.com/gorilla/mux" "net/http" + + "github.com/gorilla/mux" + "google.golang.org/grpc" ) type S3ApiServerOption struct { Filer string FilerGrpcAddress string + Config string DomainName string BucketsPath string + GrpcDialOption grpc.DialOption } type S3ApiServer struct { option *S3ApiServerOption + iam *IdentityAccessManagement } func NewS3ApiServer(router *mux.Router, option *S3ApiServerOption) (s3ApiServer *S3ApiServer, err error) { s3ApiServer = &S3ApiServer{ option: option, + iam: NewIdentityAccessManagement(option.Config, option.DomainName), } s3ApiServer.registerRouter(router) @@ -44,48 +44,47 @@ func (s3a *S3ApiServer) registerRouter(router *mux.Router) { for _, bucket := range routers { // HeadObject - bucket.Methods("HEAD").Path("/{object:.+}").HandlerFunc(s3a.HeadObjectHandler) + bucket.Methods("HEAD").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.HeadObjectHandler, ACTION_READ)) // HeadBucket - bucket.Methods("HEAD").HandlerFunc(s3a.HeadBucketHandler) + bucket.Methods("HEAD").HandlerFunc(s3a.iam.Auth(s3a.HeadBucketHandler, ACTION_ADMIN)) + // CopyObjectPart + bucket.Methods("PUT").Path("/{object:.+}").HeadersRegexp("X-Amz-Copy-Source", ".*?(\\/|%2F).*?").HandlerFunc(s3a.iam.Auth(s3a.CopyObjectPartHandler, ACTION_WRITE)).Queries("partNumber", "{partNumber:[0-9]+}", "uploadId", "{uploadId:.*}") // PutObjectPart - bucket.Methods("PUT").Path("/{object:.+}").HandlerFunc(s3a.PutObjectPartHandler).Queries("partNumber", "{partNumber:[0-9]+}", "uploadId", "{uploadId:.*}") 
+ bucket.Methods("PUT").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.PutObjectPartHandler, ACTION_WRITE)).Queries("partNumber", "{partNumber:[0-9]+}", "uploadId", "{uploadId:.*}") // CompleteMultipartUpload - bucket.Methods("POST").Path("/{object:.+}").HandlerFunc(s3a.CompleteMultipartUploadHandler).Queries("uploadId", "{uploadId:.*}") + bucket.Methods("POST").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.CompleteMultipartUploadHandler, ACTION_WRITE)).Queries("uploadId", "{uploadId:.*}") // NewMultipartUpload - bucket.Methods("POST").Path("/{object:.+}").HandlerFunc(s3a.NewMultipartUploadHandler).Queries("uploads", "") + bucket.Methods("POST").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.NewMultipartUploadHandler, ACTION_WRITE)).Queries("uploads", "") // AbortMultipartUpload - bucket.Methods("DELETE").Path("/{object:.+}").HandlerFunc(s3a.AbortMultipartUploadHandler).Queries("uploadId", "{uploadId:.*}") + bucket.Methods("DELETE").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.AbortMultipartUploadHandler, ACTION_WRITE)).Queries("uploadId", "{uploadId:.*}") // ListObjectParts - bucket.Methods("GET").Path("/{object:.+}").HandlerFunc(s3a.ListObjectPartsHandler).Queries("uploadId", "{uploadId:.*}") + bucket.Methods("GET").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.ListObjectPartsHandler, ACTION_WRITE)).Queries("uploadId", "{uploadId:.*}") // ListMultipartUploads - bucket.Methods("GET").HandlerFunc(s3a.ListMultipartUploadsHandler).Queries("uploads", "") + bucket.Methods("GET").HandlerFunc(s3a.iam.Auth(s3a.ListMultipartUploadsHandler, ACTION_WRITE)).Queries("uploads", "") + // CopyObject + bucket.Methods("PUT").Path("/{object:.+}").HeadersRegexp("X-Amz-Copy-Source", ".*?(\\/|%2F).*?").HandlerFunc(s3a.iam.Auth(s3a.CopyObjectHandler, ACTION_WRITE)) // PutObject - bucket.Methods("PUT").Path("/{object:.+}").HandlerFunc(s3a.PutObjectHandler) + bucket.Methods("PUT").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.PutObjectHandler, ACTION_WRITE)) // PutBucket - bucket.Methods("PUT").HandlerFunc(s3a.PutBucketHandler) + bucket.Methods("PUT").HandlerFunc(s3a.iam.Auth(s3a.PutBucketHandler, ACTION_ADMIN)) // DeleteObject - bucket.Methods("DELETE").Path("/{object:.+}").HandlerFunc(s3a.DeleteObjectHandler) + bucket.Methods("DELETE").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.DeleteObjectHandler, ACTION_WRITE)) // DeleteBucket - bucket.Methods("DELETE").HandlerFunc(s3a.DeleteBucketHandler) + bucket.Methods("DELETE").HandlerFunc(s3a.iam.Auth(s3a.DeleteBucketHandler, ACTION_WRITE)) // ListObjectsV2 - bucket.Methods("GET").HandlerFunc(s3a.ListObjectsV2Handler).Queries("list-type", "2") + bucket.Methods("GET").HandlerFunc(s3a.iam.Auth(s3a.ListObjectsV2Handler, ACTION_READ)).Queries("list-type", "2") // GetObject, but directory listing is not supported - bucket.Methods("GET").Path("/{object:.+}").HandlerFunc(s3a.GetObjectHandler) + bucket.Methods("GET").Path("/{object:.+}").HandlerFunc(s3a.iam.Auth(s3a.GetObjectHandler, ACTION_READ)) // ListObjectsV1 (Legacy) - bucket.Methods("GET").HandlerFunc(s3a.ListObjectsV1Handler) + bucket.Methods("GET").HandlerFunc(s3a.iam.Auth(s3a.ListObjectsV1Handler, ACTION_READ)) // DeleteMultipleObjects - bucket.Methods("POST").HandlerFunc(s3a.DeleteMultipleObjectsHandler).Queries("delete", "") + bucket.Methods("POST").HandlerFunc(s3a.iam.Auth(s3a.DeleteMultipleObjectsHandler, ACTION_WRITE)).Queries("delete", "") /* - // CopyObject - bucket.Methods("PUT").Path("/{object:.+}").HeadersRegexp("X-Amz-Copy-Source", 
".*?(\\/|%2F).*?").HandlerFunc(s3a.CopyObjectHandler) - - // CopyObjectPart - bucket.Methods("PUT").Path("/{object:.+}").HeadersRegexp("X-Amz-Copy-Source", ".*?(\\/|%2F).*?").HandlerFunc(s3a.CopyObjectPartHandler).Queries("partNumber", "{partNumber:[0-9]+}", "uploadId", "{uploadId:.*}") // not implemented // GetBucketLocation @@ -107,7 +106,7 @@ func (s3a *S3ApiServer) registerRouter(router *mux.Router) { } // ListBuckets - apiRouter.Methods("GET").Path("/").HandlerFunc(s3a.ListBucketsHandler) + apiRouter.Methods("GET").Path("/").HandlerFunc(s3a.iam.Auth(s3a.ListBucketsHandler, ACTION_ADMIN)) // NotFound apiRouter.NotFoundHandler = http.HandlerFunc(notFoundHandler) diff --git a/weed/s3api/s3api_test.go b/weed/s3api/s3api_test.go new file mode 100644 index 000000000..026766beb --- /dev/null +++ b/weed/s3api/s3api_test.go @@ -0,0 +1,32 @@ +package s3api + +import ( + "testing" + "time" +) + +func TestCopyObjectResponse(t *testing.T) { + + // https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html + + response := CopyObjectResult{ + ETag: "12345678", + LastModified: time.Now(), + } + + println(string(encodeResponse(response))) + +} + +func TestCopyPartResponse(t *testing.T) { + + // https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPartCopy.html + + response := CopyPartResult{ + ETag: "12345678", + LastModified: time.Now(), + } + + println(string(encodeResponse(response))) + +} diff --git a/weed/s3api/s3api_xsd_generated.go b/weed/s3api/s3api_xsd_generated.go index 915b74ec4..9d62afc4e 100644 --- a/weed/s3api/s3api_xsd_generated.go +++ b/weed/s3api/s3api_xsd_generated.go @@ -25,8 +25,8 @@ type BucketLoggingStatus struct { } type CanonicalUser struct { - ID string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ID"` - DisplayName string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ DisplayName,omitempty"` + ID string `xml:"ID"` + DisplayName string `xml:"DisplayName,omitempty"` } type CopyObject struct { @@ -506,15 +506,15 @@ func (t *ListAllMyBuckets) UnmarshalXML(d *xml.Decoder, start xml.StartElement) } type ListAllMyBucketsEntry struct { - Name string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Name"` - CreationDate time.Time `xml:"http://s3.amazonaws.com/doc/2006-03-01/ CreationDate"` + Name string `xml:"Name"` + CreationDate time.Time `xml:"CreationDate"` } func (t *ListAllMyBucketsEntry) MarshalXML(e *xml.Encoder, start xml.StartElement) error { type T ListAllMyBucketsEntry var layout struct { *T - CreationDate *xsdDateTime `xml:"http://s3.amazonaws.com/doc/2006-03-01/ CreationDate"` + CreationDate *xsdDateTime `xml:"CreationDate"` } layout.T = (*T)(t) layout.CreationDate = (*xsdDateTime)(&layout.T.CreationDate) @@ -524,7 +524,7 @@ func (t *ListAllMyBucketsEntry) UnmarshalXML(d *xml.Decoder, start xml.StartElem type T ListAllMyBucketsEntry var overlay struct { *T - CreationDate *xsdDateTime `xml:"http://s3.amazonaws.com/doc/2006-03-01/ CreationDate"` + CreationDate *xsdDateTime `xml:"CreationDate"` } overlay.T = (*T)(t) overlay.CreationDate = (*xsdDateTime)(&overlay.T.CreationDate) @@ -532,18 +532,13 @@ func (t *ListAllMyBucketsEntry) UnmarshalXML(d *xml.Decoder, start xml.StartElem } type ListAllMyBucketsList struct { - Bucket []ListAllMyBucketsEntry `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Bucket,omitempty"` + Bucket []ListAllMyBucketsEntry `xml:"Bucket,omitempty"` } type ListAllMyBucketsResponse struct { ListAllMyBucketsResponse ListAllMyBucketsResult `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListAllMyBucketsResponse"` } -type ListAllMyBucketsResult 
struct { - Owner CanonicalUser `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Owner"` - Buckets ListAllMyBucketsList `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Buckets"` -} - type ListBucket struct { Bucket string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Bucket"` Prefix string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Prefix,omitempty"` @@ -582,32 +577,33 @@ type ListBucketResponse struct { } type ListBucketResult struct { - Metadata []MetadataEntry `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Metadata,omitempty"` - Name string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Name"` - Prefix string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Prefix"` - Marker string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Marker"` - NextMarker string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ NextMarker,omitempty"` - MaxKeys int `xml:"http://s3.amazonaws.com/doc/2006-03-01/ MaxKeys"` - Delimiter string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Delimiter,omitempty"` - IsTruncated bool `xml:"http://s3.amazonaws.com/doc/2006-03-01/ IsTruncated"` - Contents []ListEntry `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Contents,omitempty"` - CommonPrefixes []PrefixEntry `xml:"http://s3.amazonaws.com/doc/2006-03-01/ CommonPrefixes,omitempty"` + XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"` + Metadata []MetadataEntry `xml:"Metadata,omitempty"` + Name string `xml:"Name"` + Prefix string `xml:"Prefix"` + Marker string `xml:"Marker"` + NextMarker string `xml:"NextMarker,omitempty"` + MaxKeys int `xml:"MaxKeys"` + Delimiter string `xml:"Delimiter,omitempty"` + IsTruncated bool `xml:"IsTruncated"` + Contents []ListEntry `xml:"Contents,omitempty"` + CommonPrefixes []PrefixEntry `xml:"CommonPrefixes,omitempty"` } type ListEntry struct { - Key string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Key"` - LastModified time.Time `xml:"http://s3.amazonaws.com/doc/2006-03-01/ LastModified"` - ETag string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ETag"` - Size int64 `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Size"` - Owner CanonicalUser `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Owner,omitempty"` - StorageClass StorageClass `xml:"http://s3.amazonaws.com/doc/2006-03-01/ StorageClass"` + Key string `xml:"Key"` + LastModified time.Time `xml:"LastModified"` + ETag string `xml:"ETag"` + Size int64 `xml:"Size"` + Owner CanonicalUser `xml:"Owner,omitempty"` + StorageClass StorageClass `xml:"StorageClass"` } func (t *ListEntry) MarshalXML(e *xml.Encoder, start xml.StartElement) error { type T ListEntry var layout struct { *T - LastModified *xsdDateTime `xml:"http://s3.amazonaws.com/doc/2006-03-01/ LastModified"` + LastModified *xsdDateTime `xml:"LastModified"` } layout.T = (*T)(t) layout.LastModified = (*xsdDateTime)(&layout.T.LastModified) @@ -617,7 +613,7 @@ func (t *ListEntry) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { type T ListEntry var overlay struct { *T - LastModified *xsdDateTime `xml:"http://s3.amazonaws.com/doc/2006-03-01/ LastModified"` + LastModified *xsdDateTime `xml:"LastModified"` } overlay.T = (*T)(t) overlay.LastModified = (*xsdDateTime)(&overlay.T.LastModified) @@ -679,7 +675,7 @@ type PostResponse struct { } type PrefixEntry struct { - Prefix string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ Prefix"` + Prefix string `xml:"Prefix"` } type PutObject struct { @@ -970,10 +966,10 @@ func (b xsdBase64Binary) MarshalText() ([]byte, error) { type xsdDateTime time.Time func (t *xsdDateTime) UnmarshalText(text []byte) error { - return 
_unmarshalTime(text, (*time.Time)(t), "2006-01-02T15:04:05.999999999") + return _unmarshalTime(text, (*time.Time)(t), s3TimeFormat) } func (t xsdDateTime) MarshalText() ([]byte, error) { - return []byte((time.Time)(t).Format("2006-01-02T15:04:05.999999999")), nil + return []byte((time.Time)(t).Format(s3TimeFormat)), nil } func (t xsdDateTime) MarshalXML(e *xml.Encoder, start xml.StartElement) error { if (time.Time)(t).IsZero() { diff --git a/weed/security/guard.go b/weed/security/guard.go index dea3b12f2..17fe2ea9e 100644 --- a/weed/security/guard.go +++ b/weed/security/guard.go @@ -41,21 +41,30 @@ https://github.com/pkieltyka/jwtauth/blob/master/jwtauth.go */ type Guard struct { - whiteList []string - SecretKey Secret + whiteList []string + SigningKey SigningKey + ExpiresAfterSec int + ReadSigningKey SigningKey + ReadExpiresAfterSec int - isActive bool + isWriteActive bool } -func NewGuard(whiteList []string, secretKey string) *Guard { - g := &Guard{whiteList: whiteList, SecretKey: Secret(secretKey)} - g.isActive = len(g.whiteList) != 0 || len(g.SecretKey) != 0 +func NewGuard(whiteList []string, signingKey string, expiresAfterSec int, readSigningKey string, readExpiresAfterSec int) *Guard { + g := &Guard{ + whiteList: whiteList, + SigningKey: SigningKey(signingKey), + ExpiresAfterSec: expiresAfterSec, + ReadSigningKey: SigningKey(readSigningKey), + ReadExpiresAfterSec: readExpiresAfterSec, + } + g.isWriteActive = len(g.whiteList) != 0 || len(g.SigningKey) != 0 return g } func (g *Guard) WhiteList(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) { - if !g.isActive { - //if no security needed, just skip all checkings + if !g.isWriteActive { + //if no security needed, just skip all checking return f } return func(w http.ResponseWriter, r *http.Request) { @@ -67,20 +76,6 @@ func (g *Guard) WhiteList(f func(w http.ResponseWriter, r *http.Request)) func(w } } -func (g *Guard) Secure(f func(w http.ResponseWriter, r *http.Request)) func(w http.ResponseWriter, r *http.Request) { - if !g.isActive { - //if no security needed, just skip all checkings - return f - } - return func(w http.ResponseWriter, r *http.Request) { - if err := g.checkJwt(w, r); err != nil { - w.WriteHeader(http.StatusUnauthorized) - return - } - f(w, r) - } -} - func GetActualRemoteHost(r *http.Request) (host string, err error) { host = r.Header.Get("HTTP_X_FORWARDED_FOR") if host == "" { @@ -130,33 +125,3 @@ func (g *Guard) checkWhiteList(w http.ResponseWriter, r *http.Request) error { glog.V(0).Infof("Not in whitelist: %s", r.RemoteAddr) return fmt.Errorf("Not in whitelis: %s", r.RemoteAddr) } - -func (g *Guard) checkJwt(w http.ResponseWriter, r *http.Request) error { - if g.checkWhiteList(w, r) == nil { - return nil - } - - if len(g.SecretKey) == 0 { - return nil - } - - tokenStr := GetJwt(r) - - if tokenStr == "" { - return ErrUnauthorized - } - - // Verify the token - token, err := DecodeJwt(g.SecretKey, tokenStr) - if err != nil { - glog.V(1).Infof("Token verification error from %s: %v", r.RemoteAddr, err) - return ErrUnauthorized - } - if !token.Valid { - glog.V(1).Infof("Token invliad from %s: %v", r.RemoteAddr, tokenStr) - return ErrUnauthorized - } - - glog.V(1).Infof("No permission from %s", r.RemoteAddr) - return fmt.Errorf("No write permisson from %s", r.RemoteAddr) -} diff --git a/weed/security/jwt.go b/weed/security/jwt.go index 46b7efaaf..0bd7fa974 100644 --- a/weed/security/jwt.go +++ b/weed/security/jwt.go @@ -1,9 +1,9 @@ package security import ( + "fmt" "net/http" 
"strings" - "time" "github.com/chrislusf/seaweedfs/weed/glog" @@ -11,21 +11,29 @@ import ( ) type EncodedJwt string -type Secret string +type SigningKey []byte + +type SeaweedFileIdClaims struct { + Fid string `json:"fid"` + jwt.StandardClaims +} -func GenJwt(secret Secret, fileId string) EncodedJwt { - if secret == "" { +func GenJwt(signingKey SigningKey, expiresAfterSec int, fileId string) EncodedJwt { + if len(signingKey) == 0 { return "" } - t := jwt.New(jwt.GetSigningMethod("HS256")) - t.Claims = &jwt.StandardClaims{ - ExpiresAt: time.Now().Add(time.Second * 10).Unix(), - Subject: fileId, + claims := SeaweedFileIdClaims{ + fileId, + jwt.StandardClaims{}, + } + if expiresAfterSec > 0 { + claims.ExpiresAt = time.Now().Add(time.Second * time.Duration(expiresAfterSec)).Unix() } - encoded, e := t.SignedString(secret) + t := jwt.NewWithClaims(jwt.SigningMethodHS256, claims) + encoded, e := t.SignedString([]byte(signingKey)) if e != nil { - glog.V(0).Infof("Failed to sign claims: %v", t.Claims) + glog.V(0).Infof("Failed to sign claims %+v: %v", t.Claims, e) return "" } return EncodedJwt(encoded) @@ -44,31 +52,15 @@ func GetJwt(r *http.Request) EncodedJwt { } } - // Get token from cookie - if tokenStr == "" { - cookie, err := r.Cookie("jwt") - if err == nil { - tokenStr = cookie.Value - } - } - return EncodedJwt(tokenStr) } -func EncodeJwt(secret Secret, claims *jwt.StandardClaims) (EncodedJwt, error) { - if secret == "" { - return "", nil - } - - t := jwt.New(jwt.GetSigningMethod("HS256")) - t.Claims = claims - encoded, e := t.SignedString(secret) - return EncodedJwt(encoded), e -} - -func DecodeJwt(secret Secret, tokenString EncodedJwt) (token *jwt.Token, err error) { +func DecodeJwt(signingKey SigningKey, tokenString EncodedJwt) (token *jwt.Token, err error) { // check exp, nbf - return jwt.Parse(string(tokenString), func(token *jwt.Token) (interface{}, error) { - return secret, nil + return jwt.ParseWithClaims(string(tokenString), &SeaweedFileIdClaims{}, func(token *jwt.Token) (interface{}, error) { + if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok { + return nil, fmt.Errorf("unknown token method") + } + return []byte(signingKey), nil }) } diff --git a/weed/security/tls.go b/weed/security/tls.go new file mode 100644 index 000000000..1832e6e07 --- /dev/null +++ b/weed/security/tls.go @@ -0,0 +1,68 @@ +package security + +import ( + "crypto/tls" + "crypto/x509" + "io/ioutil" + + "github.com/spf13/viper" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +func LoadServerTLS(config *viper.Viper, component string) grpc.ServerOption { + if config == nil { + return nil + } + + // load cert/key, ca cert + cert, err := tls.LoadX509KeyPair(config.GetString(component+".cert"), config.GetString(component+".key")) + if err != nil { + glog.V(1).Infof("load cert/key error: %v", err) + return nil + } + caCert, err := ioutil.ReadFile(config.GetString(component + ".ca")) + if err != nil { + glog.V(1).Infof("read ca cert file error: %v", err) + return nil + } + caCertPool := x509.NewCertPool() + caCertPool.AppendCertsFromPEM(caCert) + ta := credentials.NewTLS(&tls.Config{ + Certificates: []tls.Certificate{cert}, + ClientCAs: caCertPool, + ClientAuth: tls.RequireAndVerifyClientCert, + }) + + return grpc.Creds(ta) +} + +func LoadClientTLS(config *viper.Viper, component string) grpc.DialOption { + if config == nil { + return grpc.WithInsecure() + } + + // load cert/key, cacert + cert, err := 
diff --git a/weed/security/tls.go b/weed/security/tls.go
new file mode 100644
index 000000000..1832e6e07
--- /dev/null
+++ b/weed/security/tls.go
@@ -0,0 +1,68 @@
+package security
+
+import (
+	"crypto/tls"
+	"crypto/x509"
+	"io/ioutil"
+
+	"github.com/spf13/viper"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+)
+
+func LoadServerTLS(config *viper.Viper, component string) grpc.ServerOption {
+	if config == nil {
+		return nil
+	}
+
+	// load cert/key, ca cert
+	cert, err := tls.LoadX509KeyPair(config.GetString(component+".cert"), config.GetString(component+".key"))
+	if err != nil {
+		glog.V(1).Infof("load cert/key error: %v", err)
+		return nil
+	}
+	caCert, err := ioutil.ReadFile(config.GetString(component + ".ca"))
+	if err != nil {
+		glog.V(1).Infof("read ca cert file error: %v", err)
+		return nil
+	}
+	caCertPool := x509.NewCertPool()
+	caCertPool.AppendCertsFromPEM(caCert)
+	ta := credentials.NewTLS(&tls.Config{
+		Certificates: []tls.Certificate{cert},
+		ClientCAs:    caCertPool,
+		ClientAuth:   tls.RequireAndVerifyClientCert,
+	})
+
+	return grpc.Creds(ta)
+}
+
+func LoadClientTLS(config *viper.Viper, component string) grpc.DialOption {
+	if config == nil {
+		return grpc.WithInsecure()
+	}
+
+	// load cert/key, ca cert
+	cert, err := tls.LoadX509KeyPair(config.GetString(component+".cert"), config.GetString(component+".key"))
+	if err != nil {
+		glog.V(1).Infof("load cert/key error: %v", err)
+		return grpc.WithInsecure()
+	}
+	caCert, err := ioutil.ReadFile(config.GetString(component + ".ca"))
+	if err != nil {
+		glog.V(1).Infof("read ca cert file error: %v", err)
+		return grpc.WithInsecure()
+	}
+	caCertPool := x509.NewCertPool()
+	caCertPool.AppendCertsFromPEM(caCert)
+
+	ta := credentials.NewTLS(&tls.Config{
+		Certificates:       []tls.Certificate{cert},
+		RootCAs:            caCertPool,
+		InsecureSkipVerify: true,
+	})
+	return grpc.WithTransportCredentials(ta)
+}
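Callers pick up these credentials per component name. A hedged usage sketch for a client dialing the filer over gRPC (the address is illustrative; elsewhere in this change the filer's gRPC port is its HTTP port + 10000, and the component string "grpc.filer" resolves to the grpc.filer.cert/.key/.ca keys of the security configuration):

package main

import (
	"log"

	"google.golang.org/grpc"

	"github.com/chrislusf/seaweedfs/weed/security"
	"github.com/chrislusf/seaweedfs/weed/util"
)

func main() {
	// With no security configuration this degrades to grpc.WithInsecure().
	dialOption := security.LoadClientTLS(util.GetViper(), "grpc.filer")
	conn, err := grpc.Dial("localhost:18888", dialOption)
	if err != nil {
		log.Fatalf("dial filer: %v", err)
	}
	defer conn.Close()
}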
diff --git a/weed/sequence/etcd_sequencer.go b/weed/sequence/etcd_sequencer.go
new file mode 100644
index 000000000..1fc378640
--- /dev/null
+++ b/weed/sequence/etcd_sequencer.go
@@ -0,0 +1,296 @@
+package sequence
+
+/*
+Note:
+(1) the sequence is stored in the etcd cluster and in a local file (sequencer.dat)
+(2) sequences are fetched from the etcd cluster in batches, and the max sequence id is stored in the local file
+(3) the available range is [currentSeqId, maxSeqId); when currentSeqId >= maxSeqId, a new maxSeqId is fetched
+*/
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+
+	"io"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"go.etcd.io/etcd/client"
+)
+
+const (
+	// EtcdKeyPrefix = "/seaweedfs"
+	EtcdKeySequence          = "/master/sequence"
+	EtcdContextTimeoutSecond = 100 * time.Second
+	DefaultEtcdSteps  uint64 = 500 // internal counter
+	SequencerFileName        = "sequencer.dat"
+	FileMaxSequenceLength    = 128
+)
+
+type EtcdSequencer struct {
+	sequenceLock sync.Mutex
+
+	// available sequence range: [currentSeqId, maxSeqId)
+	currentSeqId uint64
+	maxSeqId     uint64
+
+	keysAPI client.KeysAPI
+	seqFile *os.File
+}
+
+func NewEtcdSequencer(etcdUrls string, metaFolder string) (*EtcdSequencer, error) {
+	file, err := openSequenceFile(metaFolder + "/" + SequencerFileName)
+	if nil != err {
+		return nil, fmt.Errorf("open sequence file failed: %v", err)
+	}
+
+	cli, err := client.New(client.Config{
+		Endpoints: strings.Split(etcdUrls, ","),
+		Username:  "",
+		Password:  "",
+	})
+	if err != nil {
+		return nil, err
+	}
+	keysApi := client.NewKeysAPI(cli)
+
+	// TODO: the current sequence id in the local file is not used
+	maxValue, _, err := readSequenceFile(file)
+	if err != nil {
+		return nil, fmt.Errorf("read sequence from file failed, %v", err)
+	}
+	glog.V(4).Infof("read sequence from file : %d", maxValue)
+
+	newSeq, err := setMaxSequenceToEtcd(keysApi, maxValue)
+	if err != nil {
+		return nil, err
+	}
+
+	sequencer := &EtcdSequencer{maxSeqId: newSeq,
+		currentSeqId: newSeq,
+		keysAPI:      keysApi,
+		seqFile:      file,
+	}
+	return sequencer, nil
+}
+
+func (es *EtcdSequencer) NextFileId(count uint64) uint64 {
+	es.sequenceLock.Lock()
+	defer es.sequenceLock.Unlock()
+
+	if (es.currentSeqId + count) >= es.maxSeqId {
+		reqSteps := DefaultEtcdSteps
+		if count > DefaultEtcdSteps {
+			reqSteps += count
+		}
+		maxId, err := batchGetSequenceFromEtcd(es.keysAPI, reqSteps)
+		glog.V(4).Infof("get max sequence id from etcd, %d", maxId)
+		if err != nil {
+			glog.Error(err)
+			return 0
+		}
+		es.currentSeqId, es.maxSeqId = maxId-reqSteps, maxId
+		glog.V(4).Infof("current id : %d, max id : %d", es.currentSeqId, es.maxSeqId)
+
+		if err := writeSequenceFile(es.seqFile, es.maxSeqId, es.currentSeqId); err != nil {
+			glog.Errorf("flush sequence to file failed, %v", err)
+		}
+	}
+
+	ret := es.currentSeqId
+	es.currentSeqId += count
+	return ret
+}
+
+/**
+instead of collecting the max value from the volume servers,
+the max value is kept in the local sequence file and in the etcd cluster
+*/
+func (es *EtcdSequencer) SetMax(seenValue uint64) {
+	es.sequenceLock.Lock()
+	defer es.sequenceLock.Unlock()
+	if seenValue > es.maxSeqId {
+		maxId, err := setMaxSequenceToEtcd(es.keysAPI, seenValue)
+		if err != nil {
+			glog.Errorf("set etcd max sequence failed: %v", err)
+			return
+		}
+		es.currentSeqId, es.maxSeqId = maxId, maxId
+
+		if err := writeSequenceFile(es.seqFile, maxId, maxId); err != nil {
+			glog.Errorf("flush sequence to file failed, %v", err)
+		}
+	}
+}
+
+func (es *EtcdSequencer) GetMax() uint64 {
+	return es.maxSeqId
+}
+
+func (es *EtcdSequencer) Peek() uint64 {
+	return es.currentSeqId
+}
+
+func batchGetSequenceFromEtcd(kvApi client.KeysAPI, step uint64) (uint64, error) {
+	if step <= 0 {
+		return 0, fmt.Errorf("the step must be greater than 0")
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), EtcdContextTimeoutSecond)
+	var endSeqValue uint64 = 0
+	defer cancel()
+	for {
+		getResp, err := kvApi.Get(ctx, EtcdKeySequence, &client.GetOptions{Recursive: false, Quorum: true})
+		if err != nil {
+			return 0, err
+		}
+		if getResp.Node == nil {
+			continue
+		}
+
+		prevValue := getResp.Node.Value
+		prevSeqValue, err := strconv.ParseUint(prevValue, 10, 64)
+		if err != nil {
+			return 0, fmt.Errorf("get sequence from etcd failed, %v", err)
+		}
+		endSeqValue = prevSeqValue + step
+		endSeqStr := strconv.FormatUint(endSeqValue, 10)
+
+		_, err = kvApi.Set(ctx, EtcdKeySequence, endSeqStr, &client.SetOptions{PrevValue: prevValue})
+		if err == nil {
+			break
+		}
+		glog.Error(err)
+	}
+
+	return endSeqValue, nil
+}
+
+/**
+update the value of the key EtcdKeySequence in the etcd cluster to the parameter maxSeq;
+when the stored value is already equal to or larger than maxSeq,
+return the stored value unchanged,
+otherwise store maxSeq and return it
+*/
+func setMaxSequenceToEtcd(kvApi client.KeysAPI, maxSeq uint64) (uint64, error) {
+	maxSeqStr := strconv.FormatUint(maxSeq, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), EtcdContextTimeoutSecond)
+	defer cancel()
+
+	for {
+		getResp, err := kvApi.Get(ctx, EtcdKeySequence, &client.GetOptions{Recursive: false, Quorum: true})
+		if err != nil {
+			if ce, ok := err.(client.Error); ok && (ce.Code == client.ErrorCodeKeyNotFound) {
+				_, err := kvApi.Create(ctx, EtcdKeySequence, maxSeqStr)
+				if err == nil {
+					continue
+				}
+				if ce, ok = err.(client.Error); ok && (ce.Code == client.ErrorCodeNodeExist) {
+					continue
+				}
+				return 0, err
+			} else {
+				return 0, err
+			}
+		}
+
+		if getResp.Node == nil {
+			continue
+		}
+		prevSeqStr := getResp.Node.Value
+		prevSeq, err := strconv.ParseUint(prevSeqStr, 10, 64)
+		if err != nil {
+			return 0, err
+		}
+		if prevSeq >= maxSeq {
+			return prevSeq, nil
+		}
+
+		_, err = kvApi.Set(ctx, EtcdKeySequence, maxSeqStr, &client.SetOptions{PrevValue: prevSeqStr})
+		if err != nil {
+			return 0, err
+		}
+	}
+}
+
+func openSequenceFile(file string) (*os.File, error) {
+	_, err := os.Stat(file)
+	if os.IsNotExist(err) {
+		fid, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE, 0644)
+		if err != nil {
+			return nil, err
+		}
+		if err := writeSequenceFile(fid, 1, 0); err != nil {
+			return nil, err
+		}
+		return fid, nil
+	} else {
+		return os.OpenFile(file, os.O_RDWR|os.O_CREATE, 0644)
+	}
+}
+
+/*
+read the sequence and step from the sequence file
+*/
+func readSequenceFile(file *os.File) (uint64, uint64, error) {
+	sequence := make([]byte, FileMaxSequenceLength)
+	size, err := file.ReadAt(sequence, 0)
+	if (err != nil) && (err != io.EOF) {
+		err := fmt.Errorf("cannot read file %s, %v", file.Name(), err)
+		return 0, 0, err
+	}
+	sequence = sequence[0:size]
+	seqs := strings.Split(string(sequence), ":")
+	maxId, err := strconv.ParseUint(seqs[0], 10, 64)
+	if err != nil {
+		return 0, 0, fmt.Errorf("parse sequence from file failed, %v", err)
+	}
+
+	if len(seqs) > 1 {
+		step, err := strconv.ParseUint(seqs[1], 10, 64)
+		if err != nil {
+			return 0, 0, fmt.Errorf("parse sequence from file failed, %v", err)
+		}
+		return maxId, step, nil
+	}
+
+	return maxId, 0, nil
+}
+
+/**
+write the sequence to the sequence file; the step parameter is currently
+ignored and the sequence value is written for both fields
+*/
+func writeSequenceFile(file *os.File, sequence, step uint64) error {
+	_ = step // not persisted yet, see the TODO in NewEtcdSequencer
+	seqStr := fmt.Sprintf("%d:%d", sequence, sequence)
+	if _, err := file.Seek(0, 0); err != nil {
+		err = fmt.Errorf("cannot seek to the beginning of %s: %v", file.Name(), err)
+		return err
+	}
+	if err := file.Truncate(0); err != nil {
+		return fmt.Errorf("truncate sequence file failed: %v", err)
+	}
+	if _, err := file.WriteString(seqStr); err != nil {
+		return fmt.Errorf("write file %s failed, %v", file.Name(), err)
+	}
+	if err := file.Sync(); err != nil {
+		return fmt.Errorf("flush file %s failed, %v", file.Name(), err)
+	}
+	return nil
+}
+
+// helper method for unit tests
+// func deleteEtcdKey(kvApi client.KeysAPI, key string) error {
+// 	ctx, cancel := context.WithTimeout(context.Background(), EtcdContextTimeoutSecond)
+// 	defer cancel()
+// 	_, err := kvApi.Delete(ctx, key, &client.DeleteOptions{Dir: false})
+// 	if err != nil {
+// 		return err
+// 	}
+// 	return nil
+// }
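Both etcd helpers above lean on etcd v2 compare-and-swap: Set with PrevValue succeeds only if nobody changed the key in between, so two masters reserving ranges concurrently cannot hand out overlapping ids; the loser of the race simply retries. Concretely, with DefaultEtcdSteps = 500 and a current window of [1000, 1500), ids are served from local memory until 1500 is reached, then one successful CAS moves the key from 1500 to 2000 and the window becomes [1500, 2000). A short usage sketch (the endpoint and folder are illustrative):

package sequence

import "github.com/chrislusf/seaweedfs/weed/glog"

// exampleReserve reserves three consecutive file ids from etcd.
func exampleReserve() uint64 {
	seq, err := NewEtcdSequencer("http://127.0.0.1:2379", "/tmp/seaweedfs-master")
	if err != nil {
		glog.Fatalf("create etcd sequencer: %v", err)
	}
	return seq.NextFileId(3) // the returned id and the next two are reserved
}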
diff --git a/weed/sequence/memory_sequencer.go b/weed/sequence/memory_sequencer.go
index d727dc723..e20c29cc7 100644
--- a/weed/sequence/memory_sequencer.go
+++ b/weed/sequence/memory_sequencer.go
@@ -15,12 +15,12 @@ func NewMemorySequencer() (m *MemorySequencer) {
 	return
 }
 
-func (m *MemorySequencer) NextFileId(count uint64) (uint64, uint64) {
+func (m *MemorySequencer) NextFileId(count uint64) uint64 {
 	m.sequenceLock.Lock()
 	defer m.sequenceLock.Unlock()
 	ret := m.counter
-	m.counter += uint64(count)
-	return ret, count
+	m.counter += count
+	return ret
 }
 
 func (m *MemorySequencer) SetMax(seenValue uint64) {
diff --git a/weed/sequence/sequence.go b/weed/sequence/sequence.go
index fbdc3b8ef..2258d001b 100644
--- a/weed/sequence/sequence.go
+++ b/weed/sequence/sequence.go
@@ -1,7 +1,7 @@
 package sequence
 
 type Sequencer interface {
-	NextFileId(count uint64) (uint64, uint64)
+	NextFileId(count uint64) uint64
 	SetMax(uint64)
 	Peek() uint64
 }
diff --git a/weed/server/common.go b/weed/server/common.go
index f1f2e4e4f..e06142d7f 100644
--- a/weed/server/common.go
+++ b/weed/server/common.go
@@ -1,26 +1,29 @@
 package weed_server
 
 import (
-	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
+	"io"
+	"mime/multipart"
 	"net/http"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"time"
 
+	"google.golang.org/grpc"
+
 	"github.com/chrislusf/seaweedfs/weed/glog"
 	"github.com/chrislusf/seaweedfs/weed/operation"
-	"github.com/chrislusf/seaweedfs/weed/security"
 	"github.com/chrislusf/seaweedfs/weed/stats"
-	"github.com/chrislusf/seaweedfs/weed/storage"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
 	"github.com/chrislusf/seaweedfs/weed/util"
 
-	_ "github.com/chrislusf/seaweedfs/weed/statik"
 	"github.com/gorilla/mux"
 	statik "github.com/rakyll/statik/fs"
+
+	_ "github.com/chrislusf/seaweedfs/weed/statik"
 )
 
 var serverStats *stats.ServerStats
@@ -47,10 +50,16 @@ func writeJson(w 
http.ResponseWriter, r *http.Request, httpStatus int, obj inter if callback == "" { w.Header().Set("Content-Type", "application/json") w.WriteHeader(httpStatus) + if httpStatus == http.StatusNotModified { + return + } _, err = w.Write(bytes) } else { w.Header().Set("Content-Type", "application/javascript") w.WriteHeader(httpStatus) + if httpStatus == http.StatusNotModified { + return + } if _, err = w.Write([]uint8(callback)); err != nil { return } @@ -69,7 +78,8 @@ func writeJson(w http.ResponseWriter, r *http.Request, httpStatus int, obj inter // wrapper for writeJson - just logs errors func writeJsonQuiet(w http.ResponseWriter, r *http.Request, httpStatus int, obj interface{}) { if err := writeJson(w, r, httpStatus, obj); err != nil { - glog.V(0).Infof("error writing JSON %s: %v", obj, err) + glog.V(0).Infof("error writing JSON status %d: %v", httpStatus, err) + glog.V(1).Infof("JSON content: %+v", obj) } } func writeJsonError(w http.ResponseWriter, r *http.Request, httpStatus int, err error) { @@ -82,8 +92,7 @@ func debug(params ...interface{}) { glog.V(4).Infoln(params...) } -func submitForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl string) { - jwt := security.GetJwt(r) +func submitForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl string, grpcDialOption grpc.DialOption) { m := make(map[string]interface{}) if r.Method != "POST" { writeJsonError(w, r, http.StatusMethodNotAllowed, errors.New("Only submit via POST!")) @@ -91,42 +100,51 @@ func submitForClientHandler(w http.ResponseWriter, r *http.Request, masterUrl st } debug("parsing upload file...") - fname, data, mimeType, pairMap, isGzipped, lastModified, _, _, pe := storage.ParseUpload(r) + pu, pe := needle.ParseUpload(r, 256*1024*1024) if pe != nil { writeJsonError(w, r, http.StatusBadRequest, pe) return } - debug("assigning file id for", fname) + debug("assigning file id for", pu.FileName) r.ParseForm() + count := uint64(1) + if r.FormValue("count") != "" { + count, pe = strconv.ParseUint(r.FormValue("count"), 10, 32) + if pe != nil { + writeJsonError(w, r, http.StatusBadRequest, pe) + return + } + } ar := &operation.VolumeAssignRequest{ - Count: 1, + Count: count, + DataCenter: r.FormValue("dataCenter"), Replication: r.FormValue("replication"), Collection: r.FormValue("collection"), Ttl: r.FormValue("ttl"), } - assignResult, ae := operation.Assign(masterUrl, ar) + assignResult, ae := operation.Assign(masterUrl, grpcDialOption, ar) if ae != nil { writeJsonError(w, r, http.StatusInternalServerError, ae) return } url := "http://" + assignResult.Url + "/" + assignResult.Fid - if lastModified != 0 { - url = url + "?ts=" + strconv.FormatUint(lastModified, 10) + if pu.ModifiedTime != 0 { + url = url + "?ts=" + strconv.FormatUint(pu.ModifiedTime, 10) } debug("upload file to store", url) - uploadResult, err := operation.Upload(url, fname, bytes.NewReader(data), isGzipped, mimeType, pairMap, jwt) + uploadResult, err := operation.UploadData(url, pu.FileName, false, pu.Data, pu.IsGzipped, pu.MimeType, pu.PairMap, assignResult.Auth) if err != nil { writeJsonError(w, r, http.StatusInternalServerError, err) return } - m["fileName"] = fname + m["fileName"] = pu.FileName m["fid"] = assignResult.Fid m["fileUrl"] = assignResult.PublicUrl + "/" + assignResult.Fid - m["size"] = uploadResult.Size + m["size"] = pu.OriginalDataSize m["eTag"] = uploadResult.ETag writeJsonQuiet(w, r, http.StatusCreated, m) return @@ -193,3 +211,107 @@ func handleStaticResources2(r *mux.Router) { r.Handle("/favicon.ico", 
http.FileServer(statikFS)) r.PathPrefix("/seaweedfsstatic/").Handler(http.StripPrefix("/seaweedfsstatic", http.FileServer(statikFS))) } + +func adjustHeadersAfterHEAD(w http.ResponseWriter, r *http.Request, filename string) { + if filename != "" { + contentDisposition := "inline" + if r.FormValue("dl") != "" { + if dl, _ := strconv.ParseBool(r.FormValue("dl")); dl { + contentDisposition = "attachment" + } + } + w.Header().Set("Content-Disposition", contentDisposition+`; filename="`+fileNameEscaper.Replace(filename)+`"`) + } +} + +func processRangeRequst(r *http.Request, w http.ResponseWriter, totalSize int64, mimeType string, writeFn func(writer io.Writer, offset int64, size int64) error) { + rangeReq := r.Header.Get("Range") + + if rangeReq == "" { + w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) + if err := writeFn(w, 0, totalSize); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + return + } + + //the rest is dealing with partial content request + //mostly copy from src/pkg/net/http/fs.go + ranges, err := parseRange(rangeReq, totalSize) + if err != nil { + http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable) + return + } + if sumRangesSize(ranges) > totalSize { + // The total number of bytes in all the ranges + // is larger than the size of the file by + // itself, so this is probably an attack, or a + // dumb client. Ignore the range request. + return + } + if len(ranges) == 0 { + return + } + if len(ranges) == 1 { + // RFC 2616, Section 14.16: + // "When an HTTP message includes the content of a single + // range (for example, a response to a request for a + // single range, or to a request for a set of ranges + // that overlap without any holes), this content is + // transmitted with a Content-Range header, and a + // Content-Length header showing the number of bytes + // actually transferred. + // ... + // A response to a request for a single range MUST NOT + // be sent using the multipart/byteranges media type." + ra := ranges[0] + w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) + w.Header().Set("Content-Range", ra.contentRange(totalSize)) + w.WriteHeader(http.StatusPartialContent) + + err = writeFn(w, ra.start, ra.length) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + return + } + + // process multiple ranges + for _, ra := range ranges { + if ra.start > totalSize { + http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable) + return + } + } + sendSize := rangesMIMESize(ranges, mimeType, totalSize) + pr, pw := io.Pipe() + mw := multipart.NewWriter(pw) + w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary()) + sendContent := pr + defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish. 
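+	// The goroutine below writes each requested range as its own MIME part
+	// into the pipe (ra.mimeHeader supplies the part's Content-Type and
+	// Content-Range); the main flow then streams exactly sendSize bytes of
+	// the multipart/byteranges body to the client under the 206 status.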
+ go func() { + for _, ra := range ranges { + part, e := mw.CreatePart(ra.mimeHeader(mimeType, totalSize)) + if e != nil { + pw.CloseWithError(e) + return + } + if e = writeFn(part, ra.start, ra.length); e != nil { + pw.CloseWithError(e) + return + } + } + mw.Close() + pw.Close() + }() + if w.Header().Get("Content-Encoding") == "" { + w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10)) + } + w.WriteHeader(http.StatusPartialContent) + if _, err := io.CopyN(w, sendContent, sendSize); err != nil { + http.Error(w, "Internal Error", http.StatusInternalServerError) + return + } +} diff --git a/weed/server/common_test.go b/weed/server/common_test.go new file mode 100644 index 000000000..2e6c70bfe --- /dev/null +++ b/weed/server/common_test.go @@ -0,0 +1,31 @@ +package weed_server + +import ( + "strings" + "testing" +) + +func TestParseURL(t *testing.T) { + if vid, fid, _, _, _ := parseURLPath("/1,06dfa8a684"); true { + if vid != "1" { + t.Errorf("fail to parse vid: %s", vid) + } + if fid != "06dfa8a684" { + t.Errorf("fail to parse fid: %s", fid) + } + } + if vid, fid, _, _, _ := parseURLPath("/1,06dfa8a684_1"); true { + if vid != "1" { + t.Errorf("fail to parse vid: %s", vid) + } + if fid != "06dfa8a684_1" { + t.Errorf("fail to parse fid: %s", fid) + } + if sepIndex := strings.LastIndex(fid, "_"); sepIndex > 0 { + fid = fid[:sepIndex] + } + if fid != "06dfa8a684" { + t.Errorf("fail to parse fid: %s", fid) + } + } +} diff --git a/weed/server/filer_grpc_server.go b/weed/server/filer_grpc_server.go index 72d5b2e1d..b904c1393 100644 --- a/weed/server/filer_grpc_server.go +++ b/weed/server/filer_grpc_server.go @@ -5,22 +5,26 @@ import ( "fmt" "os" "path/filepath" + "strconv" + "strings" "time" "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/chrislusf/seaweedfs/weed/util" - "strconv" - "strings" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" ) func (fs *FilerServer) LookupDirectoryEntry(ctx context.Context, req *filer_pb.LookupDirectoryEntryRequest) (*filer_pb.LookupDirectoryEntryResponse, error) { - entry, err := fs.filer.FindEntry(filer2.FullPath(filepath.Join(req.Directory, req.Name))) + entry, err := fs.filer.FindEntry(ctx, filer2.FullPath(filepath.ToSlash(filepath.Join(req.Directory, req.Name)))) + if err == filer_pb.ErrNotFound { + return &filer_pb.LookupDirectoryEntryResponse{}, nil + } if err != nil { - return nil, fmt.Errorf("%s not found under %s: %v", req.Name, req.Directory, err) + glog.V(3).Infof("LookupDirectoryEntry %s: %+v, ", filepath.Join(req.Directory, req.Name), err) + return nil, err } return &filer_pb.LookupDirectoryEntryResponse{ @@ -29,27 +33,33 @@ func (fs *FilerServer) LookupDirectoryEntry(ctx context.Context, req *filer_pb.L IsDirectory: entry.IsDirectory(), Attributes: filer2.EntryAttributeToPb(entry), Chunks: entry.Chunks, + Extended: entry.Extended, }, }, nil } -func (fs *FilerServer) ListEntries(ctx context.Context, req *filer_pb.ListEntriesRequest) (*filer_pb.ListEntriesResponse, error) { +func (fs *FilerServer) ListEntries(req *filer_pb.ListEntriesRequest, stream filer_pb.SeaweedFiler_ListEntriesServer) error { limit := int(req.Limit) if limit == 0 { limit = fs.option.DirListingLimit } - resp := &filer_pb.ListEntriesResponse{} + paginationLimit := filer2.PaginationSize + if limit < paginationLimit { + paginationLimit = limit + } + lastFileName := req.StartFromFileName includeLastFile := 
req.InclusiveStartFrom for limit > 0 { - entries, err := fs.filer.ListDirectoryEntries(filer2.FullPath(req.Directory), lastFileName, includeLastFile, limit) + entries, err := fs.filer.ListDirectoryEntries(stream.Context(), filer2.FullPath(req.Directory), lastFileName, includeLastFile, paginationLimit) + if err != nil { - return nil, err + return err } if len(entries) == 0 { - return resp, nil + return nil } includeLastFile = false @@ -64,18 +74,31 @@ func (fs *FilerServer) ListEntries(ctx context.Context, req *filer_pb.ListEntrie } } - resp.Entries = append(resp.Entries, &filer_pb.Entry{ - Name: entry.Name(), - IsDirectory: entry.IsDirectory(), - Chunks: entry.Chunks, - Attributes: filer2.EntryAttributeToPb(entry), - }) + if err := stream.Send(&filer_pb.ListEntriesResponse{ + Entry: &filer_pb.Entry{ + Name: entry.Name(), + IsDirectory: entry.IsDirectory(), + Chunks: entry.Chunks, + Attributes: filer2.EntryAttributeToPb(entry), + Extended: entry.Extended, + }, + }); err != nil { + return err + } + limit-- + if limit == 0 { + return nil + } + } + + if len(entries) < paginationLimit { + break } } - return resp, nil + return nil } func (fs *FilerServer) LookupVolume(ctx context.Context, req *filer_pb.LookupVolumeRequest) (*filer_pb.LookupVolumeResponse, error) { @@ -91,7 +114,11 @@ func (fs *FilerServer) LookupVolume(ctx context.Context, req *filer_pb.LookupVol return nil, err } var locs []*filer_pb.Location - for _, loc := range fs.filer.MasterClient.GetLocations(uint32(vid)) { + locations, found := fs.filer.MasterClient.GetLocations(uint32(vid)) + if !found { + continue + } + for _, loc := range locations { locs = append(locs, &filer_pb.Location{ Url: loc.Url, PublicUrl: loc.PublicUrl, @@ -107,45 +134,57 @@ func (fs *FilerServer) LookupVolume(ctx context.Context, req *filer_pb.LookupVol func (fs *FilerServer) CreateEntry(ctx context.Context, req *filer_pb.CreateEntryRequest) (resp *filer_pb.CreateEntryResponse, err error) { - fullpath := filer2.FullPath(filepath.Join(req.Directory, req.Entry.Name)) + resp = &filer_pb.CreateEntryResponse{} + + fullpath := filer2.FullPath(filepath.ToSlash(filepath.Join(req.Directory, req.Entry.Name))) chunks, garbages := filer2.CompactFileChunks(req.Entry.Chunks) - fs.filer.DeleteChunks(garbages) + if req.Entry.Attributes == nil { + glog.V(3).Infof("CreateEntry %s: nil attributes", filepath.Join(req.Directory, req.Entry.Name)) + resp.Error = fmt.Sprintf("can not create entry with empty attributes") + return + } - err = fs.filer.CreateEntry(&filer2.Entry{ + createErr := fs.filer.CreateEntry(ctx, &filer2.Entry{ FullPath: fullpath, Attr: filer2.PbToEntryAttribute(req.Entry.Attributes), Chunks: chunks, - }) + }, req.OExcl) - if err == nil { + if createErr == nil { + fs.filer.DeleteChunks(garbages) + } else { + glog.V(3).Infof("CreateEntry %s: %v", filepath.Join(req.Directory, req.Entry.Name), createErr) + resp.Error = createErr.Error() } - return &filer_pb.CreateEntryResponse{}, err + return } func (fs *FilerServer) UpdateEntry(ctx context.Context, req *filer_pb.UpdateEntryRequest) (*filer_pb.UpdateEntryResponse, error) { - fullpath := filepath.Join(req.Directory, req.Entry.Name) - entry, err := fs.filer.FindEntry(filer2.FullPath(fullpath)) + fullpath := filepath.ToSlash(filepath.Join(req.Directory, req.Entry.Name)) + entry, err := fs.filer.FindEntry(ctx, filer2.FullPath(fullpath)) if err != nil { return &filer_pb.UpdateEntryResponse{}, fmt.Errorf("not found %s: %v", fullpath, err) } // remove old chunks if not included in the new ones - unusedChunks := 
filer2.FindUnusedFileChunks(entry.Chunks, req.Entry.Chunks) + unusedChunks := filer2.MinusChunks(entry.Chunks, req.Entry.Chunks) chunks, garbages := filer2.CompactFileChunks(req.Entry.Chunks) newEntry := &filer2.Entry{ - FullPath: filer2.FullPath(filepath.Join(req.Directory, req.Entry.Name)), + FullPath: filer2.FullPath(filepath.ToSlash(filepath.Join(req.Directory, req.Entry.Name))), Attr: entry.Attr, + Extended: req.Entry.Extended, Chunks: chunks, } - glog.V(3).Infof("updating %s: %+v, chunks %d: %v => %+v, chunks %d: %v", + glog.V(3).Infof("updating %s: %+v, chunks %d: %v => %+v, chunks %d: %v, extended: %v => %v", fullpath, entry.Attr, len(entry.Chunks), entry.Chunks, - req.Entry.Attributes, len(req.Entry.Chunks), req.Entry.Chunks) + req.Entry.Attributes, len(req.Entry.Chunks), req.Entry.Chunks, + entry.Extended, req.Entry.Extended) if req.Entry.Attributes != nil { if req.Entry.Attributes.Mtime != 0 { @@ -157,6 +196,8 @@ func (fs *FilerServer) UpdateEntry(ctx context.Context, req *filer_pb.UpdateEntr newEntry.Attr.Uid = req.Entry.Attributes.Uid newEntry.Attr.Gid = req.Entry.Attributes.Gid newEntry.Attr.Mime = req.Entry.Attributes.Mime + newEntry.Attr.UserName = req.Entry.Attributes.UserName + newEntry.Attr.GroupNames = req.Entry.Attributes.GroupName } @@ -164,9 +205,11 @@ func (fs *FilerServer) UpdateEntry(ctx context.Context, req *filer_pb.UpdateEntr return &filer_pb.UpdateEntryResponse{}, err } - if err = fs.filer.UpdateEntry(newEntry); err == nil { + if err = fs.filer.UpdateEntry(ctx, entry, newEntry); err == nil { fs.filer.DeleteChunks(unusedChunks) fs.filer.DeleteChunks(garbages) + } else { + glog.V(3).Infof("UpdateEntry %s: %v", filepath.Join(req.Directory, req.Entry.Name), err) } fs.filer.NotifyUpdateEvent(entry, newEntry, true) @@ -175,8 +218,31 @@ func (fs *FilerServer) UpdateEntry(ctx context.Context, req *filer_pb.UpdateEntr } func (fs *FilerServer) DeleteEntry(ctx context.Context, req *filer_pb.DeleteEntryRequest) (resp *filer_pb.DeleteEntryResponse, err error) { - err = fs.filer.DeleteEntryMetaAndData(filer2.FullPath(filepath.Join(req.Directory, req.Name)), req.IsRecursive, req.IsDeleteData) - return &filer_pb.DeleteEntryResponse{}, err + err = fs.filer.DeleteEntryMetaAndData(ctx, filer2.FullPath(filepath.ToSlash(filepath.Join(req.Directory, req.Name))), req.IsRecursive, req.IgnoreRecursiveError, req.IsDeleteData) + resp = &filer_pb.DeleteEntryResponse{} + if err != nil { + resp.Error = err.Error() + } + return resp, nil +} + +func (fs *FilerServer) StreamDeleteEntries(stream filer_pb.SeaweedFiler_StreamDeleteEntriesServer) error { + for { + req, err := stream.Recv() + if err != nil { + return fmt.Errorf("receive delete entry request: %v", err) + } + fullpath := filer2.FullPath(filepath.ToSlash(filepath.Join(req.Directory, req.Name))) + err = fs.filer.DeleteEntryMetaAndData(context.Background(), fullpath, req.IsRecursive, req.IgnoreRecursiveError, req.IsDeleteData) + resp := &filer_pb.DeleteEntryResponse{} + if err != nil { + resp.Error = err.Error() + } + if err := stream.Send(resp); err != nil { + return err + } + } + return nil } func (fs *FilerServer) AssignVolume(ctx context.Context, req *filer_pb.AssignVolumeRequest) (resp *filer_pb.AssignVolumeResponse, err error) { @@ -185,6 +251,7 @@ func (fs *FilerServer) AssignVolume(ctx context.Context, req *filer_pb.AssignVol if req.TtlSec > 0 { ttlStr = strconv.Itoa(int(req.TtlSec)) } + collection, replication := fs.detectCollection(req.ParentPath, req.Collection, req.Replication) var altRequest *operation.VolumeAssignRequest 
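The StreamDeleteEntries handler above accepts a long-lived bidirectional stream: one DeleteEntryRequest in, one DeleteEntryResponse out, with per-entry failures carried in resp.Error rather than tearing the stream down. A hedged client-side sketch against the generated stub (the exact stub method shape follows standard gRPC codegen and is assumed, not shown in this change):

package main

import (
	"context"
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
)

// streamDelete deletes the named entries over one stream; paths holds
// (directory, name) pairs.
func streamDelete(client filer_pb.SeaweedFilerClient, paths [][2]string) error {
	stream, err := client.StreamDeleteEntries(context.Background())
	if err != nil {
		return err
	}
	for _, p := range paths {
		if err := stream.Send(&filer_pb.DeleteEntryRequest{
			Directory:    p[0],
			Name:         p[1],
			IsDeleteData: true, // also reclaim the file chunks
		}); err != nil {
			return err
		}
		resp, err := stream.Recv()
		if err != nil {
			return err
		}
		if resp.Error != "" {
			return fmt.Errorf("delete %s/%s: %s", p[0], p[1], resp.Error)
		}
	}
	return stream.CloseSend()
}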
@@ -195,41 +262,82 @@ func (fs *FilerServer) AssignVolume(ctx context.Context, req *filer_pb.AssignVol assignRequest := &operation.VolumeAssignRequest{ Count: uint64(req.Count), - Replication: req.Replication, - Collection: req.Collection, + Replication: replication, + Collection: collection, Ttl: ttlStr, DataCenter: dataCenter, } if dataCenter != "" { altRequest = &operation.VolumeAssignRequest{ Count: uint64(req.Count), - Replication: req.Replication, - Collection: req.Collection, + Replication: replication, + Collection: collection, Ttl: ttlStr, DataCenter: "", } } - assignResult, err := operation.Assign(fs.filer.GetMaster(), assignRequest, altRequest) + assignResult, err := operation.Assign(fs.filer.GetMaster(), fs.grpcDialOption, assignRequest, altRequest) if err != nil { - return nil, fmt.Errorf("assign volume: %v", err) + glog.V(3).Infof("AssignVolume: %v", err) + return &filer_pb.AssignVolumeResponse{Error: fmt.Sprintf("assign volume: %v", err)}, nil } if assignResult.Error != "" { - return nil, fmt.Errorf("assign volume result: %v", assignResult.Error) + glog.V(3).Infof("AssignVolume error: %v", assignResult.Error) + return &filer_pb.AssignVolumeResponse{Error: fmt.Sprintf("assign volume result: %v", assignResult.Error)}, nil } return &filer_pb.AssignVolumeResponse{ - FileId: assignResult.Fid, - Count: int32(assignResult.Count), - Url: assignResult.Url, - PublicUrl: assignResult.PublicUrl, - }, err + FileId: assignResult.Fid, + Count: int32(assignResult.Count), + Url: assignResult.Url, + PublicUrl: assignResult.PublicUrl, + Auth: string(assignResult.Auth), + Collection: collection, + Replication: replication, + }, nil } func (fs *FilerServer) DeleteCollection(ctx context.Context, req *filer_pb.DeleteCollectionRequest) (resp *filer_pb.DeleteCollectionResponse, err error) { - for _, master := range fs.option.Masters { - _, err = util.Get(fmt.Sprintf("http://%s/col/delete?collection=%s", master, req.Collection)) - } + err = fs.filer.MasterClient.WithClient(func(client master_pb.SeaweedClient) error { + _, err := client.CollectionDelete(context.Background(), &master_pb.CollectionDeleteRequest{ + Name: req.GetCollection(), + }) + return err + }) return &filer_pb.DeleteCollectionResponse{}, err } + +func (fs *FilerServer) Statistics(ctx context.Context, req *filer_pb.StatisticsRequest) (resp *filer_pb.StatisticsResponse, err error) { + + input := &master_pb.StatisticsRequest{ + Replication: req.Replication, + Collection: req.Collection, + Ttl: req.Ttl, + } + + output, err := operation.Statistics(fs.filer.GetMaster(), fs.grpcDialOption, input) + if err != nil { + return nil, err + } + + return &filer_pb.StatisticsResponse{ + TotalSize: output.TotalSize, + UsedSize: output.UsedSize, + FileCount: output.FileCount, + }, nil +} + +func (fs *FilerServer) GetFilerConfiguration(ctx context.Context, req *filer_pb.GetFilerConfigurationRequest) (resp *filer_pb.GetFilerConfigurationResponse, err error) { + + return &filer_pb.GetFilerConfigurationResponse{ + Masters: fs.option.Masters, + Collection: fs.option.Collection, + Replication: fs.option.DefaultReplication, + MaxMb: uint32(fs.option.MaxMB), + DirBuckets: fs.filer.DirBucketsPath, + DirQueues: fs.filer.DirQueuesPath, + Cipher: fs.filer.Cipher, + }, nil +} diff --git a/weed/server/filer_grpc_server_rename.go b/weed/server/filer_grpc_server_rename.go new file mode 100644 index 000000000..0669a26f1 --- /dev/null +++ b/weed/server/filer_grpc_server_rename.go @@ -0,0 +1,130 @@ +package weed_server + +import ( + "context" + "fmt" + 
"github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "path/filepath" +) + +func (fs *FilerServer) AtomicRenameEntry(ctx context.Context, req *filer_pb.AtomicRenameEntryRequest) (*filer_pb.AtomicRenameEntryResponse, error) { + + glog.V(1).Infof("AtomicRenameEntry %v", req) + + ctx, err := fs.filer.BeginTransaction(ctx) + if err != nil { + return nil, err + } + + oldParent := filer2.FullPath(filepath.ToSlash(req.OldDirectory)) + + oldEntry, err := fs.filer.FindEntry(ctx, oldParent.Child(req.OldName)) + if err != nil { + fs.filer.RollbackTransaction(ctx) + return nil, fmt.Errorf("%s/%s not found: %v", req.OldDirectory, req.OldName, err) + } + + var events MoveEvents + moveErr := fs.moveEntry(ctx, oldParent, oldEntry, filer2.FullPath(filepath.ToSlash(req.NewDirectory)), req.NewName, &events) + if moveErr != nil { + fs.filer.RollbackTransaction(ctx) + return nil, fmt.Errorf("%s/%s move error: %v", req.OldDirectory, req.OldName, err) + } else { + if commitError := fs.filer.CommitTransaction(ctx); commitError != nil { + fs.filer.RollbackTransaction(ctx) + return nil, fmt.Errorf("%s/%s move commit error: %v", req.OldDirectory, req.OldName, err) + } + } + + for _, entry := range events.newEntries { + fs.filer.NotifyUpdateEvent(nil, entry, false) + } + for _, entry := range events.oldEntries { + fs.filer.NotifyUpdateEvent(entry, nil, false) + } + + return &filer_pb.AtomicRenameEntryResponse{}, nil +} + +func (fs *FilerServer) moveEntry(ctx context.Context, oldParent filer2.FullPath, entry *filer2.Entry, newParent filer2.FullPath, newName string, events *MoveEvents) error { + if entry.IsDirectory() { + if err := fs.moveFolderSubEntries(ctx, oldParent, entry, newParent, newName, events); err != nil { + return err + } + } + return fs.moveSelfEntry(ctx, oldParent, entry, newParent, newName, events) +} + +func (fs *FilerServer) moveFolderSubEntries(ctx context.Context, oldParent filer2.FullPath, entry *filer2.Entry, newParent filer2.FullPath, newName string, events *MoveEvents) error { + + currentDirPath := oldParent.Child(entry.Name()) + newDirPath := newParent.Child(newName) + + glog.V(1).Infof("moving folder %s => %s", currentDirPath, newDirPath) + + lastFileName := "" + includeLastFile := false + for { + + entries, err := fs.filer.ListDirectoryEntries(ctx, currentDirPath, lastFileName, includeLastFile, 1024) + if err != nil { + return err + } + + // println("found", len(entries), "entries under", currentDirPath) + + for _, item := range entries { + lastFileName = item.Name() + // println("processing", lastFileName) + err := fs.moveEntry(ctx, currentDirPath, item, newDirPath, item.Name(), events) + if err != nil { + return err + } + } + if len(entries) < 1024 { + break + } + } + return nil +} + +func (fs *FilerServer) moveSelfEntry(ctx context.Context, oldParent filer2.FullPath, entry *filer2.Entry, newParent filer2.FullPath, newName string, events *MoveEvents) error { + + oldPath, newPath := oldParent.Child(entry.Name()), newParent.Child(newName) + + glog.V(1).Infof("moving entry %s => %s", oldPath, newPath) + + if oldPath == newPath { + glog.V(1).Infof("skip moving entry %s => %s", oldPath, newPath) + return nil + } + + // add to new directory + newEntry := &filer2.Entry{ + FullPath: newPath, + Attr: entry.Attr, + Chunks: entry.Chunks, + } + createErr := fs.filer.CreateEntry(ctx, newEntry, false) + if createErr != nil { + return createErr + } + + // delete old entry + deleteErr := 
fs.filer.DeleteEntryMetaAndData(ctx, oldPath, false, false, false) + if deleteErr != nil { + return deleteErr + } + + events.oldEntries = append(events.oldEntries, entry) + events.newEntries = append(events.newEntries, newEntry) + return nil + +} + +type MoveEvents struct { + oldEntries []*filer2.Entry + newEntries []*filer2.Entry +} diff --git a/weed/server/filer_server.go b/weed/server/filer_server.go index 2aabb9932..656bb2ed8 100644 --- a/weed/server/filer_server.go +++ b/weed/server/filer_server.go @@ -1,96 +1,140 @@ package weed_server import ( + "context" + "fmt" "net/http" + "os" + "time" + + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/util" "github.com/chrislusf/seaweedfs/weed/filer2" _ "github.com/chrislusf/seaweedfs/weed/filer2/cassandra" + _ "github.com/chrislusf/seaweedfs/weed/filer2/etcd" _ "github.com/chrislusf/seaweedfs/weed/filer2/leveldb" - _ "github.com/chrislusf/seaweedfs/weed/filer2/memdb" + _ "github.com/chrislusf/seaweedfs/weed/filer2/leveldb2" _ "github.com/chrislusf/seaweedfs/weed/filer2/mysql" _ "github.com/chrislusf/seaweedfs/weed/filer2/postgres" _ "github.com/chrislusf/seaweedfs/weed/filer2/redis" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/notification" + _ "github.com/chrislusf/seaweedfs/weed/notification/aws_sqs" + _ "github.com/chrislusf/seaweedfs/weed/notification/gocdk_pub_sub" + _ "github.com/chrislusf/seaweedfs/weed/notification/google_pub_sub" _ "github.com/chrislusf/seaweedfs/weed/notification/kafka" _ "github.com/chrislusf/seaweedfs/weed/notification/log" "github.com/chrislusf/seaweedfs/weed/security" - "github.com/spf13/viper" ) type FilerOption struct { Masters []string Collection string DefaultReplication string - RedirectOnRead bool DisableDirListing bool MaxMB int - SecretKey string DirListingLimit int DataCenter string + DefaultLevelDbDir string + DisableHttp bool + Port uint32 + recursiveDelete bool + Cipher bool } type FilerServer struct { - option *FilerOption - secret security.Secret - filer *filer2.Filer + option *FilerOption + secret security.SigningKey + filer *filer2.Filer + grpcDialOption grpc.DialOption } func NewFilerServer(defaultMux, readonlyMux *http.ServeMux, option *FilerOption) (fs *FilerServer, err error) { fs = &FilerServer{ - option: option, + option: option, + grpcDialOption: security.LoadClientTLS(util.GetViper(), "grpc.filer"), } if len(option.Masters) == 0 { glog.Fatal("master list is required!") } - fs.filer = filer2.NewFiler(option.Masters) + fs.filer = filer2.NewFiler(option.Masters, fs.grpcDialOption, option.Port+10000) + fs.filer.Cipher = option.Cipher go fs.filer.KeepConnectedToMaster() - LoadConfiguration("filer", true) - v := viper.GetViper() + v := util.GetViper() + if !util.LoadConfiguration("filer", false) { + v.Set("leveldb2.enabled", true) + v.Set("leveldb2.dir", option.DefaultLevelDbDir) + _, err := os.Stat(option.DefaultLevelDbDir) + if os.IsNotExist(err) { + os.MkdirAll(option.DefaultLevelDbDir, 0755) + } + } + util.LoadConfiguration("notification", false) + fs.option.recursiveDelete = v.GetBool("filer.options.recursive_delete") + v.Set("filer.option.buckets_folder", "/buckets") + v.Set("filer.option.queues_folder", "/queues") + fs.filer.DirBucketsPath = v.GetString("filer.option.buckets_folder") + fs.filer.DirQueuesPath = v.GetString("filer.option.queues_folder") fs.filer.LoadConfiguration(v) - 
notification.LoadConfiguration(v.Sub("notification")) + notification.LoadConfiguration(v, "notification.") handleStaticResources(defaultMux) - defaultMux.HandleFunc("/", fs.filerHandler) + if !option.DisableHttp { + defaultMux.HandleFunc("/", fs.filerHandler) + } if defaultMux != readonlyMux { readonlyMux.HandleFunc("/", fs.readonlyFilerHandler) } - return fs, nil -} + fs.filer.LoadBuckets(fs.filer.DirBucketsPath) + + maybeStartMetrics(fs, option) -func (fs *FilerServer) jwt(fileId string) security.EncodedJwt { - return security.GenJwt(fs.secret, fileId) + return fs, nil } -func LoadConfiguration(configFileName string, required bool) { - - // find a filer store - viper.SetConfigName(configFileName) // name of config file (without extension) - viper.AddConfigPath(".") // optionally look for config in the working directory - viper.AddConfigPath("$HOME/.seaweedfs") // call multiple times to add many search paths - viper.AddConfigPath("/etc/seaweedfs/") // path to look for the config file in - - glog.V(0).Infof("Reading %s.toml from %s", configFileName, viper.ConfigFileUsed()) - - if err := viper.ReadInConfig(); err != nil { // Handle errors reading the config file - glog.V(0).Infof("Reading %s: %v", viper.ConfigFileUsed(), err) - if required { - glog.Errorf("Failed to load %s.toml file from current directory, or $HOME/.seaweedfs/, or /etc/seaweedfs/"+ - "\n\nPlease follow this example and add a filer.toml file to "+ - "current directory, or $HOME/.seaweedfs/, or /etc/seaweedfs/:\n"+ - " https://github.com/chrislusf/seaweedfs/blob/master/weed/%s.toml\n"+ - "\n\nOr use this command to generate the default toml file\n"+ - " weed scaffold -config=%s -output=.\n", - configFileName, configFileName, configFileName) +func maybeStartMetrics(fs *FilerServer, option *FilerOption) { + isConnected := false + var metricsAddress string + var metricsIntervalSec int + var readErr error + for !isConnected { + metricsAddress, metricsIntervalSec, readErr = readFilerConfiguration(fs.grpcDialOption, option.Masters[0]) + if readErr == nil { + isConnected = true + } else { + time.Sleep(7 * time.Second) } } + if metricsAddress == "" && metricsIntervalSec <= 0 { + return + } + go stats.LoopPushingMetric("filer", stats.SourceName(option.Port), stats.FilerGather, + func() (addr string, intervalSeconds int) { + return metricsAddress, metricsIntervalSec + }) +} +func readFilerConfiguration(grpcDialOption grpc.DialOption, masterGrpcAddress string) (metricsAddress string, metricsIntervalSec int, err error) { + err = operation.WithMasterServerClient(masterGrpcAddress, grpcDialOption, func(masterClient master_pb.SeaweedClient) error { + resp, err := masterClient.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get master %s configuration: %v", masterGrpcAddress, err) + } + metricsAddress, metricsIntervalSec = resp.MetricsAddress, int(resp.MetricsIntervalSeconds) + return nil + }) + return } diff --git a/weed/server/filer_server_handlers.go b/weed/server/filer_server_handlers.go index d76d7df8c..b6bfc3b04 100644 --- a/weed/server/filer_server_handlers.go +++ b/weed/server/filer_server_handlers.go @@ -2,28 +2,47 @@ package weed_server import ( "net/http" + "time" + + "github.com/chrislusf/seaweedfs/weed/stats" ) func (fs *FilerServer) filerHandler(w http.ResponseWriter, r *http.Request) { + start := time.Now() switch r.Method { case "GET": + stats.FilerRequestCounter.WithLabelValues("get").Inc() fs.GetOrHeadHandler(w, r, true) + 
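
The handler changes here wrap every HTTP verb in the same pair: increment a counter up front, then observe the elapsed seconds in a histogram. A stand-alone sketch of that pattern with the Prometheus client library; the metric and label names are illustrative, not the ones defined in weed/stats:

    package example

    import (
        "net/http"
        "time"

        "github.com/prometheus/client_golang/prometheus"
    )

    var (
        requestCounter = prometheus.NewCounterVec(
            prometheus.CounterOpts{Name: "filer_request_total", Help: "requests by type"},
            []string{"type"})
        requestHistogram = prometheus.NewHistogramVec(
            prometheus.HistogramOpts{Name: "filer_request_seconds", Help: "latency by type"},
            []string{"type"})
    )

    func init() {
        prometheus.MustRegister(requestCounter, requestHistogram)
    }

    // instrument mirrors filerHandler: count first, observe latency after.
    // The real handler switches on r.Method to pick the label value.
    func instrument(kind string, h http.HandlerFunc) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
            start := time.Now()
            requestCounter.WithLabelValues(kind).Inc()
            h(w, r)
            requestHistogram.WithLabelValues(kind).Observe(time.Since(start).Seconds())
        }
    }
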
stats.FilerRequestHistogram.WithLabelValues("get").Observe(time.Since(start).Seconds()) case "HEAD": + stats.FilerRequestCounter.WithLabelValues("head").Inc() fs.GetOrHeadHandler(w, r, false) + stats.FilerRequestHistogram.WithLabelValues("head").Observe(time.Since(start).Seconds()) case "DELETE": + stats.FilerRequestCounter.WithLabelValues("delete").Inc() fs.DeleteHandler(w, r) + stats.FilerRequestHistogram.WithLabelValues("delete").Observe(time.Since(start).Seconds()) case "PUT": + stats.FilerRequestCounter.WithLabelValues("put").Inc() fs.PostHandler(w, r) + stats.FilerRequestHistogram.WithLabelValues("put").Observe(time.Since(start).Seconds()) case "POST": + stats.FilerRequestCounter.WithLabelValues("post").Inc() fs.PostHandler(w, r) + stats.FilerRequestHistogram.WithLabelValues("post").Observe(time.Since(start).Seconds()) } } func (fs *FilerServer) readonlyFilerHandler(w http.ResponseWriter, r *http.Request) { + start := time.Now() switch r.Method { case "GET": + stats.FilerRequestCounter.WithLabelValues("get").Inc() fs.GetOrHeadHandler(w, r, true) + stats.FilerRequestHistogram.WithLabelValues("get").Observe(time.Since(start).Seconds()) case "HEAD": + stats.FilerRequestCounter.WithLabelValues("head").Inc() fs.GetOrHeadHandler(w, r, false) + stats.FilerRequestHistogram.WithLabelValues("head").Observe(time.Since(start).Seconds()) } } diff --git a/weed/server/filer_server_handlers_read.go b/weed/server/filer_server_handlers_read.go index 554ce3fd1..5967535b8 100644 --- a/weed/server/filer_server_handlers_read.go +++ b/weed/server/filer_server_handlers_read.go @@ -1,34 +1,43 @@ package weed_server import ( + "context" "io" + "mime" "net/http" - "net/url" + "path/filepath" + "strconv" "strings" "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/util" - "mime" - "mime/multipart" - "path" - "strconv" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/stats" ) func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request, isGetMethod bool) { + path := r.URL.Path - if strings.HasSuffix(path, "/") && len(path) > 1 { + isForDirectory := strings.HasSuffix(path, "/") + if isForDirectory && len(path) > 1 { path = path[:len(path)-1] } - entry, err := fs.filer.FindEntry(filer2.FullPath(path)) + entry, err := fs.filer.FindEntry(context.Background(), filer2.FullPath(path)) if err != nil { if path == "/" { fs.listDirectoryHandler(w, r) return } - glog.V(1).Infof("Not found %s: %v", path, err) - w.WriteHeader(http.StatusNotFound) + if err == filer_pb.ErrNotFound { + glog.V(1).Infof("Not found %s: %v", path, err) + stats.FilerRequestCounter.WithLabelValues("read.notfound").Inc() + w.WriteHeader(http.StatusNotFound) + } else { + glog.V(0).Infof("Internal %s: %v", path, err) + stats.FilerRequestCounter.WithLabelValues("read.internalerror").Inc() + w.WriteHeader(http.StatusInternalServerError) + } return } @@ -41,211 +50,48 @@ func (fs *FilerServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request, return } - if len(entry.Chunks) == 0 { - glog.V(1).Infof("no file chunks for %s, attr=%+v", path, entry.Attr) - w.WriteHeader(http.StatusNoContent) - return - } - - w.Header().Set("Accept-Ranges", "bytes") - if r.Method == "HEAD" { - w.Header().Set("Content-Length", strconv.FormatInt(int64(filer2.TotalSize(entry.Chunks)), 10)) - return - } - - if len(entry.Chunks) == 1 { - fs.handleSingleChunk(w, r, entry) - return - } - - fs.handleMultipleChunks(w, r, entry) - -} - -func (fs 
*FilerServer) handleSingleChunk(w http.ResponseWriter, r *http.Request, entry *filer2.Entry) { - - fileId := entry.Chunks[0].FileId - - urlString, err := fs.filer.MasterClient.LookupFileId(fileId) - if err != nil { - glog.V(1).Infof("operation LookupFileId %s failed, err: %v", fileId, err) + if isForDirectory { w.WriteHeader(http.StatusNotFound) return } - if fs.option.RedirectOnRead { - http.Redirect(w, r, urlString, http.StatusFound) - return - } - - u, _ := url.Parse(urlString) - q := u.Query() - for key, values := range r.URL.Query() { - for _, value := range values { - q.Add(key, value) - } - } - u.RawQuery = q.Encode() - request := &http.Request{ - Method: r.Method, - URL: u, - Proto: r.Proto, - ProtoMajor: r.ProtoMajor, - ProtoMinor: r.ProtoMinor, - Header: r.Header, - Body: r.Body, - Host: r.Host, - ContentLength: r.ContentLength, - } - glog.V(3).Infoln("retrieving from", u) - resp, do_err := util.Do(request) - if do_err != nil { - glog.V(0).Infoln("failing to connect to volume server", do_err.Error()) - writeJsonError(w, r, http.StatusInternalServerError, do_err) + if len(entry.Chunks) == 0 { + glog.V(1).Infof("no file chunks for %s, attr=%+v", path, entry.Attr) + stats.FilerRequestCounter.WithLabelValues("read.nocontent").Inc() + w.WriteHeader(http.StatusNoContent) return } - defer resp.Body.Close() - for k, v := range resp.Header { - w.Header()[k] = v - } - w.WriteHeader(resp.StatusCode) - io.Copy(w, resp.Body) -} -func (fs *FilerServer) handleMultipleChunks(w http.ResponseWriter, r *http.Request, entry *filer2.Entry) { + w.Header().Set("Accept-Ranges", "bytes") + w.Header().Set("Last-Modified", entry.Attr.Mtime.Format(http.TimeFormat)) - mimeType := entry.Mime + // mime type + mimeType := entry.Attr.Mime if mimeType == "" { - if ext := path.Ext(entry.Name()); ext != "" { + if ext := filepath.Ext(entry.Name()); ext != "" { mimeType = mime.TypeByExtension(ext) } } if mimeType != "" { w.Header().Set("Content-Type", mimeType) } - setEtag(w, filer2.ETag(entry.Chunks)) - - totalSize := int64(filer2.TotalSize(entry.Chunks)) - - rangeReq := r.Header.Get("Range") - - if rangeReq == "" { - w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) - if err := fs.writeContent(w, entry, 0, int(totalSize)); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - return - } - //the rest is dealing with partial content request - //mostly copy from src/pkg/net/http/fs.go - ranges, err := parseRange(rangeReq, totalSize) - if err != nil { - http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable) - return - } - if sumRangesSize(ranges) > totalSize { - // The total number of bytes in all the ranges - // is larger than the size of the file by - // itself, so this is probably an attack, or a - // dumb client. Ignore the range request. - return - } - if len(ranges) == 0 { - return - } - if len(ranges) == 1 { - // RFC 2616, Section 14.16: - // "When an HTTP message includes the content of a single - // range (for example, a response to a request for a - // single range, or to a request for a set of ranges - // that overlap without any holes), this content is - // transmitted with a Content-Range header, and a - // Content-Length header showing the number of bytes - // actually transferred. - // ... - // A response to a request for a single range MUST NOT - // be sent using the multipart/byteranges media type." 
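
The single-chunk and multi-range code being removed here (and the shared processRangeRequst helper that replaces it below, largely copied from net/http's fs.go) serves standard byte-range requests. A quick client-side probe of the behavior, assuming some file already exists at the path used:

    package main

    import (
        "fmt"
        "io/ioutil"
        "log"
        "net/http"
    )

    func main() {
        req, err := http.NewRequest("GET", "http://localhost:8888/path/to/file.bin", nil)
        if err != nil {
            log.Fatal(err)
        }
        req.Header.Set("Range", "bytes=0-99") // ask for the first 100 bytes

        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            log.Fatal(err)
        }
        defer resp.Body.Close()

        body, _ := ioutil.ReadAll(resp.Body)
        // expect 206 Partial Content, Content-Range: bytes 0-99/<total>, 100 bytes
        fmt.Println(resp.StatusCode, resp.Header.Get("Content-Range"), len(body))
    }
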
- ra := ranges[0] - w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) - w.Header().Set("Content-Range", ra.contentRange(totalSize)) - w.WriteHeader(http.StatusPartialContent) - - err = fs.writeContent(w, entry, ra.start, int(ra.length)) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - return - } + // set etag + setEtag(w, filer2.ETag(entry.Chunks)) - // process multiple ranges - for _, ra := range ranges { - if ra.start > totalSize { - http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable) - return - } - } - sendSize := rangesMIMESize(ranges, mimeType, totalSize) - pr, pw := io.Pipe() - mw := multipart.NewWriter(pw) - w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary()) - sendContent := pr - defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish. - go func() { - for _, ra := range ranges { - part, e := mw.CreatePart(ra.mimeHeader(mimeType, totalSize)) - if e != nil { - pw.CloseWithError(e) - return - } - if e = fs.writeContent(part, entry, ra.start, int(ra.length)); e != nil { - pw.CloseWithError(e) - return - } - } - mw.Close() - pw.Close() - }() - if w.Header().Get("Content-Encoding") == "" { - w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10)) - } - w.WriteHeader(http.StatusPartialContent) - if _, err := io.CopyN(w, sendContent, sendSize); err != nil { - http.Error(w, "Internal Error", http.StatusInternalServerError) + if r.Method == "HEAD" { + w.Header().Set("Content-Length", strconv.FormatInt(int64(filer2.TotalSize(entry.Chunks)), 10)) return } -} - -func (fs *FilerServer) writeContent(w io.Writer, entry *filer2.Entry, offset int64, size int) error { + filename := entry.Name() + adjustHeadersAfterHEAD(w, r, filename) - chunkViews := filer2.ViewFromChunks(entry.Chunks, offset, size) - - fileId2Url := make(map[string]string) - - for _, chunkView := range chunkViews { - - urlString, err := fs.filer.MasterClient.LookupFileId(chunkView.FileId) - if err != nil { - glog.V(1).Infof("operation LookupFileId %s failed, err: %v", chunkView.FileId, err) - return err - } - fileId2Url[chunkView.FileId] = urlString - } - - for _, chunkView := range chunkViews { - urlString := fileId2Url[chunkView.FileId] - _, err := util.ReadUrlAsStream(urlString, chunkView.Offset, int(chunkView.Size), func(data []byte) { - w.Write(data) - }) - if err != nil { - glog.V(1).Infof("read %s failed, err: %v", chunkView.FileId, err) - return err - } - } + totalSize := int64(filer2.TotalSize(entry.Chunks)) - return nil + processRangeRequst(r, w, totalSize, mimeType, func(writer io.Writer, offset int64, size int64) error { + return filer2.StreamContent(fs.filer.MasterClient, w, entry.Chunks, offset, int(size)) + }) } + diff --git a/weed/server/filer_server_handlers_read_dir.go b/weed/server/filer_server_handlers_read_dir.go index bcf7f0eb5..87e864559 100644 --- a/weed/server/filer_server_handlers_read_dir.go +++ b/weed/server/filer_server_handlers_read_dir.go @@ -1,6 +1,7 @@ package weed_server import ( + "context" "net/http" "strconv" "strings" @@ -8,6 +9,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/filer2" "github.com/chrislusf/seaweedfs/weed/glog" ui "github.com/chrislusf/seaweedfs/weed/server/filer_ui" + "github.com/chrislusf/seaweedfs/weed/stats" ) // listDirectoryHandler lists directories and folers under a directory @@ -15,6 +17,9 @@ import ( // sub directories are listed on the first page, when "lastFileName" // is empty. 
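
The listing handler below pages by file name, not by offset: the caller passes the last name it saw, and a page shorter than the limit means the directory is exhausted. A sketch of a client loop under that contract, reusing the 1024 page size from moveFolderSubEntries above; Lister is a stand-in for the filer store interface, with hypothetical parameter names:

    package example

    import "context"

    // Lister stands in for the store's ListDirectoryEntries: resume after
    // startFrom (non-inclusive here), return at most limit names.
    type Lister interface {
        ListDirectoryEntries(ctx context.Context, dir string, startFrom string, inclusive bool, limit int) ([]string, error)
    }

    func listAll(ctx context.Context, store Lister, dir string) ([]string, error) {
        var names []string
        lastFileName := ""
        for {
            page, err := store.ListDirectoryEntries(ctx, dir, lastFileName, false, 1024)
            if err != nil {
                return nil, err
            }
            names = append(names, page...)
            if len(page) < 1024 {
                return names, nil // short page: nothing left to list
            }
            lastFileName = page[len(page)-1] // resume after the last name seen
        }
    }
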
func (fs *FilerServer) listDirectoryHandler(w http.ResponseWriter, r *http.Request) { + + stats.FilerRequestCounter.WithLabelValues("list").Inc() + path := r.URL.Path if strings.HasSuffix(path, "/") && len(path) > 1 { path = path[:len(path)-1] @@ -27,7 +32,7 @@ func (fs *FilerServer) listDirectoryHandler(w http.ResponseWriter, r *http.Reque lastFileName := r.FormValue("lastFileName") - entries, err := fs.filer.ListDirectoryEntries(filer2.FullPath(path), lastFileName, false, limit) + entries, err := fs.filer.ListDirectoryEntries(context.Background(), filer2.FullPath(path), lastFileName, false, limit) if err != nil { glog.V(0).Infof("listDirectory %s %s %d: %s", path, lastFileName, limit, err) diff --git a/weed/server/filer_server_handlers_write.go b/weed/server/filer_server_handlers_write.go index 394f32d88..5cd174b17 100644 --- a/weed/server/filer_server_handlers_write.go +++ b/weed/server/filer_server_handlers_write.go @@ -1,11 +1,18 @@ package weed_server import ( + "context" + "crypto/md5" "encoding/json" "errors" + "fmt" + "io" "io/ioutil" + "mime" "net/http" "net/url" + "os" + filenamePath "path" "strconv" "strings" "time" @@ -14,8 +21,10 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/util" - "os" ) var ( @@ -25,45 +34,23 @@ var ( type FilerPostResult struct { Name string `json:"name,omitempty"` - Size uint32 `json:"size,omitempty"` + Size int64 `json:"size,omitempty"` Error string `json:"error,omitempty"` Fid string `json:"fid,omitempty"` Url string `json:"url,omitempty"` } -func (fs *FilerServer) queryFileInfoByPath(w http.ResponseWriter, r *http.Request, path string) (fileId, urlLocation string, err error) { - var entry *filer2.Entry - entry, err = fs.filer.FindEntry(filer2.FullPath(path)) - if err == filer2.ErrNotFound { - return "", "", nil - } - - if err != nil { - glog.V(0).Infoln("failing to find path in filer store", path, err.Error()) - writeJsonError(w, r, http.StatusInternalServerError, err) - return - } +func (fs *FilerServer) assignNewFileInfo(w http.ResponseWriter, r *http.Request, replication, collection, dataCenter, ttlString string) (fileId, urlLocation string, auth security.EncodedJwt, err error) { - if len(entry.Chunks) == 0 { - glog.V(1).Infof("empty entry: %s", path) - w.WriteHeader(http.StatusNoContent) - } else { - fileId = entry.Chunks[0].FileId - urlLocation, err = fs.filer.MasterClient.LookupFileId(fileId) - if err != nil { - glog.V(1).Infof("operation LookupFileId %s failed, err is %s", fileId, err.Error()) - w.WriteHeader(http.StatusNotFound) - } - } - return -} + stats.FilerRequestCounter.WithLabelValues("assign").Inc() + start := time.Now() + defer func() { stats.FilerRequestHistogram.WithLabelValues("assign").Observe(time.Since(start).Seconds()) }() -func (fs *FilerServer) assignNewFileInfo(w http.ResponseWriter, r *http.Request, replication, collection string, dataCenter string) (fileId, urlLocation string, err error) { ar := &operation.VolumeAssignRequest{ Count: 1, Replication: replication, Collection: collection, - Ttl: r.URL.Query().Get("ttl"), + Ttl: ttlString, DataCenter: dataCenter, } var altRequest *operation.VolumeAssignRequest @@ -72,12 +59,12 @@ func (fs *FilerServer) assignNewFileInfo(w http.ResponseWriter, r *http.Request, Count: 1, Replication: 
replication, Collection: collection, - Ttl: r.URL.Query().Get("ttl"), + Ttl: ttlString, DataCenter: "", } } - assignResult, ae := operation.Assign(fs.filer.GetMaster(), ar, altRequest) + assignResult, ae := operation.Assign(fs.filer.GetMaster(), fs.grpcDialOption, ar, altRequest) if ae != nil { glog.Errorf("failing to assign a file id: %v", ae) writeJsonError(w, r, http.StatusInternalServerError, ae) @@ -86,52 +73,155 @@ func (fs *FilerServer) assignNewFileInfo(w http.ResponseWriter, r *http.Request, } fileId = assignResult.Fid urlLocation = "http://" + assignResult.Url + "/" + assignResult.Fid + auth = assignResult.Auth return } func (fs *FilerServer) PostHandler(w http.ResponseWriter, r *http.Request) { + ctx := context.Background() + query := r.URL.Query() - replication := query.Get("replication") - if replication == "" { - replication = fs.option.DefaultReplication - } - collection := query.Get("collection") - if collection == "" { - collection = fs.option.Collection - } + collection, replication := fs.detectCollection(r.RequestURI, query.Get("collection"), query.Get("replication")) dataCenter := query.Get("dataCenter") if dataCenter == "" { dataCenter = fs.option.DataCenter } + ttlString := r.URL.Query().Get("ttl") - if autoChunked := fs.autoChunk(w, r, replication, collection, dataCenter); autoChunked { + // read ttl in seconds + ttl, err := needle.ReadTTL(ttlString) + ttlSeconds := int32(0) + if err == nil { + ttlSeconds = int32(ttl.Minutes()) * 60 + } + + if autoChunked := fs.autoChunk(ctx, w, r, replication, collection, dataCenter, ttlSeconds, ttlString); autoChunked { return } - fileId, urlLocation, err := fs.queryFileInfoByPath(w, r, r.URL.Path) - if err == nil && fileId == "" { - fileId, urlLocation, err = fs.assignNewFileInfo(w, r, replication, collection, dataCenter) + if fs.option.Cipher { + reply, err := fs.encrypt(ctx, w, r, replication, collection, dataCenter, ttlSeconds, ttlString) + if err != nil { + writeJsonError(w, r, http.StatusInternalServerError, err) + } else if reply != nil { + writeJsonQuiet(w, r, http.StatusCreated, reply) + } + + return } + + fileId, urlLocation, auth, err := fs.assignNewFileInfo(w, r, replication, collection, dataCenter, ttlString) + if err != nil || fileId == "" || urlLocation == "" { + glog.V(0).Infof("fail to allocate volume for %s, collection:%s, datacenter:%s", r.URL.Path, collection, dataCenter) + writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("fail to allocate volume for %s, collection:%s, datacenter:%s", r.URL.Path, collection, dataCenter)) return } glog.V(4).Infof("write %s to %v", r.URL.Path, urlLocation) u, _ := url.Parse(urlLocation) + ret, err := fs.uploadToVolumeServer(r, u, auth, w, fileId) + if err != nil { + return + } - // This allows a client to generate a chunk manifest and submit it to the filer -- it is a little off - // because they need to provide FIDs instead of file paths... 
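
This hunk rewrites PostHandler around the classic two-step SeaweedFS write: reserve a file id from the master, then send the bytes to the volume server the master picked. The same flow works from any HTTP client; a minimal sketch that assumes a master on localhost:9333 and skips the JWT that assignNewFileInfo now threads through:

    package main

    import (
        "bytes"
        "encoding/json"
        "fmt"
        "log"
        "mime/multipart"
        "net/http"
    )

    func main() {
        // step 1: ask the master for a file id and a volume server location
        resp, err := http.Get("http://localhost:9333/dir/assign")
        if err != nil {
            log.Fatal(err)
        }
        defer resp.Body.Close()

        var assign struct {
            Fid string `json:"fid"`
            Url string `json:"url"`
        }
        if err := json.NewDecoder(resp.Body).Decode(&assign); err != nil {
            log.Fatal(err)
        }

        // step 2: upload the bytes to the assigned volume server as multipart
        var buf bytes.Buffer
        mw := multipart.NewWriter(&buf)
        part, _ := mw.CreateFormFile("file", "hello.txt")
        part.Write([]byte("hello world"))
        mw.Close()

        up, err := http.Post("http://"+assign.Url+"/"+assign.Fid, mw.FormDataContentType(), &buf)
        if err != nil {
            log.Fatal(err)
        }
        defer up.Body.Close()
        fmt.Println("stored", assign.Fid, "->", up.Status)
    }
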
- cm, _ := strconv.ParseBool(query.Get("cm")) - if cm { - q := u.Query() - q.Set("cm", "true") - u.RawQuery = q.Encode() + if err = fs.updateFilerStore(ctx, r, w, replication, collection, ret, fileId, ttlSeconds); err != nil { + return } - glog.V(4).Infoln("post to", u) - // send request to volume server + // send back post result + reply := FilerPostResult{ + Name: ret.Name, + Size: int64(ret.Size), + Error: ret.Error, + Fid: fileId, + Url: urlLocation, + } + setEtag(w, ret.ETag) + writeJsonQuiet(w, r, http.StatusCreated, reply) +} + +// update metadata in filer store +func (fs *FilerServer) updateFilerStore(ctx context.Context, r *http.Request, w http.ResponseWriter, + replication string, collection string, ret *operation.UploadResult, fileId string, ttlSeconds int32) (err error) { + + stats.FilerRequestCounter.WithLabelValues("postStoreWrite").Inc() + start := time.Now() + defer func() { + stats.FilerRequestHistogram.WithLabelValues("postStoreWrite").Observe(time.Since(start).Seconds()) + }() + + modeStr := r.URL.Query().Get("mode") + if modeStr == "" { + modeStr = "0660" + } + mode, err := strconv.ParseUint(modeStr, 8, 32) + if err != nil { + glog.Errorf("Invalid mode format: %s, use 0660 by default", modeStr) + mode = 0660 + } + + path := r.URL.Path + if strings.HasSuffix(path, "/") { + if ret.Name != "" { + path += ret.Name + } + } + existingEntry, err := fs.filer.FindEntry(ctx, filer2.FullPath(path)) + crTime := time.Now() + if err == nil && existingEntry != nil { + crTime = existingEntry.Crtime + } + entry := &filer2.Entry{ + FullPath: filer2.FullPath(path), + Attr: filer2.Attr{ + Mtime: time.Now(), + Crtime: crTime, + Mode: os.FileMode(mode), + Uid: OS_UID, + Gid: OS_GID, + Replication: replication, + Collection: collection, + TtlSec: ttlSeconds, + Mime: ret.Mime, + }, + Chunks: []*filer_pb.FileChunk{{ + FileId: fileId, + Size: uint64(ret.Size), + Mtime: time.Now().UnixNano(), + ETag: ret.ETag, + }}, + } + if entry.Attr.Mime == "" { + if ext := filenamePath.Ext(path); ext != "" { + entry.Attr.Mime = mime.TypeByExtension(ext) + } + } + // glog.V(4).Infof("saving %s => %+v", path, entry) + if dbErr := fs.filer.CreateEntry(ctx, entry, false); dbErr != nil { + fs.filer.DeleteChunks(entry.Chunks) + glog.V(0).Infof("failing to write %s to filer server : %v", path, dbErr) + writeJsonError(w, r, http.StatusInternalServerError, dbErr) + err = dbErr + return + } + + return nil +} + +// send request to volume server +func (fs *FilerServer) uploadToVolumeServer(r *http.Request, u *url.URL, auth security.EncodedJwt, w http.ResponseWriter, fileId string) (ret *operation.UploadResult, err error) { + + stats.FilerRequestCounter.WithLabelValues("postUpload").Inc() + start := time.Now() + defer func() { stats.FilerRequestHistogram.WithLabelValues("postUpload").Observe(time.Since(start).Seconds()) }() + + ret = &operation.UploadResult{} + hash := md5.New() + var body = ioutil.NopCloser(io.TeeReader(r.Body, hash)) + request := &http.Request{ Method: r.Method, URL: u, @@ -139,104 +229,120 @@ func (fs *FilerServer) PostHandler(w http.ResponseWriter, r *http.Request) { ProtoMajor: r.ProtoMajor, ProtoMinor: r.ProtoMinor, Header: r.Header, - Body: r.Body, + Body: body, Host: r.Host, ContentLength: r.ContentLength, } - resp, do_err := util.Do(request) - if do_err != nil { - glog.Errorf("failing to connect to volume server %s: %v, %+v", r.RequestURI, do_err, r.Method) - writeJsonError(w, r, http.StatusInternalServerError, do_err) + + if auth != "" { + request.Header.Set("Authorization", "BEARER 
"+string(auth)) + } + resp, doErr := util.Do(request) + if doErr != nil { + glog.Errorf("failing to connect to volume server %s: %v, %+v", r.RequestURI, doErr, r.Method) + writeJsonError(w, r, http.StatusInternalServerError, doErr) + err = doErr return } - defer resp.Body.Close() - etag := resp.Header.Get("ETag") - resp_body, ra_err := ioutil.ReadAll(resp.Body) - if ra_err != nil { - glog.V(0).Infoln("failing to upload to volume server", r.RequestURI, ra_err.Error()) - writeJsonError(w, r, http.StatusInternalServerError, ra_err) + defer func() { + io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + }() + + respBody, raErr := ioutil.ReadAll(resp.Body) + if raErr != nil { + glog.V(0).Infoln("failing to upload to volume server", r.RequestURI, raErr.Error()) + writeJsonError(w, r, http.StatusInternalServerError, raErr) + err = raErr return } - glog.V(4).Infoln("post result", string(resp_body)) - var ret operation.UploadResult - unmarshal_err := json.Unmarshal(resp_body, &ret) - if unmarshal_err != nil { - glog.V(0).Infoln("failing to read upload resonse", r.RequestURI, string(resp_body)) - writeJsonError(w, r, http.StatusInternalServerError, unmarshal_err) + + glog.V(4).Infoln("post result", string(respBody)) + unmarshalErr := json.Unmarshal(respBody, &ret) + if unmarshalErr != nil { + glog.V(0).Infoln("failing to read upload resonse", r.RequestURI, string(respBody)) + writeJsonError(w, r, http.StatusInternalServerError, unmarshalErr) + err = unmarshalErr return } if ret.Error != "" { + err = errors.New(ret.Error) glog.V(0).Infoln("failing to post to volume server", r.RequestURI, ret.Error) - writeJsonError(w, r, http.StatusInternalServerError, errors.New(ret.Error)) + writeJsonError(w, r, http.StatusInternalServerError, err) return } - // find correct final path path := r.URL.Path if strings.HasSuffix(path, "/") { if ret.Name != "" { path += ret.Name } else { + err = fmt.Errorf("can not to write to folder %s without a file name", path) fs.filer.DeleteFileByFileId(fileId) glog.V(0).Infoln("Can not to write to folder", path, "without a file name!") - writeJsonError(w, r, http.StatusInternalServerError, - errors.New("Can not to write to folder "+path+" without a file name")) + writeJsonError(w, r, http.StatusInternalServerError, err) return } } - - // update metadata in filer store - glog.V(4).Infoln("saving", path, "=>", fileId) - entry := &filer2.Entry{ - FullPath: filer2.FullPath(path), - Attr: filer2.Attr{ - Mtime: time.Now(), - Crtime: time.Now(), - Mode: 0660, - Uid: OS_UID, - Gid: OS_GID, - Replication: replication, - Collection: collection, - TtlSec: int32(util.ParseInt(r.URL.Query().Get("ttl"), 0)), - }, - Chunks: []*filer_pb.FileChunk{{ - FileId: fileId, - Size: uint64(ret.Size), - Mtime: time.Now().UnixNano(), - ETag: etag, - }}, - } - if db_err := fs.filer.CreateEntry(entry); db_err != nil { - fs.filer.DeleteFileByFileId(fileId) - glog.V(0).Infof("failing to write %s to filer server : %v", path, db_err) - writeJsonError(w, r, http.StatusInternalServerError, db_err) - return - } - - // send back post result - reply := FilerPostResult{ - Name: ret.Name, - Size: ret.Size, - Error: ret.Error, - Fid: fileId, - Url: urlLocation, - } - setEtag(w, etag) - writeJsonQuiet(w, r, http.StatusCreated, reply) + // use filer calculated md5 ETag, instead of the volume server crc ETag + ret.ETag = fmt.Sprintf("%x", hash.Sum(nil)) + return } // curl -X DELETE http://localhost:8888/path/to // curl -X DELETE http://localhost:8888/path/to?recursive=true +// curl -X DELETE 
http://localhost:8888/path/to?recursive=true&ignoreRecursiveError=true +// curl -X DELETE http://localhost:8888/path/to?recursive=true&skipChunkDeletion=true func (fs *FilerServer) DeleteHandler(w http.ResponseWriter, r *http.Request) { isRecursive := r.FormValue("recursive") == "true" + if !isRecursive && fs.option.recursiveDelete { + if r.FormValue("recursive") != "false" { + isRecursive = true + } + } + ignoreRecursiveError := r.FormValue("ignoreRecursiveError") == "true" + skipChunkDeletion := r.FormValue("skipChunkDeletion") == "true" - err := fs.filer.DeleteEntryMetaAndData(filer2.FullPath(r.URL.Path), isRecursive, true) + err := fs.filer.DeleteEntryMetaAndData(context.Background(), filer2.FullPath(r.URL.Path), isRecursive, ignoreRecursiveError, !skipChunkDeletion) if err != nil { glog.V(1).Infoln("deleting", r.URL.Path, ":", err.Error()) - writeJsonError(w, r, http.StatusInternalServerError, err) + httpStatus := http.StatusInternalServerError + if err == filer_pb.ErrNotFound { + httpStatus = http.StatusNotFound + } + writeJsonError(w, r, httpStatus, err) return } w.WriteHeader(http.StatusNoContent) } + +func (fs *FilerServer) detectCollection(requestURI, qCollection, qReplication string) (collection, replication string) { + // default + collection = fs.option.Collection + replication = fs.option.DefaultReplication + + // get default collection settings + if qCollection != "" { + collection = qCollection + } + if qReplication != "" { + replication = qReplication + } + + // required by buckets folder + if strings.HasPrefix(requestURI, fs.filer.DirBucketsPath+"/") { + bucketAndObjectKey := requestURI[len(fs.filer.DirBucketsPath)+1:] + t := strings.Index(bucketAndObjectKey, "/") + if t < 0 { + collection = bucketAndObjectKey + } + if t > 0 { + collection = bucketAndObjectKey[:t] + } + replication = fs.filer.ReadBucketOption(collection) + } + + return +} diff --git a/weed/server/filer_server_handlers_write_autochunk.go b/weed/server/filer_server_handlers_write_autochunk.go index 4b1745aaa..666004c33 100644 --- a/weed/server/filer_server_handlers_write_autochunk.go +++ b/weed/server/filer_server_handlers_write_autochunk.go @@ -1,9 +1,8 @@ package weed_server import ( - "bytes" + "context" "io" - "io/ioutil" "net/http" "path" "strconv" @@ -14,10 +13,12 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" - "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/stats" ) -func (fs *FilerServer) autoChunk(w http.ResponseWriter, r *http.Request, replication string, collection string, dataCenter string) bool { +func (fs *FilerServer) autoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, + replication string, collection string, dataCenter string, ttlSec int32, ttlString string) bool { if r.Method != "POST" { glog.V(4).Infoln("AutoChunking not supported for method", r.Method) return false @@ -53,7 +54,7 @@ func (fs *FilerServer) autoChunk(w http.ResponseWriter, r *http.Request, replica return false } - reply, err := fs.doAutoChunk(w, r, contentLength, chunkSize, replication, collection, dataCenter) + reply, err := fs.doAutoChunk(ctx, w, r, contentLength, chunkSize, replication, collection, dataCenter, ttlSec, ttlString) if err != nil { writeJsonError(w, r, http.StatusInternalServerError, err) } else if reply != nil { @@ -62,7 +63,14 @@ func (fs *FilerServer) autoChunk(w http.ResponseWriter, r *http.Request, 
replica return true } -func (fs *FilerServer) doAutoChunk(w http.ResponseWriter, r *http.Request, contentLength int64, chunkSize int32, replication string, collection string, dataCenter string) (filerResult *FilerPostResult, replyerr error) { +func (fs *FilerServer) doAutoChunk(ctx context.Context, w http.ResponseWriter, r *http.Request, + contentLength int64, chunkSize int32, replication string, collection string, dataCenter string, ttlSec int32, ttlString string) (filerResult *FilerPostResult, replyerr error) { + + stats.FilerRequestCounter.WithLabelValues("postAutoChunk").Inc() + start := time.Now() + defer func() { + stats.FilerRequestHistogram.WithLabelValues("postAutoChunk").Observe(time.Since(start).Seconds()) + }() multipartReader, multipartReaderErr := r.MultipartReader() if multipartReaderErr != nil { @@ -78,68 +86,53 @@ func (fs *FilerServer) doAutoChunk(w http.ResponseWriter, r *http.Request, conte if fileName != "" { fileName = path.Base(fileName) } + contentType := part1.Header.Get("Content-Type") var fileChunks []*filer_pb.FileChunk - totalBytesRead := int64(0) - tmpBufferSize := int32(1024 * 1024) - tmpBuffer := bytes.NewBuffer(make([]byte, 0, tmpBufferSize)) - chunkBuf := make([]byte, chunkSize+tmpBufferSize, chunkSize+tmpBufferSize) // chunk size plus a little overflow - chunkBufOffset := int32(0) chunkOffset := int64(0) - writtenChunks := 0 - filerResult = &FilerPostResult{ - Name: fileName, - } + for chunkOffset < contentLength { + limitedReader := io.LimitReader(part1, int64(chunkSize)) - for totalBytesRead < contentLength { - tmpBuffer.Reset() - bytesRead, readErr := io.CopyN(tmpBuffer, part1, int64(tmpBufferSize)) - readFully := readErr != nil && readErr == io.EOF - tmpBuf := tmpBuffer.Bytes() - bytesToCopy := tmpBuf[0:int(bytesRead)] - - copy(chunkBuf[chunkBufOffset:chunkBufOffset+int32(bytesRead)], bytesToCopy) - chunkBufOffset = chunkBufOffset + int32(bytesRead) - - if chunkBufOffset >= chunkSize || readFully || (chunkBufOffset > 0 && bytesRead == 0) { - writtenChunks = writtenChunks + 1 - fileId, urlLocation, assignErr := fs.assignNewFileInfo(w, r, replication, collection, dataCenter) - if assignErr != nil { - return nil, assignErr - } - - // upload the chunk to the volume server - chunkName := fileName + "_chunk_" + strconv.FormatInt(int64(len(fileChunks)+1), 10) - uploadErr := fs.doUpload(urlLocation, w, r, chunkBuf[0:chunkBufOffset], chunkName, "application/octet-stream", fileId) - if uploadErr != nil { - return nil, uploadErr - } - - // Save to chunk manifest structure - fileChunks = append(fileChunks, - &filer_pb.FileChunk{ - FileId: fileId, - Offset: chunkOffset, - Size: uint64(chunkBufOffset), - Mtime: time.Now().UnixNano(), - }, - ) - - // reset variables for the next chunk - chunkBufOffset = 0 - chunkOffset = totalBytesRead + int64(bytesRead) + // assign one file id for one chunk + fileId, urlLocation, auth, assignErr := fs.assignNewFileInfo(w, r, replication, collection, dataCenter, ttlString) + if assignErr != nil { + return nil, assignErr } - totalBytesRead = totalBytesRead + int64(bytesRead) + // upload the chunk to the volume server + uploadResult, uploadErr := fs.doUpload(urlLocation, w, r, limitedReader, fileName, contentType, nil, auth) + if uploadErr != nil { + return nil, uploadErr + } - if bytesRead == 0 || readFully { + // if last chunk exhausted the reader exactly at the border + if uploadResult.Size == 0 { break } - if readErr != nil { - return nil, readErr + // Save to chunk manifest structure + fileChunks = append(fileChunks, + 
&filer_pb.FileChunk{ + FileId: fileId, + Offset: chunkOffset, + Size: uint64(uploadResult.Size), + Mtime: time.Now().UnixNano(), + ETag: uploadResult.ETag, + CipherKey: uploadResult.CipherKey, + IsGzipped: uploadResult.Gzip > 0, + }, + ) + + glog.V(4).Infof("uploaded %s chunk %d to %s [%d,%d) of %d", fileName, len(fileChunks), fileId, chunkOffset, chunkOffset+int64(uploadResult.Size), contentLength) + + // reset variables for the next chunk + chunkOffset = chunkOffset + int64(uploadResult.Size) + + // if last chunk was not at full chunk size, but already exhausted the reader + if int64(uploadResult.Size) < int64(chunkSize) { + break } } @@ -161,30 +154,35 @@ func (fs *FilerServer) doAutoChunk(w http.ResponseWriter, r *http.Request, conte Gid: OS_GID, Replication: replication, Collection: collection, - TtlSec: int32(util.ParseInt(r.URL.Query().Get("ttl"), 0)), + TtlSec: ttlSec, + Mime: contentType, }, Chunks: fileChunks, } - if db_err := fs.filer.CreateEntry(entry); db_err != nil { - replyerr = db_err - filerResult.Error = db_err.Error() - glog.V(0).Infof("failing to write %s to filer server : %v", path, db_err) + + filerResult = &FilerPostResult{ + Name: fileName, + Size: chunkOffset, + } + + if dbErr := fs.filer.CreateEntry(ctx, entry, false); dbErr != nil { + fs.filer.DeleteChunks(entry.Chunks) + replyerr = dbErr + filerResult.Error = dbErr.Error() + glog.V(0).Infof("failing to write %s to filer server : %v", path, dbErr) return } return } -func (fs *FilerServer) doUpload(urlLocation string, w http.ResponseWriter, r *http.Request, chunkBuf []byte, fileName string, contentType string, fileId string) (err error) { - err = nil +func (fs *FilerServer) doUpload(urlLocation string, w http.ResponseWriter, r *http.Request, limitedReader io.Reader, fileName string, contentType string, pairMap map[string]string, auth security.EncodedJwt) (*operation.UploadResult, error) { - ioReader := ioutil.NopCloser(bytes.NewBuffer(chunkBuf)) - uploadResult, uploadError := operation.Upload(urlLocation, fileName, ioReader, false, contentType, nil, fs.jwt(fileId)) - if uploadResult != nil { - glog.V(0).Infoln("Chunk upload result. 
Name:", uploadResult.Name, "Fid:", fileId, "Size:", uploadResult.Size) - } - if uploadError != nil { - err = uploadError - } - return + stats.FilerRequestCounter.WithLabelValues("postAutoChunkUpload").Inc() + start := time.Now() + defer func() { + stats.FilerRequestHistogram.WithLabelValues("postAutoChunkUpload").Observe(time.Since(start).Seconds()) + }() + + return operation.Upload(urlLocation, fileName, fs.option.Cipher, limitedReader, false, contentType, pairMap, auth) } diff --git a/weed/server/filer_server_handlers_write_cipher.go b/weed/server/filer_server_handlers_write_cipher.go new file mode 100644 index 000000000..06670399c --- /dev/null +++ b/weed/server/filer_server_handlers_write_cipher.go @@ -0,0 +1,98 @@ +package weed_server + +import ( + "context" + "fmt" + "net/http" + "strings" + "time" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +// handling single chunk POST or PUT upload +func (fs *FilerServer) encrypt(ctx context.Context, w http.ResponseWriter, r *http.Request, + replication string, collection string, dataCenter string, ttlSeconds int32, ttlString string) (filerResult *FilerPostResult, err error) { + + fileId, urlLocation, auth, err := fs.assignNewFileInfo(w, r, replication, collection, dataCenter, ttlString) + + if err != nil || fileId == "" || urlLocation == "" { + return nil, fmt.Errorf("fail to allocate volume for %s, collection:%s, datacenter:%s", r.URL.Path, collection, dataCenter) + } + + glog.V(4).Infof("write %s to %v", r.URL.Path, urlLocation) + + // Note: encrypt(gzip(data)), encrypt data first, then gzip + + sizeLimit := int64(fs.option.MaxMB) * 1024 * 1024 + + pu, err := needle.ParseUpload(r, sizeLimit) + uncompressedData := pu.Data + if pu.IsGzipped { + uncompressedData = pu.UncompressedData + } + if pu.MimeType == "" { + pu.MimeType = http.DetectContentType(uncompressedData) + } + + uploadResult, uploadError := operation.UploadData(urlLocation, pu.FileName, true, uncompressedData, false, pu.MimeType, pu.PairMap, auth) + if uploadError != nil { + return nil, fmt.Errorf("upload to volume server: %v", uploadError) + } + + // Save to chunk manifest structure + fileChunks := []*filer_pb.FileChunk{ + { + FileId: fileId, + Offset: 0, + Size: uint64(uploadResult.Size), + Mtime: time.Now().UnixNano(), + ETag: uploadResult.Md5, + CipherKey: uploadResult.CipherKey, + IsGzipped: uploadResult.Gzip > 0, + }, + } + + fmt.Printf("uploaded: %+v\n", uploadResult) + + path := r.URL.Path + if strings.HasSuffix(path, "/") { + if pu.FileName != "" { + path += pu.FileName + } + } + + entry := &filer2.Entry{ + FullPath: filer2.FullPath(path), + Attr: filer2.Attr{ + Mtime: time.Now(), + Crtime: time.Now(), + Mode: 0660, + Uid: OS_UID, + Gid: OS_GID, + Replication: replication, + Collection: collection, + TtlSec: ttlSeconds, + Mime: pu.MimeType, + }, + Chunks: fileChunks, + } + + filerResult = &FilerPostResult{ + Name: pu.FileName, + Size: int64(pu.OriginalDataSize), + } + + if dbErr := fs.filer.CreateEntry(ctx, entry, false); dbErr != nil { + fs.filer.DeleteChunks(entry.Chunks) + err = dbErr + filerResult.Error = dbErr.Error() + return + } + + return +} diff --git a/weed/server/filer_ui/breadcrumb.go b/weed/server/filer_ui/breadcrumb.go index d056a4b25..2f0df7f91 100644 --- a/weed/server/filer_ui/breadcrumb.go +++ b/weed/server/filer_ui/breadcrumb.go @@ -14,10 +14,14 @@ 
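
The breadcrumb change below normalizes Windows separators with filepath.ToSlash and guarantees every crumb link ends in "/". A hypothetical regression test for the new behavior (not part of this patch; the package clause should match whatever breadcrumb.go declares):

    package filer_ui

    import "testing"

    func TestToBreadcrumb(t *testing.T) {
        want := []string{"/", "/buckets/", "/buckets/demo/"}
        crumbs := ToBreadcrumb("/buckets/demo")
        if len(crumbs) != len(want) {
            t.Fatalf("got %d crumbs, want %d", len(crumbs), len(want))
        }
        for i, c := range crumbs {
            if c.Link != want[i] {
                t.Errorf("crumb %d: link %q, want %q", i, c.Link, want[i])
            }
        }
    }
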
func ToBreadcrumb(fullpath string) (crumbs []Breadcrumb) { parts := strings.Split(fullpath, "/") for i := 0; i < len(parts); i++ { - crumbs = append(crumbs, Breadcrumb{ - Name: parts[i] + "/", - Link: "/" + filepath.Join(parts[0:i+1]...), - }) + crumb := Breadcrumb{ + Name: parts[i] + " /", + Link: "/" + filepath.ToSlash(filepath.Join(parts[0:i+1]...)), + } + if !strings.HasSuffix(crumb.Link, "/") { + crumb.Link += "/" + } + crumbs = append(crumbs, crumb) } return diff --git a/weed/server/filer_ui/templates.go b/weed/server/filer_ui/templates.go index a8afde879..e532b27e2 100644 --- a/weed/server/filer_ui/templates.go +++ b/weed/server/filer_ui/templates.go @@ -43,14 +43,14 @@ var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOC <div class="container"> <div class="page-header"> <h1> - <img src="/seaweedfsstatic/seaweed50x50.png"></img> + <a href="https://github.com/chrislusf/seaweedfs"><img src="/seaweedfsstatic/seaweed50x50.png"></img></a> SeaweedFS Filer </h1> </div> <div class="row"> <div> {{ range $entry := .Breadcrumbs }} - <a href={{ $entry.Link }} > + <a href="{{ $entry.Link }}" > {{ $entry.Name }} </a> {{ end }} @@ -78,20 +78,19 @@ var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOC </a> {{end}} </td> - <td align="right"> + <td align="right" nowrap> {{if $entry.IsDirectory}} {{else}} - {{ $entry.Mime }} + {{ $entry.Mime }} {{end}} </td> - <td align="right"> + <td align="right" nowrap> {{if $entry.IsDirectory}} {{else}} - {{ $entry.Size | humanizeBytes }} - + {{ $entry.Size | humanizeBytes }} {{end}} </td> - <td> + <td nowrap> {{ $entry.Timestamp.Format "2006-01-02 15:04" }} </td> </tr> @@ -162,7 +161,7 @@ function uploadFile(file, i) { var url = window.location.href var xhr = new XMLHttpRequest() var formData = new FormData() - xhr.open('POST', url, true) + xhr.open('POST', url, false) formData.append('file', file) xhr.send(formData) diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go index d2c86598e..84087df8b 100644 --- a/weed/server/master_grpc_server.go +++ b/weed/server/master_grpc_server.go @@ -1,15 +1,20 @@ package weed_server import ( + "context" "fmt" "net" "strings" + "time" "github.com/chrislusf/raft" + "google.golang.org/grpc/peer" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/topology" - "google.golang.org/grpc/peer" ) func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServer) error { @@ -29,6 +34,9 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ for _, v := range dn.GetVolumes() { message.DeletedVids = append(message.DeletedVids, uint32(v.Id)) } + for _, s := range dn.GetEcShards() { + message.DeletedVids = append(message.DeletedVids, uint32(s.VolumeId)) + } if len(message.DeletedVids) > 0 { ms.clientChansLock.RLock() @@ -44,57 +52,104 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ for { heartbeat, err := stream.Recv() if err != nil { + if dn != nil { + glog.Warningf("SendHeartbeat.Recv server %s:%d : %v", dn.Ip, dn.Port, err) + } else { + glog.Warningf("SendHeartbeat.Recv: %v", err) + } return err } + t.Sequence.SetMax(heartbeat.MaxFileKey) + if dn == nil { - t.Sequence.SetMax(heartbeat.MaxFileKey) - if heartbeat.Ip == "" { - if pr, ok := peer.FromContext(stream.Context()); ok { - if 
pr.Addr != net.Addr(nil) { - heartbeat.Ip = pr.Addr.String()[0:strings.LastIndex(pr.Addr.String(), ":")] - glog.V(0).Infof("remote IP address is detected as %v", heartbeat.Ip) - } - } - } dcName, rackName := t.Configuration.Locate(heartbeat.Ip, heartbeat.DataCenter, heartbeat.Rack) dc := t.GetOrCreateDataCenter(dcName) rack := dc.GetOrCreateRack(rackName) dn = rack.GetOrCreateDataNode(heartbeat.Ip, int(heartbeat.Port), heartbeat.PublicUrl, - int(heartbeat.MaxVolumeCount)) + int64(heartbeat.MaxVolumeCount)) glog.V(0).Infof("added volume server %v:%d", heartbeat.GetIp(), heartbeat.GetPort()) if err := stream.Send(&master_pb.HeartbeatResponse{ - VolumeSizeLimit: uint64(ms.volumeSizeLimitMB) * 1024 * 1024, - SecretKey: string(ms.guard.SecretKey), + VolumeSizeLimit: uint64(ms.option.VolumeSizeLimitMB) * 1024 * 1024, + MetricsAddress: ms.option.MetricsAddress, + MetricsIntervalSeconds: uint32(ms.option.MetricsIntervalSec), + StorageBackends: backend.ToPbStorageBackends(), }); err != nil { + glog.Warningf("SendHeartbeat.Send volume size to %s:%d %v", dn.Ip, dn.Port, err) return err } } + glog.V(4).Infof("master received heartbeat %s", heartbeat.String()) message := &master_pb.VolumeLocation{ Url: dn.Url(), PublicUrl: dn.PublicUrl, } - if len(heartbeat.NewVids) > 0 || len(heartbeat.DeletedVids) > 0 { + if len(heartbeat.NewVolumes) > 0 || len(heartbeat.DeletedVolumes) > 0 { // process delta volume ids if exists for fast volume id updates - message.NewVids = append(message.NewVids, heartbeat.NewVids...) - message.DeletedVids = append(message.DeletedVids, heartbeat.DeletedVids...) - } else { + for _, volInfo := range heartbeat.NewVolumes { + message.NewVids = append(message.NewVids, volInfo.Id) + } + for _, volInfo := range heartbeat.DeletedVolumes { + message.DeletedVids = append(message.DeletedVids, volInfo.Id) + } + // update master internal volume layouts + t.IncrementalSyncDataNodeRegistration(heartbeat.NewVolumes, heartbeat.DeletedVolumes, dn) + } + + if len(heartbeat.Volumes) > 0 || heartbeat.HasNoVolumes { // process heartbeat.Volumes newVolumes, deletedVolumes := t.SyncDataNodeRegistration(heartbeat.Volumes, dn) for _, v := range newVolumes { + glog.V(0).Infof("master see new volume %d from %s", uint32(v.Id), dn.Url()) message.NewVids = append(message.NewVids, uint32(v.Id)) } for _, v := range deletedVolumes { + glog.V(0).Infof("master see deleted volume %d from %s", uint32(v.Id), dn.Url()) message.DeletedVids = append(message.DeletedVids, uint32(v.Id)) } } + if len(heartbeat.NewEcShards) > 0 || len(heartbeat.DeletedEcShards) > 0 { + + // update master internal volume layouts + t.IncrementalSyncDataNodeEcShards(heartbeat.NewEcShards, heartbeat.DeletedEcShards, dn) + + for _, s := range heartbeat.NewEcShards { + message.NewVids = append(message.NewVids, s.Id) + } + for _, s := range heartbeat.DeletedEcShards { + if dn.HasVolumesById(needle.VolumeId(s.Id)) { + continue + } + message.DeletedVids = append(message.DeletedVids, s.Id) + } + + } + + if len(heartbeat.EcShards) > 0 || heartbeat.HasNoEcShards { + glog.V(1).Infof("master recieved ec shards from %s: %+v", dn.Url(), heartbeat.EcShards) + newShards, deletedShards := t.SyncDataNodeEcShards(heartbeat.EcShards, dn) + + // broadcast the ec vid changes to master clients + for _, s := range newShards { + message.NewVids = append(message.NewVids, uint32(s.VolumeId)) + } + for _, s := range deletedShards { + if dn.HasVolumesById(s.VolumeId) { + continue + } + message.DeletedVids = append(message.DeletedVids, uint32(s.VolumeId)) + } + + } + if 
len(message.NewVids) > 0 || len(message.DeletedVids) > 0 { ms.clientChansLock.RLock() - for _, ch := range ms.clientChans { + for host, ch := range ms.clientChans { + glog.V(0).Infof("master send to %s: %s", host, message.String()) ch <- message } ms.clientChansLock.RUnlock() @@ -102,12 +157,15 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ // tell the volume servers about the leader newLeader, err := t.Leader() - if err == nil { - if err := stream.Send(&master_pb.HeartbeatResponse{ - Leader: newLeader, - }); err != nil { - return err - } + if err != nil { + glog.Warningf("SendHeartbeat find leader: %v", err) + return err + } + if err := stream.Send(&master_pb.HeartbeatResponse{ + Leader: newLeader, + }); err != nil { + glog.Warningf("SendHeartbeat.Send response to to %s:%d %v", dn.Ip, dn.Port, err) + return err } } } @@ -122,38 +180,16 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ } if !ms.Topo.IsLeader() { - return raft.NotLeaderError - } - - // remember client address - ctx := stream.Context() - // fmt.Printf("FromContext %+v\n", ctx) - pr, ok := peer.FromContext(ctx) - if !ok { - glog.Error("failed to get peer from ctx") - return fmt.Errorf("failed to get peer from ctx") - } - if pr.Addr == net.Addr(nil) { - glog.Error("failed to get peer address") - return fmt.Errorf("failed to get peer address") + return ms.informNewLeader(stream) } - clientName := req.Name + pr.Addr.String() - glog.V(0).Infof("+ client %v", clientName) + peerAddress := findClientAddress(stream.Context(), req.GrpcPort) - messageChan := make(chan *master_pb.VolumeLocation) stopChan := make(chan bool) - ms.clientChansLock.Lock() - ms.clientChans[clientName] = messageChan - ms.clientChansLock.Unlock() + clientName, messageChan := ms.addClient(req.Name, peerAddress) - defer func() { - glog.V(0).Infof("- client %v", clientName) - ms.clientChansLock.Lock() - delete(ms.clientChans, clientName) - ms.clientChansLock.Unlock() - }() + defer ms.deleteClient(clientName) for _, message := range ms.Topo.ToVolumeLocations() { if err := stream.Send(message); err != nil { @@ -172,6 +208,7 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ } }() + ticker := time.NewTicker(5 * time.Second) for { select { case message := <-messageChan: @@ -179,6 +216,10 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ glog.V(0).Infof("=> client %v: %+v", clientName, message) return err } + case <-ticker.C: + if !ms.Topo.IsLeader() { + return ms.informNewLeader(stream) + } case <-stopChan: return nil } @@ -186,3 +227,71 @@ func (ms *MasterServer) KeepConnected(stream master_pb.Seaweed_KeepConnectedServ return nil } + +func (ms *MasterServer) informNewLeader(stream master_pb.Seaweed_KeepConnectedServer) error { + leader, err := ms.Topo.Leader() + if err != nil { + glog.Errorf("topo leader: %v", err) + return raft.NotLeaderError + } + if err := stream.Send(&master_pb.VolumeLocation{ + Leader: leader, + }); err != nil { + return err + } + return nil +} + +func (ms *MasterServer) addClient(clientType string, clientAddress string) (clientName string, messageChan chan *master_pb.VolumeLocation) { + clientName = clientType + "@" + clientAddress + glog.V(0).Infof("+ client %v", clientName) + + messageChan = make(chan *master_pb.VolumeLocation) + + ms.clientChansLock.Lock() + ms.clientChans[clientName] = messageChan + ms.clientChansLock.Unlock() + return +} + +func (ms *MasterServer) deleteClient(clientName string) { + 
glog.V(0).Infof("- client %v", clientName) + ms.clientChansLock.Lock() + delete(ms.clientChans, clientName) + ms.clientChansLock.Unlock() +} + +func findClientAddress(ctx context.Context, grpcPort uint32) string { + // fmt.Printf("FromContext %+v\n", ctx) + pr, ok := peer.FromContext(ctx) + if !ok { + glog.Error("failed to get peer from ctx") + return "" + } + if pr.Addr == net.Addr(nil) { + glog.Error("failed to get peer address") + return "" + } + if grpcPort == 0 { + return pr.Addr.String() + } + if tcpAddr, ok := pr.Addr.(*net.TCPAddr); ok { + externalIP := tcpAddr.IP + return fmt.Sprintf("%s:%d", externalIP, grpcPort) + } + return pr.Addr.String() + +} + +func (ms *MasterServer) ListMasterClients(ctx context.Context, req *master_pb.ListMasterClientsRequest) (*master_pb.ListMasterClientsResponse, error) { + resp := &master_pb.ListMasterClientsResponse{} + ms.clientChansLock.RLock() + defer ms.clientChansLock.RUnlock() + + for k := range ms.clientChans { + if strings.HasPrefix(k, req.ClientType+"@") { + resp.GrpcAddresses = append(resp.GrpcAddresses, k[len(req.ClientType)+1:]) + } + } + return resp, nil +} diff --git a/weed/server/master_grpc_server_collection.go b/weed/server/master_grpc_server_collection.go new file mode 100644 index 000000000..b92d6bcbe --- /dev/null +++ b/weed/server/master_grpc_server_collection.go @@ -0,0 +1,95 @@ +package weed_server + +import ( + "context" + + "github.com/chrislusf/raft" + + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" +) + +func (ms *MasterServer) CollectionList(ctx context.Context, req *master_pb.CollectionListRequest) (*master_pb.CollectionListResponse, error) { + + if !ms.Topo.IsLeader() { + return nil, raft.NotLeaderError + } + + resp := &master_pb.CollectionListResponse{} + collections := ms.Topo.ListCollections(req.IncludeNormalVolumes, req.IncludeEcVolumes) + for _, c := range collections { + resp.Collections = append(resp.Collections, &master_pb.Collection{ + Name: c, + }) + } + + return resp, nil +} + +func (ms *MasterServer) CollectionDelete(ctx context.Context, req *master_pb.CollectionDeleteRequest) (*master_pb.CollectionDeleteResponse, error) { + + if !ms.Topo.IsLeader() { + return nil, raft.NotLeaderError + } + + resp := &master_pb.CollectionDeleteResponse{} + + err := ms.doDeleteNormalCollection(req.Name) + + if err != nil { + return nil, err + } + + err = ms.doDeleteEcCollection(req.Name) + + if err != nil { + return nil, err + } + + return resp, nil +} + +func (ms *MasterServer) doDeleteNormalCollection(collectionName string) error { + + collection, ok := ms.Topo.FindCollection(collectionName) + if !ok { + return nil + } + + for _, server := range collection.ListVolumeServers() { + err := operation.WithVolumeServerClient(server.Url(), ms.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + _, deleteErr := client.DeleteCollection(context.Background(), &volume_server_pb.DeleteCollectionRequest{ + Collection: collectionName, + }) + return deleteErr + }) + if err != nil { + return err + } + } + ms.Topo.DeleteCollection(collectionName) + + return nil +} + +func (ms *MasterServer) doDeleteEcCollection(collectionName string) error { + + listOfEcServers := ms.Topo.ListEcServersByCollection(collectionName) + + for _, server := range listOfEcServers { + err := operation.WithVolumeServerClient(server, ms.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + _, deleteErr := 
client.DeleteCollection(context.Background(), &volume_server_pb.DeleteCollectionRequest{ + Collection: collectionName, + }) + return deleteErr + }) + if err != nil { + return err + } + } + + ms.Topo.DeleteEcCollection(collectionName) + + return nil +} diff --git a/weed/server/master_grpc_server_lookup.go b/weed/server/master_grpc_server_lookup.go deleted file mode 100644 index 05458f1cf..000000000 --- a/weed/server/master_grpc_server_lookup.go +++ /dev/null @@ -1,29 +0,0 @@ -package weed_server - -import ( - "context" - - "github.com/chrislusf/seaweedfs/weed/pb/master_pb" -) - -func (ms *MasterServer) LookupVolume(ctx context.Context, req *master_pb.LookupVolumeRequest) (*master_pb.LookupVolumeResponse, error) { - resp := &master_pb.LookupVolumeResponse{} - volumeLocations := ms.lookupVolumeId(req.VolumeIds, req.Collection) - - for _, result := range volumeLocations { - var locations []*master_pb.Location - for _, loc := range result.Locations { - locations = append(locations, &master_pb.Location{ - Url: loc.Url, - PublicUrl: loc.PublicUrl, - }) - } - resp.VolumeIdLocations = append(resp.VolumeIdLocations, &master_pb.LookupVolumeResponse_VolumeIdLocation{ - VolumeId: result.VolumeId, - Locations: locations, - Error: result.Error, - }) - } - - return resp, nil -} diff --git a/weed/server/master_grpc_server_volume.go b/weed/server/master_grpc_server_volume.go new file mode 100644 index 000000000..856c07890 --- /dev/null +++ b/weed/server/master_grpc_server_volume.go @@ -0,0 +1,188 @@ +package weed_server + +import ( + "context" + "fmt" + + "github.com/chrislusf/raft" + + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + "github.com/chrislusf/seaweedfs/weed/topology" +) + +func (ms *MasterServer) LookupVolume(ctx context.Context, req *master_pb.LookupVolumeRequest) (*master_pb.LookupVolumeResponse, error) { + + if !ms.Topo.IsLeader() { + return nil, raft.NotLeaderError + } + + resp := &master_pb.LookupVolumeResponse{} + volumeLocations := ms.lookupVolumeId(req.VolumeIds, req.Collection) + + for _, result := range volumeLocations { + var locations []*master_pb.Location + for _, loc := range result.Locations { + locations = append(locations, &master_pb.Location{ + Url: loc.Url, + PublicUrl: loc.PublicUrl, + }) + } + resp.VolumeIdLocations = append(resp.VolumeIdLocations, &master_pb.LookupVolumeResponse_VolumeIdLocation{ + VolumeId: result.VolumeId, + Locations: locations, + Error: result.Error, + }) + } + + return resp, nil +} + +func (ms *MasterServer) Assign(ctx context.Context, req *master_pb.AssignRequest) (*master_pb.AssignResponse, error) { + + if !ms.Topo.IsLeader() { + return nil, raft.NotLeaderError + } + + if req.Count == 0 { + req.Count = 1 + } + + if req.Replication == "" { + req.Replication = ms.option.DefaultReplicaPlacement + } + replicaPlacement, err := super_block.NewReplicaPlacementFromString(req.Replication) + if err != nil { + return nil, err + } + ttl, err := needle.ReadTTL(req.Ttl) + if err != nil { + return nil, err + } + + option := &topology.VolumeGrowOption{ + Collection: req.Collection, + ReplicaPlacement: replicaPlacement, + Ttl: ttl, + Prealloacte: ms.preallocateSize, + DataCenter: req.DataCenter, + Rack: req.Rack, + DataNode: req.DataNode, + MemoryMapMaxSizeMb: req.MemoryMapMaxSizeMb, + } + + if !ms.Topo.HasWritableVolume(option) { + if ms.Topo.FreeSpace() <= 0 { + return nil, fmt.Errorf("No free 
volumes left!") + } + ms.vgLock.Lock() + if !ms.Topo.HasWritableVolume(option) { + if _, err = ms.vg.AutomaticGrowByType(option, ms.grpcDialOption, ms.Topo, int(req.WritableVolumeCount)); err != nil { + ms.vgLock.Unlock() + return nil, fmt.Errorf("Cannot grow volume group! %v", err) + } + } + ms.vgLock.Unlock() + } + fid, count, dn, err := ms.Topo.PickForWrite(req.Count, option) + if err != nil { + return nil, fmt.Errorf("%v", err) + } + + return &master_pb.AssignResponse{ + Fid: fid, + Url: dn.Url(), + PublicUrl: dn.PublicUrl, + Count: count, + Auth: string(security.GenJwt(ms.guard.SigningKey, ms.guard.ExpiresAfterSec, fid)), + }, nil +} + +func (ms *MasterServer) Statistics(ctx context.Context, req *master_pb.StatisticsRequest) (*master_pb.StatisticsResponse, error) { + + if !ms.Topo.IsLeader() { + return nil, raft.NotLeaderError + } + + if req.Replication == "" { + req.Replication = ms.option.DefaultReplicaPlacement + } + replicaPlacement, err := super_block.NewReplicaPlacementFromString(req.Replication) + if err != nil { + return nil, err + } + ttl, err := needle.ReadTTL(req.Ttl) + if err != nil { + return nil, err + } + + volumeLayout := ms.Topo.GetVolumeLayout(req.Collection, replicaPlacement, ttl) + stats := volumeLayout.Stats() + + resp := &master_pb.StatisticsResponse{ + TotalSize: stats.TotalSize, + UsedSize: stats.UsedSize, + FileCount: stats.FileCount, + } + + return resp, nil +} + +func (ms *MasterServer) VolumeList(ctx context.Context, req *master_pb.VolumeListRequest) (*master_pb.VolumeListResponse, error) { + + if !ms.Topo.IsLeader() { + return nil, raft.NotLeaderError + } + + resp := &master_pb.VolumeListResponse{ + TopologyInfo: ms.Topo.ToTopologyInfo(), + VolumeSizeLimitMb: uint64(ms.option.VolumeSizeLimitMB), + } + + return resp, nil +} + +func (ms *MasterServer) LookupEcVolume(ctx context.Context, req *master_pb.LookupEcVolumeRequest) (*master_pb.LookupEcVolumeResponse, error) { + + if !ms.Topo.IsLeader() { + return nil, raft.NotLeaderError + } + + resp := &master_pb.LookupEcVolumeResponse{} + + ecLocations, found := ms.Topo.LookupEcShards(needle.VolumeId(req.VolumeId)) + + if !found { + return resp, fmt.Errorf("ec volume %d not found", req.VolumeId) + } + + resp.VolumeId = req.VolumeId + + for shardId, shardLocations := range ecLocations.Locations { + var locations []*master_pb.Location + for _, dn := range shardLocations { + locations = append(locations, &master_pb.Location{ + Url: string(dn.Id()), + PublicUrl: dn.PublicUrl, + }) + } + resp.ShardIdLocations = append(resp.ShardIdLocations, &master_pb.LookupEcVolumeResponse_EcShardIdLocation{ + ShardId: uint32(shardId), + Locations: locations, + }) + } + + return resp, nil +} + +func (ms *MasterServer) GetMasterConfiguration(ctx context.Context, req *master_pb.GetMasterConfigurationRequest) (*master_pb.GetMasterConfigurationResponse, error) { + + resp := &master_pb.GetMasterConfigurationResponse{ + MetricsAddress: ms.option.MetricsAddress, + MetricsIntervalSeconds: uint32(ms.option.MetricsIntervalSec), + } + + return resp, nil +} diff --git a/weed/server/master_server.go b/weed/server/master_server.go index f22925e56..a9ae6b888 100644 --- a/weed/server/master_server.go +++ b/weed/server/master_server.go @@ -5,27 +5,51 @@ import ( "net/http" "net/http/httputil" "net/url" + "os" + "regexp" + "strconv" + "strings" "sync" + "time" "github.com/chrislusf/raft" + "github.com/gorilla/mux" + "google.golang.org/grpc" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/master_pb" 
"github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/sequence" + "github.com/chrislusf/seaweedfs/weed/shell" "github.com/chrislusf/seaweedfs/weed/topology" "github.com/chrislusf/seaweedfs/weed/util" - "github.com/gorilla/mux" + "github.com/chrislusf/seaweedfs/weed/wdclient" ) +const ( + SequencerType = "master.sequencer.type" + SequencerEtcdUrls = "master.sequencer.sequencer_etcd_urls" +) + +type MasterOption struct { + Port int + MetaFolder string + VolumeSizeLimitMB uint + VolumePreallocate bool + PulseSeconds int + DefaultReplicaPlacement string + GarbageThreshold float64 + WhiteList []string + DisableHttp bool + MetricsAddress string + MetricsIntervalSec int +} + type MasterServer struct { - port int - metaFolder string - volumeSizeLimitMB uint - preallocate int64 - pulseSeconds int - defaultReplicaPlacement string - garbageThreshold float64 - guard *security.Guard + option *MasterOption + guard *security.Guard + + preallocateSize int64 Topo *topology.Topology vg *topology.VolumeGrowth @@ -36,56 +60,71 @@ type MasterServer struct { // notifying clients clientChansLock sync.RWMutex clientChans map[string]chan *master_pb.VolumeLocation + + grpcDialOption grpc.DialOption + + MasterClient *wdclient.MasterClient } -func NewMasterServer(r *mux.Router, port int, metaFolder string, - volumeSizeLimitMB uint, - preallocate bool, - pulseSeconds int, - defaultReplicaPlacement string, - garbageThreshold float64, - whiteList []string, - secureKey string, -) *MasterServer { +func NewMasterServer(r *mux.Router, option *MasterOption, peers []string) *MasterServer { + + v := util.GetViper() + signingKey := v.GetString("jwt.signing.key") + v.SetDefault("jwt.signing.expires_after_seconds", 10) + expiresAfterSec := v.GetInt("jwt.signing.expires_after_seconds") + + readSigningKey := v.GetString("jwt.signing.read.key") + v.SetDefault("jwt.signing.read.expires_after_seconds", 60) + readExpiresAfterSec := v.GetInt("jwt.signing.read.expires_after_seconds") var preallocateSize int64 - if preallocate { - preallocateSize = int64(volumeSizeLimitMB) * (1 << 20) + if option.VolumePreallocate { + preallocateSize = int64(option.VolumeSizeLimitMB) * (1 << 20) } + + grpcDialOption := security.LoadClientTLS(v, "grpc.master") ms := &MasterServer{ - port: port, - volumeSizeLimitMB: volumeSizeLimitMB, - preallocate: preallocateSize, - pulseSeconds: pulseSeconds, - defaultReplicaPlacement: defaultReplicaPlacement, - garbageThreshold: garbageThreshold, - clientChans: make(map[string]chan *master_pb.VolumeLocation), + option: option, + preallocateSize: preallocateSize, + clientChans: make(map[string]chan *master_pb.VolumeLocation), + grpcDialOption: grpcDialOption, + MasterClient: wdclient.NewMasterClient(grpcDialOption, "master", 0, peers), } ms.bounedLeaderChan = make(chan int, 16) - seq := sequence.NewMemorySequencer() - ms.Topo = topology.NewTopology("topo", seq, uint64(volumeSizeLimitMB)*1024*1024, pulseSeconds) + + seq := ms.createSequencer(option) + if nil == seq { + glog.Fatalf("create sequencer failed.") + } + ms.Topo = topology.NewTopology("topo", seq, uint64(ms.option.VolumeSizeLimitMB)*1024*1024, ms.option.PulseSeconds) ms.vg = topology.NewDefaultVolumeGrowth() - glog.V(0).Infoln("Volume Size Limit is", volumeSizeLimitMB, "MB") - - ms.guard = security.NewGuard(whiteList, secureKey) - - handleStaticResources2(r) - r.HandleFunc("/", ms.uiStatusHandler) - r.HandleFunc("/ui/index.html", ms.uiStatusHandler) - r.HandleFunc("/dir/assign", 
ms.proxyToLeader(ms.guard.WhiteList(ms.dirAssignHandler))) - r.HandleFunc("/dir/lookup", ms.proxyToLeader(ms.guard.WhiteList(ms.dirLookupHandler))) - r.HandleFunc("/dir/status", ms.proxyToLeader(ms.guard.WhiteList(ms.dirStatusHandler))) - r.HandleFunc("/col/delete", ms.proxyToLeader(ms.guard.WhiteList(ms.collectionDeleteHandler))) - r.HandleFunc("/vol/grow", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeGrowHandler))) - r.HandleFunc("/vol/status", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeStatusHandler))) - r.HandleFunc("/vol/vacuum", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeVacuumHandler))) - r.HandleFunc("/submit", ms.guard.WhiteList(ms.submitFromMasterServerHandler)) - r.HandleFunc("/stats/health", ms.guard.WhiteList(statsHealthHandler)) - r.HandleFunc("/stats/counter", ms.guard.WhiteList(statsCounterHandler)) - r.HandleFunc("/stats/memory", ms.guard.WhiteList(statsMemoryHandler)) - r.HandleFunc("/{fileId}", ms.proxyToLeader(ms.redirectHandler)) - - ms.Topo.StartRefreshWritableVolumes(garbageThreshold, ms.preallocate) + glog.V(0).Infoln("Volume Size Limit is", ms.option.VolumeSizeLimitMB, "MB") + + ms.guard = security.NewGuard(ms.option.WhiteList, signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec) + + if !ms.option.DisableHttp { + handleStaticResources2(r) + r.HandleFunc("/", ms.proxyToLeader(ms.uiStatusHandler)) + r.HandleFunc("/ui/index.html", ms.uiStatusHandler) + r.HandleFunc("/dir/assign", ms.proxyToLeader(ms.guard.WhiteList(ms.dirAssignHandler))) + r.HandleFunc("/dir/lookup", ms.guard.WhiteList(ms.dirLookupHandler)) + r.HandleFunc("/dir/status", ms.proxyToLeader(ms.guard.WhiteList(ms.dirStatusHandler))) + r.HandleFunc("/col/delete", ms.proxyToLeader(ms.guard.WhiteList(ms.collectionDeleteHandler))) + r.HandleFunc("/vol/grow", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeGrowHandler))) + r.HandleFunc("/vol/status", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeStatusHandler))) + r.HandleFunc("/vol/vacuum", ms.proxyToLeader(ms.guard.WhiteList(ms.volumeVacuumHandler))) + r.HandleFunc("/submit", ms.guard.WhiteList(ms.submitFromMasterServerHandler)) + /* + r.HandleFunc("/stats/health", ms.guard.WhiteList(statsHealthHandler)) + r.HandleFunc("/stats/counter", ms.guard.WhiteList(statsCounterHandler)) + r.HandleFunc("/stats/memory", ms.guard.WhiteList(statsMemoryHandler)) + */ + r.HandleFunc("/{fileId}", ms.redirectHandler) + } + + ms.Topo.StartRefreshWritableVolumes(ms.grpcDialOption, ms.option.GarbageThreshold, ms.preallocateSize) + + ms.startAdminScripts() return ms } @@ -98,6 +137,9 @@ func (ms *MasterServer) SetRaftServer(raftServer *RaftServer) { glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", ms.Topo.RaftServer.Leader(), "becomes leader.") } }) + ms.Topo.RaftServer.AddEventListener(raft.StateChangeEventType, func(e raft.Event) { + glog.V(0).Infof("state change: %+v", e) + }) if ms.Topo.IsLeader() { glog.V(0).Infoln("[", ms.Topo.RaftServer.Name(), "]", "I am the leader!") } else { @@ -133,8 +175,97 @@ func (ms *MasterServer) proxyToLeader(f func(w http.ResponseWriter, r *http.Requ proxy.Transport = util.Transport proxy.ServeHTTP(w, r) } else { - //drop it to the floor - //writeJsonError(w, r, errors.New(ms.Topo.RaftServer.Name()+" does not know Leader yet:"+ms.Topo.RaftServer.Leader())) + // drop it to the floor + // writeJsonError(w, r, errors.New(ms.Topo.RaftServer.Name()+" does not know Leader yet:"+ms.Topo.RaftServer.Leader())) + } + } +} + +func (ms *MasterServer) startAdminScripts() { + var err error + + v := util.GetViper() + adminScripts := 
v.GetString("master.maintenance.scripts") + glog.V(0).Infof("adminScripts:\n%v", adminScripts) + if adminScripts == "" { + return + } + + v.SetDefault("master.maintenance.sleep_minutes", 17) + sleepMinutes := v.GetInt("master.maintenance.sleep_minutes") + + v.SetDefault("master.filer.default_filer_url", "http://localhost:8888/") + filerURL := v.GetString("master.filer.default_filer_url") + + scriptLines := strings.Split(adminScripts, "\n") + + masterAddress := "localhost:" + strconv.Itoa(ms.option.Port) + + var shellOptions shell.ShellOptions + shellOptions.GrpcDialOption = security.LoadClientTLS(v, "grpc.master") + shellOptions.Masters = &masterAddress + + shellOptions.FilerHost, shellOptions.FilerPort, shellOptions.Directory, err = util.ParseFilerUrl(filerURL) + if err != nil { + glog.V(0).Infof("failed to parse master.filer.default_filer_urll=%s : %v\n", filerURL, err) + return + } + + commandEnv := shell.NewCommandEnv(shellOptions) + + reg, _ := regexp.Compile(`'.*?'|".*?"|\S+`) + + go commandEnv.MasterClient.KeepConnectedToMaster() + + go func() { + commandEnv.MasterClient.WaitUntilConnected() + + c := time.Tick(time.Duration(sleepMinutes) * time.Minute) + for range c { + if ms.Topo.IsLeader() { + for _, line := range scriptLines { + + cmds := reg.FindAllString(line, -1) + if len(cmds) == 0 { + continue + } + args := make([]string, len(cmds[1:])) + for i := range args { + args[i] = strings.Trim(string(cmds[1+i]), "\"'") + } + cmd := strings.ToLower(cmds[0]) + + for _, c := range shell.Commands { + if c.Name() == cmd { + glog.V(0).Infof("executing: %s %v", cmd, args) + if err := c.Do(args, commandEnv, os.Stdout); err != nil { + glog.V(0).Infof("error: %v", err) + } + } + } + } + } + } + }() +} + +func (ms *MasterServer) createSequencer(option *MasterOption) sequence.Sequencer { + var seq sequence.Sequencer + v := util.GetViper() + seqType := strings.ToLower(v.GetString(SequencerType)) + glog.V(1).Infof("[%s] : [%s]", SequencerType, seqType) + switch strings.ToLower(seqType) { + case "etcd": + var err error + urls := v.GetString(SequencerEtcdUrls) + glog.V(0).Infof("[%s] : [%s]", SequencerEtcdUrls, urls) + seq, err = sequence.NewEtcdSequencer(urls, option.MetaFolder) + if err != nil { + glog.Error(err) + seq = nil } + default: + seq = sequence.NewMemorySequencer() } + return seq } diff --git a/weed/server/master_server_handlers.go b/weed/server/master_server_handlers.go index a797dddfc..514d86800 100644 --- a/weed/server/master_server_handlers.go +++ b/weed/server/master_server_handlers.go @@ -7,8 +7,9 @@ import ( "strings" "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/stats" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) func (ms *MasterServer) lookupVolumeId(vids []string, collection string) (volumeLocations map[string]operation.LookupResult) { @@ -21,43 +22,77 @@ func (ms *MasterServer) lookupVolumeId(vids []string, collection string) (volume if _, ok := volumeLocations[vid]; ok { continue } - volumeId, err := storage.NewVolumeId(vid) - if err == nil { - machines := ms.Topo.Lookup(collection, volumeId) - if machines != nil { - var ret []operation.Location - for _, dn := range machines { - ret = append(ret, operation.Location{Url: dn.Url(), PublicUrl: dn.PublicUrl}) - } - volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Locations: ret} - } else { - volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Error: 
fmt.Sprintf("volumeId %s not found.", vid)} - } - } else { - volumeLocations[vid] = operation.LookupResult{VolumeId: vid, Error: fmt.Sprintf("Unknown volumeId format: %s", vid)} - } + volumeLocations[vid] = ms.findVolumeLocation(collection, vid) } return } -// Takes one volumeId only, can not do batch lookup +// If "fileId" is provided, this returns the fileId location and a JWT to update or delete the file. +// If "volumeId" is provided, this only returns the volumeId location func (ms *MasterServer) dirLookupHandler(w http.ResponseWriter, r *http.Request) { vid := r.FormValue("volumeId") - commaSep := strings.Index(vid, ",") - if commaSep > 0 { - vid = vid[0:commaSep] + if vid != "" { + // backward compatible + commaSep := strings.Index(vid, ",") + if commaSep > 0 { + vid = vid[0:commaSep] + } + } + fileId := r.FormValue("fileId") + if fileId != "" { + commaSep := strings.Index(fileId, ",") + if commaSep > 0 { + vid = fileId[0:commaSep] + } } - vids := []string{vid} collection := r.FormValue("collection") //optional, but can be faster if too many collections - volumeLocations := ms.lookupVolumeId(vids, collection) - location := volumeLocations[vid] + location := ms.findVolumeLocation(collection, vid) httpStatus := http.StatusOK - if location.Error != "" { + if location.Error != "" || location.Locations == nil { httpStatus = http.StatusNotFound + } else { + forRead := r.FormValue("read") + isRead := forRead == "yes" + ms.maybeAddJwtAuthorization(w, fileId, !isRead) } writeJsonQuiet(w, r, httpStatus, location) } +// findVolumeLocation finds the volume location from master topo if it is leader, +// or from master client if not leader +func (ms *MasterServer) findVolumeLocation(collection, vid string) operation.LookupResult { + var locations []operation.Location + var err error + if ms.Topo.IsLeader() { + volumeId, newVolumeIdErr := needle.NewVolumeId(vid) + if newVolumeIdErr != nil { + err = fmt.Errorf("Unknown volume id %s", vid) + } else { + machines := ms.Topo.Lookup(collection, volumeId) + for _, loc := range machines { + locations = append(locations, operation.Location{Url: loc.Url(), PublicUrl: loc.PublicUrl}) + } + if locations == nil { + err = fmt.Errorf("volume id %s not found", vid) + } + } + } else { + machines, getVidLocationsErr := ms.MasterClient.GetVidLocations(vid) + for _, loc := range machines { + locations = append(locations, operation.Location{Url: loc.Url, PublicUrl: loc.PublicUrl}) + } + err = getVidLocationsErr + } + ret := operation.LookupResult{ + VolumeId: vid, + Locations: locations, + } + if err != nil { + ret.Error = err.Error() + } + return ret +} + func (ms *MasterServer) dirAssignHandler(w http.ResponseWriter, r *http.Request) { stats.AssignRequest() requestedCount, e := strconv.ParseUint(r.FormValue("count"), 10, 64) @@ -65,6 +100,11 @@ func (ms *MasterServer) dirAssignHandler(w http.ResponseWriter, r *http.Request) requestedCount = 1 } + writableVolumeCount, e := strconv.Atoi(r.FormValue("writableVolumeCount")) + if e != nil { + writableVolumeCount = 0 + } + option, err := ms.getVolumeGrowOption(r) if err != nil { writeJsonQuiet(w, r, http.StatusNotAcceptable, operation.AssignResult{Error: err.Error()}) @@ -79,7 +119,7 @@ func (ms *MasterServer) dirAssignHandler(w http.ResponseWriter, r *http.Request) ms.vgLock.Lock() defer ms.vgLock.Unlock() if !ms.Topo.HasWritableVolume(option) { - if _, err = ms.vg.AutomaticGrowByType(option, ms.Topo); err != nil { + if _, err = ms.vg.AutomaticGrowByType(option, ms.grpcDialOption, ms.Topo, writableVolumeCount); err != 
nil { writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Cannot grow volume group! %v", err)) return @@ -88,8 +128,23 @@ func (ms *MasterServer) dirAssignHandler(w http.ResponseWriter, r *http.Request) } fid, count, dn, err := ms.Topo.PickForWrite(requestedCount, option) if err == nil { + ms.maybeAddJwtAuthorization(w, fid, true) writeJsonQuiet(w, r, http.StatusOK, operation.AssignResult{Fid: fid, Url: dn.Url(), PublicUrl: dn.PublicUrl, Count: count}) } else { writeJsonQuiet(w, r, http.StatusNotAcceptable, operation.AssignResult{Error: err.Error()}) } } + +func (ms *MasterServer) maybeAddJwtAuthorization(w http.ResponseWriter, fileId string, isWrite bool) { + var encodedJwt security.EncodedJwt + if isWrite { + encodedJwt = security.GenJwt(ms.guard.SigningKey, ms.guard.ExpiresAfterSec, fileId) + } else { + encodedJwt = security.GenJwt(ms.guard.ReadSigningKey, ms.guard.ReadExpiresAfterSec, fileId) + } + if encodedJwt == "" { + return + } + + w.Header().Set("Authorization", "BEARER "+string(encodedJwt)) +} diff --git a/weed/server/master_server_handlers_admin.go b/weed/server/master_server_handlers_admin.go index 490693d97..5d0986f97 100644 --- a/weed/server/master_server_handlers_admin.go +++ b/weed/server/master_server_handlers_admin.go @@ -2,7 +2,6 @@ package weed_server import ( "context" - "errors" "fmt" "math/rand" "net/http" @@ -11,19 +10,22 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/backend/memory_map" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" "github.com/chrislusf/seaweedfs/weed/topology" "github.com/chrislusf/seaweedfs/weed/util" ) func (ms *MasterServer) collectionDeleteHandler(w http.ResponseWriter, r *http.Request) { - collection, ok := ms.Topo.FindCollection(r.FormValue("collection")) + collectionName := r.FormValue("collection") + collection, ok := ms.Topo.FindCollection(collectionName) if !ok { - writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("collection %s does not exist", r.FormValue("collection"))) + writeJsonError(w, r, http.StatusBadRequest, fmt.Errorf("collection %s does not exist", collectionName)) return } for _, server := range collection.ListVolumeServers() { - err := operation.WithVolumeServerClient(server.Url(), func(client volume_server_pb.VolumeServerClient) error { + err := operation.WithVolumeServerClient(server.Url(), ms.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { _, deleteErr := client.DeleteCollection(context.Background(), &volume_server_pb.DeleteCollectionRequest{ Collection: collection.Name, }) @@ -34,7 +36,10 @@ func (ms *MasterServer) collectionDeleteHandler(w http.ResponseWriter, r *http.R return } } - ms.Topo.DeleteCollection(r.FormValue("collection")) + ms.Topo.DeleteCollection(collectionName) + + w.WriteHeader(http.StatusNoContent) + return } func (ms *MasterServer) dirStatusHandler(w http.ResponseWriter, r *http.Request) { @@ -46,17 +51,18 @@ func (ms *MasterServer) dirStatusHandler(w http.ResponseWriter, r *http.Request) func (ms *MasterServer) volumeVacuumHandler(w http.ResponseWriter, r *http.Request) { gcString := r.FormValue("garbageThreshold") - gcThreshold := ms.garbageThreshold + gcThreshold := ms.option.GarbageThreshold if gcString != "" { var err error gcThreshold, err = strconv.ParseFloat(gcString, 32) if 
err != nil { glog.V(0).Infof("garbageThreshold %s is not a valid float number: %v", gcString, err) + writeJsonError(w, r, http.StatusNotAcceptable, fmt.Errorf("garbageThreshold %s is not a valid float number", gcString)) return } } - glog.Infoln("garbageThreshold =", gcThreshold) - ms.Topo.Vacuum(gcThreshold, ms.preallocate) + // glog.Infoln("garbageThreshold =", gcThreshold) + ms.Topo.Vacuum(ms.grpcDialOption, gcThreshold, ms.preallocateSize) ms.dirStatusHandler(w, r) } @@ -67,17 +73,17 @@ func (ms *MasterServer) volumeGrowHandler(w http.ResponseWriter, r *http.Request writeJsonError(w, r, http.StatusNotAcceptable, err) return } - if err == nil { - if count, err = strconv.Atoi(r.FormValue("count")); err == nil { - if ms.Topo.FreeSpace() < count*option.ReplicaPlacement.GetCopyCount() { - err = errors.New("Only " + strconv.Itoa(ms.Topo.FreeSpace()) + " volumes left! Not enough for " + strconv.Itoa(count*option.ReplicaPlacement.GetCopyCount())) - } else { - count, err = ms.vg.GrowByCountAndType(count, option, ms.Topo) - } + + if count, err = strconv.Atoi(r.FormValue("count")); err == nil { + if ms.Topo.FreeSpace() < int64(count*option.ReplicaPlacement.GetCopyCount()) { + err = fmt.Errorf("only %d volumes left, not enough for %d", ms.Topo.FreeSpace(), count*option.ReplicaPlacement.GetCopyCount()) } else { - err = errors.New("parameter count is not found") + count, err = ms.vg.GrowByCountAndType(ms.grpcDialOption, count, option, ms.Topo) } + } else { + err = fmt.Errorf("can not parse parameter count %s", r.FormValue("count")) } + if err != nil { writeJsonError(w, r, http.StatusNotAcceptable, err) } else { @@ -94,23 +100,19 @@ func (ms *MasterServer) volumeStatusHandler(w http.ResponseWriter, r *http.Reque func (ms *MasterServer) redirectHandler(w http.ResponseWriter, r *http.Request) { vid, _, _, _, _ := parseURLPath(r.URL.Path) - volumeId, err := storage.NewVolumeId(vid) - if err != nil { - debug("parsing error:", err, r.URL.Path) - return - } collection := r.FormValue("collection") - machines := ms.Topo.Lookup(collection, volumeId) - if machines != nil && len(machines) > 0 { + location := ms.findVolumeLocation(collection, vid) + if location.Error == "" { + loc := location.Locations[rand.Intn(len(location.Locations))] var url string if r.URL.RawQuery != "" { - url = util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl) + r.URL.Path + "?" + r.URL.RawQuery + url = util.NormalizeUrl(loc.PublicUrl) + r.URL.Path + "?" 
+ r.URL.RawQuery } else { - url = util.NormalizeUrl(machines[rand.Intn(len(machines))].PublicUrl) + r.URL.Path + url = util.NormalizeUrl(loc.PublicUrl) + r.URL.Path } http.Redirect(w, r, url, http.StatusMovedPermanently) } else { - writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("volume id %d or collection %s not found", volumeId, collection)) + writeJsonError(w, r, http.StatusNotFound, fmt.Errorf("volume id %s not found: %s", vid, location.Error)) } } @@ -118,17 +120,17 @@ func (ms *MasterServer) selfUrl(r *http.Request) string { if r.Host != "" { return r.Host } - return "localhost:" + strconv.Itoa(ms.port) + return "localhost:" + strconv.Itoa(ms.option.Port) } func (ms *MasterServer) submitFromMasterServerHandler(w http.ResponseWriter, r *http.Request) { if ms.Topo.IsLeader() { - submitForClientHandler(w, r, ms.selfUrl(r)) + submitForClientHandler(w, r, ms.selfUrl(r), ms.grpcDialOption) } else { masterUrl, err := ms.Topo.Leader() if err != nil { writeJsonError(w, r, http.StatusInternalServerError, err) } else { - submitForClientHandler(w, r, masterUrl) + submitForClientHandler(w, r, masterUrl, ms.grpcDialOption) } } } @@ -141,17 +143,22 @@ func (ms *MasterServer) HasWritableVolume(option *topology.VolumeGrowOption) boo func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGrowOption, error) { replicationString := r.FormValue("replication") if replicationString == "" { - replicationString = ms.defaultReplicaPlacement + replicationString = ms.option.DefaultReplicaPlacement } - replicaPlacement, err := storage.NewReplicaPlacementFromString(replicationString) + replicaPlacement, err := super_block.NewReplicaPlacementFromString(replicationString) if err != nil { return nil, err } - ttl, err := storage.ReadTTL(r.FormValue("ttl")) + ttl, err := needle.ReadTTL(r.FormValue("ttl")) if err != nil { return nil, err } - preallocate := ms.preallocate + memoryMapMaxSizeMb, err := memory_map.ReadMemoryMapMaxSizeMb(r.FormValue("memoryMapMaxSizeMb")) + if err != nil { + return nil, err + } + + preallocate := ms.preallocateSize if r.FormValue("preallocate") != "" { preallocate, err = strconv.ParseInt(r.FormValue("preallocate"), 10, 64) if err != nil { @@ -159,13 +166,14 @@ func (ms *MasterServer) getVolumeGrowOption(r *http.Request) (*topology.VolumeGr } } volumeGrowOption := &topology.VolumeGrowOption{ - Collection: r.FormValue("collection"), - ReplicaPlacement: replicaPlacement, - Ttl: ttl, - Prealloacte: preallocate, - DataCenter: r.FormValue("dataCenter"), - Rack: r.FormValue("rack"), - DataNode: r.FormValue("dataNode"), + Collection: r.FormValue("collection"), + ReplicaPlacement: replicaPlacement, + Ttl: ttl, + Prealloacte: preallocate, + DataCenter: r.FormValue("dataCenter"), + Rack: r.FormValue("rack"), + DataNode: r.FormValue("dataNode"), + MemoryMapMaxSizeMb: memoryMapMaxSizeMb, } return volumeGrowOption, nil } diff --git a/weed/server/master_ui/templates.go b/weed/server/master_ui/templates.go index c43ccc02f..b674e3f82 100644 --- a/weed/server/master_ui/templates.go +++ b/weed/server/master_ui/templates.go @@ -14,7 +14,7 @@ var StatusTpl = template.Must(template.New("status").Parse(`<!DOCTYPE html> <div class="container"> <div class="page-header"> <h1> - <img src="/seaweedfsstatic/seaweed50x50.png"></img> + <a href="https://github.com/chrislusf/seaweedfs"><img src="/seaweedfsstatic/seaweed50x50.png"></img></a> SeaweedFS <small>{{ .Version }}</small> </h1> </div> @@ -41,7 +41,7 @@ var StatusTpl = template.Must(template.New("status").Parse(`<!DOCTYPE html> <td 
class="col-sm-2 field-label"><label>Other Masters:</label></td> <td class="col-sm-10"><ul class="list-unstyled"> {{ range $k, $p := .Peers }} - <li><a href="{{ $p.ConnectionString }}">{{ $p.Name }}</a></li> + <li><a href="http://{{ $p.Name }}/ui/index.html">{{ $p.Name }}</a></li> {{ end }} </ul></td> </tr> @@ -76,6 +76,7 @@ var StatusTpl = template.Must(template.New("status").Parse(`<!DOCTYPE html> <th>Rack</th> <th>RemoteAddr</th> <th>#Volumes</th> + <th>#ErasureCodingShards</th> <th>Max</th> </tr> </thead> @@ -88,6 +89,7 @@ var StatusTpl = template.Must(template.New("status").Parse(`<!DOCTYPE html> <td>{{ $rack.Id }}</td> <td><a href="http://{{ $dn.Url }}/ui/index.html">{{ $dn.Url }}</a></td> <td>{{ $dn.Volumes }}</td> + <td>{{ $dn.EcShards }}</td> <td>{{ $dn.Max }}</td> </tr> {{ end }} diff --git a/weed/server/msg_broker_grpc_server.go b/weed/server/msg_broker_grpc_server.go new file mode 100644 index 000000000..8b13aac76 --- /dev/null +++ b/weed/server/msg_broker_grpc_server.go @@ -0,0 +1,23 @@ +package weed_server + +import ( + "context" + + "github.com/chrislusf/seaweedfs/weed/pb/queue_pb" +) + +func (broker *MessageBroker) ConfigureTopic(context.Context, *queue_pb.ConfigureTopicRequest) (*queue_pb.ConfigureTopicResponse, error) { + panic("implement me") +} + +func (broker *MessageBroker) DeleteTopic(context.Context, *queue_pb.DeleteTopicRequest) (*queue_pb.DeleteTopicResponse, error) { + panic("implement me") +} + +func (broker *MessageBroker) StreamWrite(queue_pb.SeaweedQueue_StreamWriteServer) error { + panic("implement me") +} + +func (broker *MessageBroker) StreamRead(*queue_pb.ReadMessageRequest, queue_pb.SeaweedQueue_StreamReadServer) error { + panic("implement me") +} diff --git a/weed/server/msg_broker_server.go b/weed/server/msg_broker_server.go new file mode 100644 index 000000000..a9d908581 --- /dev/null +++ b/weed/server/msg_broker_server.go @@ -0,0 +1,121 @@ +package weed_server + +import ( + "context" + "fmt" + "time" + + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/util" +) + +type MessageBrokerOption struct { + Filers []string + DefaultReplication string + MaxMB int + Port int +} + +type MessageBroker struct { + option *MessageBrokerOption + grpcDialOption grpc.DialOption +} + +func NewMessageBroker(option *MessageBrokerOption) (messageBroker *MessageBroker, err error) { + + messageBroker = &MessageBroker{ + option: option, + grpcDialOption: security.LoadClientTLS(util.GetViper(), "grpc.msg_broker"), + } + + go messageBroker.loopForEver() + + return messageBroker, nil +} + +func (broker *MessageBroker) loopForEver() { + + for { + broker.checkPeers() + time.Sleep(3 * time.Second) + } + +} + +func (broker *MessageBroker) checkPeers() { + + // contact a filer about masters + var masters []string + for _, filer := range broker.option.Filers { + err := broker.withFilerClient(filer, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return err + } + masters = append(masters, resp.Masters...) 
+ return nil + }) + if err != nil { + fmt.Printf("failed to read masters from %+v: %v\n", broker.option.Filers, err) + return + } + } + + // contact each masters for filers + var filers []string + for _, master := range masters { + err := broker.withMasterClient(master, func(client master_pb.SeaweedClient) error { + resp, err := client.ListMasterClients(context.Background(), &master_pb.ListMasterClientsRequest{ + ClientType: "filer", + }) + if err != nil { + return err + } + + fmt.Printf("filers: %+v\n", resp.GrpcAddresses) + filers = append(filers, resp.GrpcAddresses...) + + return nil + }) + if err != nil { + fmt.Printf("failed to list filers: %v\n", err) + return + } + } + + // contact each filer about brokers + for _, filer := range filers { + err := broker.withFilerClient(filer, func(client filer_pb.SeaweedFilerClient) error { + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return err + } + masters = append(masters, resp.Masters...) + return nil + }) + if err != nil { + fmt.Printf("failed to read masters from %+v: %v\n", broker.option.Filers, err) + return + } + } + +} + +func (broker *MessageBroker) withFilerClient(filer string, fn func(filer_pb.SeaweedFilerClient) error) error { + + return pb.WithFilerClient(filer, broker.grpcDialOption, fn) + +} + +func (broker *MessageBroker) withMasterClient(master string, fn func(client master_pb.SeaweedClient) error) error { + + return pb.WithMasterClient(master, broker.grpcDialOption, func(client master_pb.SeaweedClient) error { + return fn(client) + }) + +} diff --git a/weed/server/raft_server.go b/weed/server/raft_server.go index 01ca41aac..0381c7feb 100644 --- a/weed/server/raft_server.go +++ b/weed/server/raft_server.go @@ -3,36 +3,37 @@ package weed_server import ( "encoding/json" "io/ioutil" - "math/rand" "os" "path" "reflect" "sort" - "strings" "time" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/raft" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/topology" - "github.com/gorilla/mux" ) type RaftServer struct { peers []string // initial peers to join with raftServer raft.Server dataDir string - httpAddr string - router *mux.Router + serverAddr string topo *topology.Topology + *raft.GrpcServer } -func NewRaftServer(r *mux.Router, peers []string, httpAddr string, dataDir string, topo *topology.Topology, pulseSeconds int) *RaftServer { +func NewRaftServer(grpcDialOption grpc.DialOption, peers []string, serverAddr, dataDir string, topo *topology.Topology, pulseSeconds int) *RaftServer { s := &RaftServer{ - peers: peers, - httpAddr: httpAddr, - dataDir: dataDir, - router: r, - topo: topo, + peers: peers, + serverAddr: serverAddr, + dataDir: dataDir, + topo: topo, } if glog.V(4) { @@ -42,41 +43,39 @@ func NewRaftServer(r *mux.Router, peers []string, httpAddr string, dataDir strin raft.RegisterCommand(&topology.MaxVolumeIdCommand{}) var err error - transporter := raft.NewHTTPTransporter("/cluster", 0) - transporter.Transport.MaxIdleConnsPerHost = 1024 - glog.V(0).Infof("Starting RaftServer with %v", httpAddr) + transporter := raft.NewGrpcTransporter(grpcDialOption) + glog.V(0).Infof("Starting RaftServer with %v", serverAddr) // Clear old cluster configurations if peers are changed - if oldPeers, changed := isPeersChanged(s.dataDir, httpAddr, s.peers); changed { + if oldPeers, changed := isPeersChanged(s.dataDir, serverAddr, s.peers); changed { glog.V(0).Infof("Peers Change: %v => 
%v", oldPeers, s.peers) os.RemoveAll(path.Join(s.dataDir, "conf")) os.RemoveAll(path.Join(s.dataDir, "log")) os.RemoveAll(path.Join(s.dataDir, "snapshot")) } - s.raftServer, err = raft.NewServer(s.httpAddr, s.dataDir, transporter, nil, topo, "") + s.raftServer, err = raft.NewServer(s.serverAddr, s.dataDir, transporter, nil, topo, "") if err != nil { glog.V(0).Infoln(err) return nil } - transporter.Install(s.raftServer, s) s.raftServer.SetHeartbeatInterval(500 * time.Millisecond) s.raftServer.SetElectionTimeout(time.Duration(pulseSeconds) * 500 * time.Millisecond) s.raftServer.Start() - s.router.HandleFunc("/cluster/status", s.statusHandler).Methods("GET") - for _, peer := range s.peers { - s.raftServer.AddPeer(peer, "http://"+peer) + s.raftServer.AddPeer(peer, pb.ServerToGrpcAddress(peer)) } - time.Sleep(time.Duration(1000+rand.Int31n(3000)) * time.Millisecond) - if s.raftServer.IsLogEmpty() { + + s.GrpcServer = raft.NewGrpcServer(s.raftServer) + + if s.raftServer.IsLogEmpty() && isTheFirstOne(serverAddr, s.peers) { // Initialize the server by joining itself. glog.V(0).Infoln("Initializing new cluster") _, err := s.raftServer.Do(&raft.DefaultJoinCommand{ Name: s.raftServer.Name(), - ConnectionString: "http://" + s.httpAddr, + ConnectionString: pb.ServerToGrpcAddress(s.serverAddr), }) if err != nil { @@ -94,7 +93,7 @@ func (s *RaftServer) Peers() (members []string) { peers := s.raftServer.Peers() for _, p := range peers { - members = append(members, strings.TrimPrefix(p.ConnectionString, "http://")) + members = append(members, p.Name) } return @@ -113,7 +112,7 @@ func isPeersChanged(dir string, self string, peers []string) (oldPeers []string, } for _, p := range conf.Peers { - oldPeers = append(oldPeers, strings.TrimPrefix(p.ConnectionString, "http://")) + oldPeers = append(oldPeers, p.Name) } oldPeers = append(oldPeers, self) @@ -127,3 +126,11 @@ func isPeersChanged(dir string, self string, peers []string) (oldPeers []string, return oldPeers, !reflect.DeepEqual(peers, oldPeers) } + +func isTheFirstOne(self string, peers []string) bool { + sort.Strings(peers) + if len(peers) <= 0 { + return true + } + return self == peers[0] +} diff --git a/weed/server/raft_server_handlers.go b/weed/server/raft_server_handlers.go index 627fe354e..fd38cb977 100644 --- a/weed/server/raft_server_handlers.go +++ b/weed/server/raft_server_handlers.go @@ -1,16 +1,17 @@ package weed_server import ( - "github.com/chrislusf/seaweedfs/weed/operation" "net/http" ) -func (s *RaftServer) HandleFunc(pattern string, handler func(http.ResponseWriter, *http.Request)) { - s.router.HandleFunc(pattern, handler) +type ClusterStatusResult struct { + IsLeader bool `json:"IsLeader,omitempty"` + Leader string `json:"Leader,omitempty"` + Peers []string `json:"Peers,omitempty"` } -func (s *RaftServer) statusHandler(w http.ResponseWriter, r *http.Request) { - ret := operation.ClusterStatusResult{ +func (s *RaftServer) StatusHandler(w http.ResponseWriter, r *http.Request) { + ret := ClusterStatusResult{ IsLeader: s.topo.IsLeader(), Peers: s.Peers(), } diff --git a/weed/server/volume_grpc_admin.go b/weed/server/volume_grpc_admin.go index f0b57b35a..27b21ac09 100644 --- a/weed/server/volume_grpc_admin.go +++ b/weed/server/volume_grpc_admin.go @@ -2,10 +2,14 @@ package weed_server import ( "context" + "fmt" + "path/filepath" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/stats" + 
"github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" ) func (vs *VolumeServer) DeleteCollection(ctx context.Context, req *volume_server_pb.DeleteCollectionRequest) (*volume_server_pb.DeleteCollectionResponse, error) { @@ -24,17 +28,18 @@ func (vs *VolumeServer) DeleteCollection(ctx context.Context, req *volume_server } -func (vs *VolumeServer) AssignVolume(ctx context.Context, req *volume_server_pb.AssignVolumeRequest) (*volume_server_pb.AssignVolumeResponse, error) { +func (vs *VolumeServer) AllocateVolume(ctx context.Context, req *volume_server_pb.AllocateVolumeRequest) (*volume_server_pb.AllocateVolumeResponse, error) { - resp := &volume_server_pb.AssignVolumeResponse{} + resp := &volume_server_pb.AllocateVolumeResponse{} err := vs.store.AddVolume( - storage.VolumeId(req.VolumdId), + needle.VolumeId(req.VolumeId), req.Collection, vs.needleMapKind, req.Replication, req.Ttl, req.Preallocate, + req.MemoryMapMaxSizeMb, ) if err != nil { @@ -51,7 +56,7 @@ func (vs *VolumeServer) VolumeMount(ctx context.Context, req *volume_server_pb.V resp := &volume_server_pb.VolumeMountResponse{} - err := vs.store.MountVolume(storage.VolumeId(req.VolumdId)) + err := vs.store.MountVolume(needle.VolumeId(req.VolumeId)) if err != nil { glog.Errorf("volume mount %v: %v", req, err) @@ -67,7 +72,7 @@ func (vs *VolumeServer) VolumeUnmount(ctx context.Context, req *volume_server_pb resp := &volume_server_pb.VolumeUnmountResponse{} - err := vs.store.UnmountVolume(storage.VolumeId(req.VolumdId)) + err := vs.store.UnmountVolume(needle.VolumeId(req.VolumeId)) if err != nil { glog.Errorf("volume unmount %v: %v", req, err) @@ -78,3 +83,86 @@ func (vs *VolumeServer) VolumeUnmount(ctx context.Context, req *volume_server_pb return resp, err } + +func (vs *VolumeServer) VolumeDelete(ctx context.Context, req *volume_server_pb.VolumeDeleteRequest) (*volume_server_pb.VolumeDeleteResponse, error) { + + resp := &volume_server_pb.VolumeDeleteResponse{} + + err := vs.store.DeleteVolume(needle.VolumeId(req.VolumeId)) + + if err != nil { + glog.Errorf("volume delete %v: %v", req, err) + } else { + glog.V(2).Infof("volume delete %v", req) + } + + return resp, err + +} + +func (vs *VolumeServer) VolumeConfigure(ctx context.Context, req *volume_server_pb.VolumeConfigureRequest) (*volume_server_pb.VolumeConfigureResponse, error) { + + resp := &volume_server_pb.VolumeConfigureResponse{} + + // check replication format + if _, err := super_block.NewReplicaPlacementFromString(req.Replication); err != nil { + resp.Error = fmt.Sprintf("volume configure replication %v: %v", req, err) + return resp, nil + } + + // unmount + if err := vs.store.UnmountVolume(needle.VolumeId(req.VolumeId)); err != nil { + glog.Errorf("volume configure unmount %v: %v", req, err) + resp.Error = fmt.Sprintf("volume configure unmount %v: %v", req, err) + return resp, nil + } + + // modify the volume info file + if err := vs.store.ConfigureVolume(needle.VolumeId(req.VolumeId), req.Replication); err != nil { + glog.Errorf("volume configure %v: %v", req, err) + resp.Error = fmt.Sprintf("volume configure %v: %v", req, err) + return resp, nil + } + + // mount + if err := vs.store.MountVolume(needle.VolumeId(req.VolumeId)); err != nil { + glog.Errorf("volume configure mount %v: %v", req, err) + resp.Error = fmt.Sprintf("volume configure mount %v: %v", req, err) + return resp, nil + } + + return resp, nil + +} + +func (vs *VolumeServer) VolumeMarkReadonly(ctx context.Context, req 
*volume_server_pb.VolumeMarkReadonlyRequest) (*volume_server_pb.VolumeMarkReadonlyResponse, error) { + + resp := &volume_server_pb.VolumeMarkReadonlyResponse{} + + err := vs.store.MarkVolumeReadonly(needle.VolumeId(req.VolumeId)) + + if err != nil { + glog.Errorf("volume mark readonly %v: %v", req, err) + } else { + glog.V(2).Infof("volume mark readonly %v", req) + } + + return resp, err + +} + +func (vs *VolumeServer) VolumeServerStatus(ctx context.Context, req *volume_server_pb.VolumeServerStatusRequest) (*volume_server_pb.VolumeServerStatusResponse, error) { + + resp := &volume_server_pb.VolumeServerStatusResponse{} + + for _, loc := range vs.store.Locations { + if dir, e := filepath.Abs(loc.Directory); e == nil { + resp.DiskStatuses = append(resp.DiskStatuses, stats.NewDiskStatus(dir)) + } + } + + resp.MemoryStatus = stats.MemStat() + + return resp, nil + +} diff --git a/weed/server/volume_grpc_batch_delete.go b/weed/server/volume_grpc_batch_delete.go index 3554d97ae..fdb7937d2 100644 --- a/weed/server/volume_grpc_batch_delete.go +++ b/weed/server/volume_grpc_batch_delete.go @@ -7,7 +7,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) func (vs *VolumeServer) BatchDelete(ctx context.Context, req *volume_server_pb.BatchDeleteRequest) (*volume_server_pb.BatchDeleteResponse, error) { @@ -26,8 +26,8 @@ func (vs *VolumeServer) BatchDelete(ctx context.Context, req *volume_server_pb.B continue } - n := new(storage.Needle) - volumeId, _ := storage.NewVolumeId(vid) + n := new(needle.Needle) + volumeId, _ := needle.NewVolumeId(vid) n.ParsePath(id_cookie) cookie := n.Cookie @@ -58,7 +58,7 @@ func (vs *VolumeServer) BatchDelete(ctx context.Context, req *volume_server_pb.B break } n.LastModified = now - if size, err := vs.store.Delete(volumeId, n); err != nil { + if size, err := vs.store.DeleteVolumeNeedle(volumeId, n); err != nil { resp.Results = append(resp.Results, &volume_server_pb.DeleteResult{ FileId: fid, Status: http.StatusInternalServerError, diff --git a/weed/server/volume_grpc_client_to_master.go b/weed/server/volume_grpc_client_to_master.go index 219948936..1f4d9df10 100644 --- a/weed/server/volume_grpc_client_to_master.go +++ b/weed/server/volume_grpc_client_to_master.go @@ -2,13 +2,21 @@ package weed_server import ( "fmt" + "net" "time" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + + "golang.org/x/net/context" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/master_pb" - "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/util" - "golang.org/x/net/context" ) func (vs *VolumeServer) GetMaster() string { @@ -16,31 +24,41 @@ func (vs *VolumeServer) GetMaster() string { } func (vs *VolumeServer) heartbeat() { - glog.V(0).Infof("Volume server start with masters: %v", vs.MasterNodes) + glog.V(0).Infof("Volume server start with seed master nodes: %v", vs.SeedMasterNodes) vs.store.SetDataCenter(vs.dataCenter) vs.store.SetRack(vs.rack) + grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.volume") + var err error var newLeader string for { - for _, master := range vs.MasterNodes { + for _, master := range vs.SeedMasterNodes { if newLeader != "" { 
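+			// a previous heartbeat round learned of a newer leader; try it before the seed masters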
master = newLeader } - newLeader, err = vs.doHeartbeat(master, time.Duration(vs.pulseSeconds)*time.Second) + masterGrpcAddress, parseErr := pb.ParseServerToGrpcAddress(master) + if parseErr != nil { + glog.V(0).Infof("failed to parse master %v to a grpc address: %v", master, parseErr) + continue + } + vs.store.MasterAddress = master + newLeader, err = vs.doHeartbeat(master, masterGrpcAddress, grpcDialOption, time.Duration(vs.pulseSeconds)*time.Second) if err != nil { glog.V(0).Infof("heartbeat error: %v", err) time.Sleep(time.Duration(vs.pulseSeconds) * time.Second) + newLeader = "" + vs.store.MasterAddress = "" } } } } -func (vs *VolumeServer) doHeartbeat(masterNode string, sleepInterval time.Duration) (newLeader string, err error) { +func (vs *VolumeServer) doHeartbeat(masterNode, masterGrpcAddress string, grpcDialOption grpc.DialOption, sleepInterval time.Duration) (newLeader string, err error) { - grpcConection, err := util.GrpcDial(masterNode) + grpcConection, err := pb.GrpcDial(context.Background(), masterGrpcAddress, grpcDialOption) if err != nil { - return "", fmt.Errorf("fail to dial: %v", err) + return "", fmt.Errorf("fail to dial %s: %v", masterNode, err) } defer grpcConection.Close() @@ -53,9 +71,6 @@ glog.V(0).Infof("Heartbeat to: %v", masterNode) vs.currentMaster = masterNode - vs.store.Client = stream - defer func() { vs.store.Client = nil }() - doneChan := make(chan error, 1) go func() { @@ -66,17 +81,21 @@ return } if in.GetVolumeSizeLimit() != 0 { - vs.store.VolumeSizeLimit = in.GetVolumeSizeLimit() + vs.store.SetVolumeSizeLimit(in.GetVolumeSizeLimit()) } - if in.GetSecretKey() != "" { - vs.guard.SecretKey = security.Secret(in.GetSecretKey()) - } - if in.GetLeader() != "" && masterNode != in.GetLeader() { + if in.GetLeader() != "" && masterNode != in.GetLeader() && !isSameIP(in.GetLeader(), masterNode) { glog.V(0).Infof("Volume Server found a new master newLeader: %v instead of %v", in.GetLeader(), masterNode) newLeader = in.GetLeader() doneChan <- nil return } + if in.GetMetricsAddress() != "" && vs.MetricsAddress != in.GetMetricsAddress() { + vs.MetricsAddress = in.GetMetricsAddress() + vs.MetricsIntervalSec = int(in.GetMetricsIntervalSeconds()) + } + if len(in.StorageBackends) > 0 { + backend.LoadFromPbStorageBackends(in.StorageBackends) + } } }() @@ -85,33 +104,89 @@ return "", err } - tickChan := time.Tick(sleepInterval) + if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil { + glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err) + return "", err + } + + volumeTickChan := time.Tick(sleepInterval) + ecShardTickChan := time.Tick(17 * sleepInterval) for { select { - case vid := <-vs.store.NewVolumeIdChan: + case volumeMessage := <-vs.store.NewVolumesChan: + deltaBeat := &master_pb.Heartbeat{ + NewVolumes: []*master_pb.VolumeShortInformationMessage{ + &volumeMessage, + }, + } + glog.V(1).Infof("volume server %s:%d adds volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id) + if err = stream.Send(deltaBeat); err != nil { + glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err) + return "", err + } + case ecShardMessage := <-vs.store.NewEcShardsChan: + deltaBeat := &master_pb.Heartbeat{ + NewEcShards: []*master_pb.VolumeEcShardInformationMessage{ + 
&ecShardMessage, + }, + } + glog.V(1).Infof("volume server %s:%d adds ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id, + erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds()) + if err = stream.Send(deltaBeat); err != nil { + glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err) + return "", err + } + case volumeMessage := <-vs.store.DeletedVolumesChan: deltaBeat := &master_pb.Heartbeat{ - NewVids: []uint32{uint32(vid)}, + DeletedVolumes: []*master_pb.VolumeShortInformationMessage{ + &volumeMessage, + }, } + glog.V(1).Infof("volume server %s:%d deletes volume %d", vs.store.Ip, vs.store.Port, volumeMessage.Id) if err = stream.Send(deltaBeat); err != nil { glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err) return "", err } - case vid := <-vs.store.DeletedVolumeIdChan: + case ecShardMessage := <-vs.store.DeletedEcShardsChan: deltaBeat := &master_pb.Heartbeat{ - DeletedVids: []uint32{uint32(vid)}, + DeletedEcShards: []*master_pb.VolumeEcShardInformationMessage{ + &ecShardMessage, + }, } + glog.V(1).Infof("volume server %s:%d deletes ec shard %d:%d", vs.store.Ip, vs.store.Port, ecShardMessage.Id, + erasure_coding.ShardBits(ecShardMessage.EcIndexBits).ShardIds()) if err = stream.Send(deltaBeat); err != nil { glog.V(0).Infof("Volume Server Failed to update to master %s: %v", masterNode, err) return "", err } - case <-tickChan: + case <-volumeTickChan: + glog.V(4).Infof("volume server %s:%d heartbeat", vs.store.Ip, vs.store.Port) if err = stream.Send(vs.store.CollectHeartbeat()); err != nil { glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err) return "", err } - case <-doneChan: + case <-ecShardTickChan: + glog.V(4).Infof("volume server %s:%d ec heartbeat", vs.store.Ip, vs.store.Port) + if err = stream.Send(vs.store.CollectErasureCodingHeartbeat()); err != nil { + glog.V(0).Infof("Volume Server Failed to talk with master %s: %v", masterNode, err) + return "", err + } + case err = <-doneChan: return } } } + +func isSameIP(ip string, host string) bool { + ips, err := net.LookupIP(host) + if err != nil { + return false + } + for _, t := range ips { + if ip == t.String() { + return true + } + } + return false +} diff --git a/weed/server/volume_grpc_copy.go b/weed/server/volume_grpc_copy.go new file mode 100644 index 000000000..5cc75e74c --- /dev/null +++ b/weed/server/volume_grpc_copy.go @@ -0,0 +1,266 @@ +package weed_server + +import ( + "context" + "fmt" + "io" + "math" + "os" + "path" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/util" +) + +const BufferSizeLimit = 1024 * 1024 * 2 + +// VolumeCopy copy the .idx .dat .vif files, and mount the volume +func (vs *VolumeServer) VolumeCopy(ctx context.Context, req *volume_server_pb.VolumeCopyRequest) (*volume_server_pb.VolumeCopyResponse, error) { + + v := vs.store.GetVolume(needle.VolumeId(req.VolumeId)) + if v != nil { + return nil, fmt.Errorf("volume %d already exists", req.VolumeId) + } + + location := vs.store.FindFreeLocation() + if location == nil { + return nil, fmt.Errorf("no space left") + } + + // the master will not start compaction for read-only volumes, so it is safe to just copy files directly + // 
copy .dat and .idx files + // read .idx .dat file size and timestamp + // send .idx file + // send .dat file + // confirm size and timestamp + var volFileInfoResp *volume_server_pb.ReadVolumeFileStatusResponse + var volumeFileName, idxFileName, datFileName string + err := operation.WithVolumeServerClient(req.SourceDataNode, vs.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + var err error + volFileInfoResp, err = client.ReadVolumeFileStatus(context.Background(), + &volume_server_pb.ReadVolumeFileStatusRequest{ + VolumeId: req.VolumeId, + }) + if nil != err { + return fmt.Errorf("read volume file status failed, %v", err) + } + + volumeFileName = storage.VolumeFileName(location.Directory, volFileInfoResp.Collection, int(req.VolumeId)) + + // println("source:", volFileInfoResp.String()) + // copy ecx file + if err := vs.doCopyFile(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.IdxFileSize, volumeFileName, ".idx", false, false); err != nil { + return err + } + + if err := vs.doCopyFile(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.DatFileSize, volumeFileName, ".dat", false, true); err != nil { + return err + } + + if err := vs.doCopyFile(client, false, req.Collection, req.VolumeId, volFileInfoResp.CompactionRevision, volFileInfoResp.DatFileSize, volumeFileName, ".vif", false, true); err != nil { + return err + } + + return nil + }) + + idxFileName = volumeFileName + ".idx" + datFileName = volumeFileName + ".dat" + + if err != nil && volumeFileName != "" { + os.Remove(idxFileName) + os.Remove(datFileName) + os.Remove(volumeFileName + ".vif") + return nil, err + } + + if err = checkCopyFiles(volFileInfoResp, idxFileName, datFileName); err != nil { // added by panyc16 + return nil, err + } + + // mount the volume + err = vs.store.MountVolume(needle.VolumeId(req.VolumeId)) + if err != nil { + return nil, fmt.Errorf("failed to mount volume %d: %v", req.VolumeId, err) + } + + return &volume_server_pb.VolumeCopyResponse{ + LastAppendAtNs: volFileInfoResp.DatFileTimestampSeconds * uint64(time.Second), + }, err +} + +func (vs *VolumeServer) doCopyFile(client volume_server_pb.VolumeServerClient, isEcVolume bool, collection string, vid, compactRevision uint32, stopOffset uint64, baseFileName, ext string, isAppend, ignoreSourceFileNotFound bool) error { + + copyFileClient, err := client.CopyFile(context.Background(), &volume_server_pb.CopyFileRequest{ + VolumeId: vid, + Ext: ext, + CompactionRevision: compactRevision, + StopOffset: stopOffset, + Collection: collection, + IsEcVolume: isEcVolume, + IgnoreSourceFileNotFound: ignoreSourceFileNotFound, + }) + if err != nil { + return fmt.Errorf("failed to start copying volume %d %s file: %v", vid, ext, err) + } + + err = writeToFile(copyFileClient, baseFileName+ext, util.NewWriteThrottler(vs.compactionBytePerSecond), isAppend) + if err != nil { + return fmt.Errorf("failed to copy %s file: %v", baseFileName+ext, err) + } + + return nil + +} + +/** +only check the the differ of the file size +todo: maybe should check the received count and deleted count of the volume +*/ +func checkCopyFiles(originFileInf *volume_server_pb.ReadVolumeFileStatusResponse, idxFileName, datFileName string) error { + stat, err := os.Stat(idxFileName) + if err != nil { + return fmt.Errorf("stat idx file %s failed, %v", idxFileName, err) + } + if originFileInf.IdxFileSize != uint64(stat.Size()) { + return fmt.Errorf("idx file %s size [%v] is not same as origin 
file size [%v]", + idxFileName, stat.Size(), originFileInf.IdxFileSize) + } + + stat, err = os.Stat(datFileName) + if err != nil { + return fmt.Errorf("get dat file info failed, %v", err) + } + if originFileInf.DatFileSize != uint64(stat.Size()) { + return fmt.Errorf("the dat file size [%v] is not same as origin file size [%v]", + stat.Size(), originFileInf.DatFileSize) + } + return nil +} + +func writeToFile(client volume_server_pb.VolumeServer_CopyFileClient, fileName string, wt *util.WriteThrottler, isAppend bool) error { + glog.V(4).Infof("writing to %s", fileName) + flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC + if isAppend { + flags = os.O_WRONLY | os.O_CREATE + } + dst, err := os.OpenFile(fileName, flags, 0644) + if err != nil { + return nil + } + defer dst.Close() + + for { + resp, receiveErr := client.Recv() + if receiveErr == io.EOF { + break + } + if receiveErr != nil { + return fmt.Errorf("receiving %s: %v", fileName, receiveErr) + } + dst.Write(resp.FileContent) + wt.MaybeSlowdown(int64(len(resp.FileContent))) + } + return nil +} + +func (vs *VolumeServer) ReadVolumeFileStatus(ctx context.Context, req *volume_server_pb.ReadVolumeFileStatusRequest) (*volume_server_pb.ReadVolumeFileStatusResponse, error) { + resp := &volume_server_pb.ReadVolumeFileStatusResponse{} + v := vs.store.GetVolume(needle.VolumeId(req.VolumeId)) + if v == nil { + return nil, fmt.Errorf("not found volume id %d", req.VolumeId) + } + + resp.VolumeId = req.VolumeId + datSize, idxSize, modTime := v.FileStat() + resp.DatFileSize = datSize + resp.IdxFileSize = idxSize + resp.DatFileTimestampSeconds = uint64(modTime.Unix()) + resp.IdxFileTimestampSeconds = uint64(modTime.Unix()) + resp.FileCount = v.FileCount() + resp.CompactionRevision = uint32(v.CompactionRevision) + resp.Collection = v.Collection + return resp, nil +} + +// CopyFile client pulls the volume related file from the source server. +// if req.CompactionRevision != math.MaxUint32, it ensures the compact revision is as expected +// The copying still stop at req.StopOffset, but you can set it to math.MaxUint64 in order to read all data. 
+// CopyFile lets a client pull a volume-related file from this source server.
+// If req.CompactionRevision != math.MaxUint32, it verifies that the compaction revision is as expected.
+// The copy stops at req.StopOffset; set it to math.MaxUint64 to read all data.
+func (vs *VolumeServer) CopyFile(req *volume_server_pb.CopyFileRequest, stream volume_server_pb.VolumeServer_CopyFileServer) error {
+
+	var fileName string
+	if !req.IsEcVolume {
+		v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
+		if v == nil {
+			return fmt.Errorf("not found volume id %d", req.VolumeId)
+		}
+
+		if uint32(v.CompactionRevision) != req.CompactionRevision && req.CompactionRevision != math.MaxUint32 {
+			return fmt.Errorf("volume %d is compacted", req.VolumeId)
+		}
+		fileName = v.FileName() + req.Ext
+	} else {
+		baseFileName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId)) + req.Ext
+		for _, location := range vs.store.Locations {
+			tName := path.Join(location.Directory, baseFileName)
+			if util.FileExists(tName) {
+				fileName = tName
+			}
+		}
+		if fileName == "" {
+			if req.IgnoreSourceFileNotFound {
+				return nil
+			}
+			return fmt.Errorf("CopyFile not found ec volume id %d", req.VolumeId)
+		}
+	}
+
+	bytesToRead := int64(req.StopOffset)
+
+	file, err := os.Open(fileName)
+	if err != nil {
+		if req.IgnoreSourceFileNotFound && os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+	defer file.Close()
+
+	buffer := make([]byte, BufferSizeLimit)
+
+	for bytesToRead > 0 {
+		bytesread, err := file.Read(buffer)
+
+		// println(fileName, "read", bytesread, "bytes, with target", bytesToRead)
+
+		if err != nil {
+			if err != io.EOF {
+				return err
+			}
+			// println(fileName, "read", bytesread, "bytes, with target", bytesToRead, "err", err.Error())
+			break
+		}
+
+		if int64(bytesread) > bytesToRead {
+			bytesread = int(bytesToRead)
+		}
+		err = stream.Send(&volume_server_pb.CopyFileResponse{
+			FileContent: buffer[:bytesread],
+		})
+		if err != nil {
+			// println("sending", bytesread, "bytes err", err.Error())
+			return err
+		}
+
+		bytesToRead -= int64(bytesread)
+
+	}
+
+	return nil
+}
diff --git a/weed/server/volume_grpc_copy_incremental.go b/weed/server/volume_grpc_copy_incremental.go
new file mode 100644
index 000000000..6d6c3daa3
--- /dev/null
+++ b/weed/server/volume_grpc_copy_incremental.go
@@ -0,0 +1,66 @@
+package weed_server
+
+import (
+	"context"
+	"fmt"
+	"io"
+
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/backend"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+func (vs *VolumeServer) VolumeIncrementalCopy(req *volume_server_pb.VolumeIncrementalCopyRequest, stream volume_server_pb.VolumeServer_VolumeIncrementalCopyServer) error {
+
+	v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
+	if v == nil {
+		return fmt.Errorf("not found volume id %d", req.VolumeId)
+	}
+
+	stopOffset, _, _ := v.FileStat()
+	foundOffset, isLastOne, err := v.BinarySearchByAppendAtNs(req.SinceNs)
+	if err != nil {
+		return fmt.Errorf("fail to locate by appendAtNs %d: %s", req.SinceNs, err)
+	}
+
+	if isLastOne {
+		return nil
+	}
+
+	startOffset := foundOffset.ToAcutalOffset()
+
+	buf := make([]byte, 1024*1024*2)
+	return sendFileContent(v.DataBackend, buf, startOffset, int64(stopOffset), stream)
+
+}
+
+func (vs *VolumeServer) VolumeSyncStatus(ctx context.Context, req *volume_server_pb.VolumeSyncStatusRequest) (*volume_server_pb.VolumeSyncStatusResponse, error) {
+
+	v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
+	if v == nil {
+		return nil, fmt.Errorf("not found volume id %d", req.VolumeId)
+	}
+
+	resp := v.GetVolumeSyncStatus()
+
+	return resp, nil
+
+}
+
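A replica that already holds an older copy of the volume could use these two RPCs to catch up instead of recopying everything. A hedged sketch, reusing the client stub from the earlier example; sinceNs and the local datFile handling are illustrative assumptions:

// Stream every needle appended after sinceNs and append it to the local .dat file.
stream, err := client.VolumeIncrementalCopy(context.Background(), &volume_server_pb.VolumeIncrementalCopyRequest{
	VolumeId: 7,
	SinceNs:  sinceNs, // e.g. LastAppendAtNs from an earlier VolumeCopy response
})
if err != nil {
	return err
}
for {
	resp, recvErr := stream.Recv()
	if recvErr == io.EOF {
		break
	}
	if recvErr != nil {
		return recvErr
	}
	if _, writeErr := datFile.Write(resp.FileContent); writeErr != nil {
		return writeErr
	}
}
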
+func sendFileContent(datBackend backend.BackendStorageFile, buf []byte, startOffset, stopOffset int64, stream volume_server_pb.VolumeServer_VolumeIncrementalCopyServer) error {
+	var blockSizeLimit = int64(len(buf))
+	for i := int64(0); i < stopOffset-startOffset; i += blockSizeLimit {
+		n, readErr := datBackend.ReadAt(buf, startOffset+i)
+		if readErr == nil || readErr == io.EOF {
+			resp := &volume_server_pb.VolumeIncrementalCopyResponse{}
+			resp.FileContent = buf[:n]
+			sendErr := stream.Send(resp)
+			if sendErr != nil {
+				return sendErr
+			}
+		} else {
+			return readErr
+		}
+	}
+	return nil
+}
diff --git a/weed/server/volume_grpc_erasure_coding.go b/weed/server/volume_grpc_erasure_coding.go
new file mode 100644
index 000000000..66dd5bf8d
--- /dev/null
+++ b/weed/server/volume_grpc_erasure_coding.go
@@ -0,0 +1,379 @@
+package weed_server
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage"
+	"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"github.com/chrislusf/seaweedfs/weed/storage/types"
+	"github.com/chrislusf/seaweedfs/weed/util"
+)
+
+/*
+
+Steps to apply erasure coding to .dat .idx files
+0. ensure the volume is readonly
+1. client calls VolumeEcShardsGenerate to generate the .ecx and .ec00 ~ .ec13 files
+2. client asks master for possible servers to hold the ec files, at least 4 servers
+3. client calls VolumeEcShardsCopy on the above target servers to copy the ec files from the source server
+4. target servers report the new ec files to the master
+5. master stores vid -> [14]*DataNode
+6. client checks master. If all 14 shards are ready, delete the original .dat and .idx files
+
+*/
+
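A hedged sketch of the client-side orchestration those steps describe, covering steps 1 and 3; srcClient and dstClient are assumed volume_server_pb stubs, the address is a placeholder, and shard placement is simplified to a single target:

// step 1: have the source server generate .ecx and .ec00 ~ .ec13
if _, err := srcClient.VolumeEcShardsGenerate(ctx, &volume_server_pb.VolumeEcShardsGenerateRequest{
	VolumeId:   7,
	Collection: "",
}); err != nil {
	return err
}
// step 3: have one target server pull a subset of the shards plus the .ecx file.
// Note: as implemented below, CopyEcxFile returns early, so .ecj/.vif copying
// would need a separate call with CopyEcxFile unset.
if _, err := dstClient.VolumeEcShardsCopy(ctx, &volume_server_pb.VolumeEcShardsCopyRequest{
	VolumeId:       7,
	Collection:     "",
	ShardIds:       []uint32{0, 1, 2, 3},
	CopyEcxFile:    true,
	SourceDataNode: "src-volume-server:8080",
}); err != nil {
	return err
}
// step 4 follows when the target mounts the shards and reports them to the master
if _, err := dstClient.VolumeEcShardsMount(ctx, &volume_server_pb.VolumeEcShardsMountRequest{
	VolumeId:   7,
	Collection: "",
	ShardIds:   []uint32{0, 1, 2, 3},
}); err != nil {
	return err
}
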
+// VolumeEcShardsGenerate generates the .ecx and .ec00 ~ .ec13 files
+func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_server_pb.VolumeEcShardsGenerateRequest) (*volume_server_pb.VolumeEcShardsGenerateResponse, error) {
+
+	v := vs.store.GetVolume(needle.VolumeId(req.VolumeId))
+	if v == nil {
+		return nil, fmt.Errorf("volume %d not found", req.VolumeId)
+	}
+	baseFileName := v.FileName()
+
+	if v.Collection != req.Collection {
+		return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection)
+	}
+
+	// write .ecx file
+	if err := erasure_coding.WriteSortedFileFromIdx(baseFileName, ".ecx"); err != nil {
+		return nil, fmt.Errorf("WriteSortedFileFromIdx %s: %v", baseFileName, err)
+	}
+
+	// write .ec00 ~ .ec13 files
+	if err := erasure_coding.WriteEcFiles(baseFileName); err != nil {
+		return nil, fmt.Errorf("WriteEcFiles %s: %v", baseFileName, err)
+	}
+
+	// write .vif file
+	if err := pb.SaveVolumeInfo(baseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(v.Version())}); err != nil {
+		return nil, fmt.Errorf("SaveVolumeInfo %s: %v", baseFileName, err)
+	}
+
+	return &volume_server_pb.VolumeEcShardsGenerateResponse{}, nil
+}
+
+// VolumeEcShardsRebuild regenerates any missing .ec00 ~ .ec13 files
+func (vs *VolumeServer) VolumeEcShardsRebuild(ctx context.Context, req *volume_server_pb.VolumeEcShardsRebuildRequest) (*volume_server_pb.VolumeEcShardsRebuildResponse, error) {
+
+	baseFileName := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId))
+
+	var rebuiltShardIds []uint32
+
+	for _, location := range vs.store.Locations {
+		if util.FileExists(path.Join(location.Directory, baseFileName+".ecx")) {
+			// write .ec00 ~ .ec13 files
+			baseFileName = path.Join(location.Directory, baseFileName)
+			if generatedShardIds, err := erasure_coding.RebuildEcFiles(baseFileName); err != nil {
+				return nil, fmt.Errorf("RebuildEcFiles %s: %v", baseFileName, err)
+			} else {
+				rebuiltShardIds = generatedShardIds
+			}
+
+			if err := erasure_coding.RebuildEcxFile(baseFileName); err != nil {
+				return nil, fmt.Errorf("RebuildEcxFile %s: %v", baseFileName, err)
+			}
+
+			break
+		}
+	}
+
+	return &volume_server_pb.VolumeEcShardsRebuildResponse{
+		RebuiltShardIds: rebuiltShardIds,
+	}, nil
+}
+
+// VolumeEcShardsCopy copies the .ecx file and the requested ec shards
+func (vs *VolumeServer) VolumeEcShardsCopy(ctx context.Context, req *volume_server_pb.VolumeEcShardsCopyRequest) (*volume_server_pb.VolumeEcShardsCopyResponse, error) {
+
+	location := vs.store.FindFreeLocation()
+	if location == nil {
+		return nil, fmt.Errorf("no space left")
+	}
+
+	baseFileName := storage.VolumeFileName(location.Directory, req.Collection, int(req.VolumeId))
+
+	err := operation.WithVolumeServerClient(req.SourceDataNode, vs.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error {
+
+		// copy ec shard files
+		for _, shardId := range req.ShardIds {
+			if err := vs.doCopyFile(client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, erasure_coding.ToExt(int(shardId)), false, false); err != nil {
+				return err
+			}
+		}
+
+		if req.CopyEcxFile {
+
+			// copy ecx file
+			if err := vs.doCopyFile(client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, ".ecx", false, false); err != nil {
+				return err
+			}
+			return nil
+		}
+
+		if req.CopyEcjFile {
+			// copy ecj file
+			if err := vs.doCopyFile(client, true,
req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, ".ecj", true, true); err != nil { + return err + } + } + + if req.CopyVifFile { + // copy vif file + if err := vs.doCopyFile(client, true, req.Collection, req.VolumeId, math.MaxUint32, math.MaxInt64, baseFileName, ".vif", false, true); err != nil { + return err + } + } + + return nil + }) + if err != nil { + return nil, fmt.Errorf("VolumeEcShardsCopy volume %d: %v", req.VolumeId, err) + } + + return &volume_server_pb.VolumeEcShardsCopyResponse{}, nil +} + +// VolumeEcShardsDelete local delete the .ecx and some ec data slices if not needed +// the shard should not be mounted before calling this. +func (vs *VolumeServer) VolumeEcShardsDelete(ctx context.Context, req *volume_server_pb.VolumeEcShardsDeleteRequest) (*volume_server_pb.VolumeEcShardsDeleteResponse, error) { + + baseFilename := erasure_coding.EcShardBaseFileName(req.Collection, int(req.VolumeId)) + + glog.V(0).Infof("ec volume %d shard delete %v", req.VolumeId, req.ShardIds) + + found := false + for _, location := range vs.store.Locations { + if util.FileExists(path.Join(location.Directory, baseFilename+".ecx")) { + found = true + baseFilename = path.Join(location.Directory, baseFilename) + for _, shardId := range req.ShardIds { + os.Remove(baseFilename + erasure_coding.ToExt(int(shardId))) + } + break + } + } + + if !found { + return nil, nil + } + + // check whether to delete the .ecx and .ecj file also + hasEcxFile := false + hasIdxFile := false + existingShardCount := 0 + + bName := filepath.Base(baseFilename) + for _, location := range vs.store.Locations { + fileInfos, err := ioutil.ReadDir(location.Directory) + if err != nil { + continue + } + for _, fileInfo := range fileInfos { + if fileInfo.Name() == bName+".ecx" || fileInfo.Name() == bName+".ecj" { + hasEcxFile = true + continue + } + if fileInfo.Name() == bName+".idx" { + hasIdxFile = true + continue + } + if strings.HasPrefix(fileInfo.Name(), bName+".ec") { + existingShardCount++ + } + } + } + + if hasEcxFile && existingShardCount == 0 { + if err := os.Remove(baseFilename + ".ecx"); err != nil { + return nil, err + } + if err := os.Remove(baseFilename + ".ecj"); err != nil { + return nil, err + } + } + if !hasIdxFile { + // .vif is used for ec volumes and normal volumes + os.Remove(baseFilename + ".vif") + } + + return &volume_server_pb.VolumeEcShardsDeleteResponse{}, nil +} + +func (vs *VolumeServer) VolumeEcShardsMount(ctx context.Context, req *volume_server_pb.VolumeEcShardsMountRequest) (*volume_server_pb.VolumeEcShardsMountResponse, error) { + + for _, shardId := range req.ShardIds { + err := vs.store.MountEcShards(req.Collection, needle.VolumeId(req.VolumeId), erasure_coding.ShardId(shardId)) + + if err != nil { + glog.Errorf("ec shard mount %v: %v", req, err) + } else { + glog.V(2).Infof("ec shard mount %v", req) + } + + if err != nil { + return nil, fmt.Errorf("mount %d.%d: %v", req.VolumeId, shardId, err) + } + } + + return &volume_server_pb.VolumeEcShardsMountResponse{}, nil +} + +func (vs *VolumeServer) VolumeEcShardsUnmount(ctx context.Context, req *volume_server_pb.VolumeEcShardsUnmountRequest) (*volume_server_pb.VolumeEcShardsUnmountResponse, error) { + + for _, shardId := range req.ShardIds { + err := vs.store.UnmountEcShards(needle.VolumeId(req.VolumeId), erasure_coding.ShardId(shardId)) + + if err != nil { + glog.Errorf("ec shard unmount %v: %v", req, err) + } else { + glog.V(2).Infof("ec shard unmount %v", req) + } + + if err != nil { + return nil, fmt.Errorf("unmount %d.%d: 
%v", req.VolumeId, shardId, err) + } + } + + return &volume_server_pb.VolumeEcShardsUnmountResponse{}, nil +} + +func (vs *VolumeServer) VolumeEcShardRead(req *volume_server_pb.VolumeEcShardReadRequest, stream volume_server_pb.VolumeServer_VolumeEcShardReadServer) error { + + ecVolume, found := vs.store.FindEcVolume(needle.VolumeId(req.VolumeId)) + if !found { + return fmt.Errorf("VolumeEcShardRead not found ec volume id %d", req.VolumeId) + } + ecShard, found := ecVolume.FindEcVolumeShard(erasure_coding.ShardId(req.ShardId)) + if !found { + return fmt.Errorf("not found ec shard %d.%d", req.VolumeId, req.ShardId) + } + + if req.FileKey != 0 { + _, size, _ := ecVolume.FindNeedleFromEcx(types.Uint64ToNeedleId(req.FileKey)) + if size == types.TombstoneFileSize { + return stream.Send(&volume_server_pb.VolumeEcShardReadResponse{ + IsDeleted: true, + }) + } + } + + bufSize := req.Size + if bufSize > BufferSizeLimit { + bufSize = BufferSizeLimit + } + buffer := make([]byte, bufSize) + + startOffset, bytesToRead := req.Offset, req.Size + + for bytesToRead > 0 { + // min of bytesToRead and bufSize + bufferSize := bufSize + if bufferSize > bytesToRead { + bufferSize = bytesToRead + } + bytesread, err := ecShard.ReadAt(buffer[0:bufferSize], startOffset) + + // println("read", ecShard.FileName(), "startOffset", startOffset, bytesread, "bytes, with target", bufferSize) + if bytesread > 0 { + + if int64(bytesread) > bytesToRead { + bytesread = int(bytesToRead) + } + err = stream.Send(&volume_server_pb.VolumeEcShardReadResponse{ + Data: buffer[:bytesread], + }) + if err != nil { + // println("sending", bytesread, "bytes err", err.Error()) + return err + } + + startOffset += int64(bytesread) + bytesToRead -= int64(bytesread) + + } + + if err != nil { + if err != io.EOF { + return err + } + return nil + } + + } + + return nil + +} + +func (vs *VolumeServer) VolumeEcBlobDelete(ctx context.Context, req *volume_server_pb.VolumeEcBlobDeleteRequest) (*volume_server_pb.VolumeEcBlobDeleteResponse, error) { + + resp := &volume_server_pb.VolumeEcBlobDeleteResponse{} + + for _, location := range vs.store.Locations { + if localEcVolume, found := location.FindEcVolume(needle.VolumeId(req.VolumeId)); found { + + _, size, _, err := localEcVolume.LocateEcShardNeedle(types.NeedleId(req.FileKey), needle.Version(req.Version)) + if err != nil { + return nil, fmt.Errorf("locate in local ec volume: %v", err) + } + if size == types.TombstoneFileSize { + return resp, nil + } + + err = localEcVolume.DeleteNeedleFromEcx(types.NeedleId(req.FileKey)) + if err != nil { + return nil, err + } + + break + } + } + + return resp, nil +} + +// VolumeEcShardsToVolume generates the .idx, .dat files from .ecx, .ecj and .ec01 ~ .ec14 files +func (vs *VolumeServer) VolumeEcShardsToVolume(ctx context.Context, req *volume_server_pb.VolumeEcShardsToVolumeRequest) (*volume_server_pb.VolumeEcShardsToVolumeResponse, error) { + + v, found := vs.store.FindEcVolume(needle.VolumeId(req.VolumeId)) + if !found { + return nil, fmt.Errorf("ec volume %d not found", req.VolumeId) + } + baseFileName := v.FileName() + + if v.Collection != req.Collection { + return nil, fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection) + } + + // calculate .dat file size + datFileSize, err := erasure_coding.FindDatFileSize(baseFileName) + if err != nil { + return nil, fmt.Errorf("FindDatFileSize %s: %v", baseFileName, err) + } + + // write .dat file from .ec00 ~ .ec09 files + if err := erasure_coding.WriteDatFile(baseFileName, datFileSize); 
err != nil { + return nil, fmt.Errorf("WriteEcFiles %s: %v", baseFileName, err) + } + + // write .idx file from .ecx and .ecj files + if err := erasure_coding.WriteIdxFileFromEcIndex(baseFileName); err != nil { + return nil, fmt.Errorf("WriteIdxFileFromEcIndex %s: %v", baseFileName, err) + } + + return &volume_server_pb.VolumeEcShardsToVolumeResponse{}, nil +} diff --git a/weed/server/volume_grpc_file.go b/weed/server/volume_grpc_file.go new file mode 100644 index 000000000..4d71ddeb1 --- /dev/null +++ b/weed/server/volume_grpc_file.go @@ -0,0 +1,129 @@ +package weed_server + +import ( + "encoding/json" + "net/http" + "strings" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func (vs *VolumeServer) FileGet(req *volume_server_pb.FileGetRequest, stream volume_server_pb.VolumeServer_FileGetServer) error { + + headResponse := &volume_server_pb.FileGetResponse{} + n := new(needle.Needle) + + commaIndex := strings.LastIndex(req.FileId, ",") + vid := req.FileId[:commaIndex] + fid := req.FileId[commaIndex+1:] + + volumeId, err := needle.NewVolumeId(vid) + if err != nil { + headResponse.ErrorCode = http.StatusBadRequest + return stream.Send(headResponse) + } + err = n.ParsePath(fid) + if err != nil { + headResponse.ErrorCode = http.StatusBadRequest + return stream.Send(headResponse) + } + + hasVolume := vs.store.HasVolume(volumeId) + _, hasEcVolume := vs.store.FindEcVolume(volumeId) + + if !hasVolume && !hasEcVolume { + headResponse.ErrorCode = http.StatusMovedPermanently + return stream.Send(headResponse) + } + + cookie := n.Cookie + var count int + if hasVolume { + count, err = vs.store.ReadVolumeNeedle(volumeId, n) + } else if hasEcVolume { + count, err = vs.store.ReadEcShardNeedle(volumeId, n) + } + + if err != nil || count < 0 { + headResponse.ErrorCode = http.StatusNotFound + return stream.Send(headResponse) + } + if n.Cookie != cookie { + headResponse.ErrorCode = http.StatusNotFound + return stream.Send(headResponse) + } + + if n.LastModified != 0 { + headResponse.LastModified = n.LastModified + } + + headResponse.Etag = n.Etag() + + if n.HasPairs() { + pairMap := make(map[string]string) + err = json.Unmarshal(n.Pairs, &pairMap) + if err != nil { + glog.V(0).Infoln("Unmarshal pairs error:", err) + } + headResponse.Headers = pairMap + } + + /* + // skip this, no redirection + if vs.tryHandleChunkedFile(n, filename, w, r) { + return + } + */ + + if n.NameSize > 0 { + headResponse.Filename = string(n.Name) + } + mtype := "" + if n.MimeSize > 0 { + mt := string(n.Mime) + if !strings.HasPrefix(mt, "application/octet-stream") { + mtype = mt + } + } + headResponse.ContentType = mtype + + headResponse.IsGzipped = n.IsGzipped() + + if n.IsGzipped() && req.AcceptGzip { + if n.Data, err = util.UnGzipData(n.Data); err != nil { + glog.V(0).Infof("ungzip %s error: %v", req.FileId, err) + } + } + + headResponse.ContentLength = uint32(len(n.Data)) + bytesToRead := len(n.Data) + bytesRead := 0 + + t := headResponse + + for bytesRead < bytesToRead { + + stopIndex := bytesRead + BufferSizeLimit + if stopIndex > bytesToRead { + stopIndex = bytesToRead + } + + if t == nil { + t = &volume_server_pb.FileGetResponse{} + } + t.Data = n.Data[bytesRead:stopIndex] + + err = stream.Send(t) + t = nil + if err != nil { + return err + } + + bytesRead = stopIndex + } + + return nil +} diff --git a/weed/server/volume_grpc_query.go 
b/weed/server/volume_grpc_query.go new file mode 100644 index 000000000..767e28e7b --- /dev/null +++ b/weed/server/volume_grpc_query.go @@ -0,0 +1,69 @@ +package weed_server + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/query/json" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/tidwall/gjson" +) + +func (vs *VolumeServer) Query(req *volume_server_pb.QueryRequest, stream volume_server_pb.VolumeServer_QueryServer) error { + + for _, fid := range req.FromFileIds { + + vid, id_cookie, err := operation.ParseFileId(fid) + if err != nil { + glog.V(0).Infof("volume query failed to parse fid %s: %v", fid, err) + return err + } + + n := new(needle.Needle) + volumeId, _ := needle.NewVolumeId(vid) + n.ParsePath(id_cookie) + + cookie := n.Cookie + if _, err := vs.store.ReadVolumeNeedle(volumeId, n); err != nil { + glog.V(0).Infof("volume query failed to read fid %s: %v", fid, err) + return err + } + + if n.Cookie != cookie { + glog.V(0).Infof("volume query failed to read fid cookie %s: %v", fid, err) + return err + } + + if req.InputSerialization.CsvInput != nil { + + } + + if req.InputSerialization.JsonInput != nil { + + stripe := &volume_server_pb.QueriedStripe{ + Records: nil, + } + + filter := json.Query{ + Field: req.Filter.Field, + Op: req.Filter.Operand, + Value: req.Filter.Value, + } + gjson.ForEachLine(string(n.Data), func(line gjson.Result) bool { + passedFilter, values := json.QueryJson(line.Raw, req.Selections, filter) + if !passedFilter { + return true + } + stripe.Records = json.ToJson(stripe.Records, req.Selections, values) + return true + }) + err = stream.Send(stripe) + if err != nil { + return err + } + } + + } + + return nil +} diff --git a/weed/server/volume_grpc_sync.go b/weed/server/volume_grpc_sync.go deleted file mode 100644 index 084069211..000000000 --- a/weed/server/volume_grpc_sync.go +++ /dev/null @@ -1,87 +0,0 @@ -package weed_server - -import ( - "context" - "fmt" - - "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - "github.com/chrislusf/seaweedfs/weed/storage" - "github.com/chrislusf/seaweedfs/weed/storage/types" -) - -func (vs *VolumeServer) VolumeSyncStatus(ctx context.Context, req *volume_server_pb.VolumeSyncStatusRequest) (*volume_server_pb.VolumeSyncStatusResponse, error) { - - v := vs.store.GetVolume(storage.VolumeId(req.VolumdId)) - if v == nil { - return nil, fmt.Errorf("Not Found Volume Id %d", req.VolumdId) - } - - resp := v.GetVolumeSyncStatus() - - glog.V(2).Infof("volume sync status %d", req.VolumdId) - - return resp, nil - -} - -func (vs *VolumeServer) VolumeSyncIndex(ctx context.Context, req *volume_server_pb.VolumeSyncIndexRequest) (*volume_server_pb.VolumeSyncIndexResponse, error) { - - resp := &volume_server_pb.VolumeSyncIndexResponse{} - - v := vs.store.GetVolume(storage.VolumeId(req.VolumdId)) - if v == nil { - return nil, fmt.Errorf("Not Found Volume Id %d", req.VolumdId) - } - - content, err := v.IndexFileContent() - - if err != nil { - glog.Errorf("sync volume %d index: %v", req.VolumdId, err) - } else { - glog.V(2).Infof("sync volume %d index", req.VolumdId) - } - - resp.IndexFileContent = content - - return resp, nil - -} - -func (vs *VolumeServer) VolumeSyncData(ctx context.Context, req *volume_server_pb.VolumeSyncDataRequest) (*volume_server_pb.VolumeSyncDataResponse, error) { - - resp := 
&volume_server_pb.VolumeSyncDataResponse{} - - v := vs.store.GetVolume(storage.VolumeId(req.VolumdId)) - if v == nil { - return nil, fmt.Errorf("Not Found Volume Id %d", req.VolumdId) - } - - if uint32(v.SuperBlock.CompactRevision) != req.Revision { - return nil, fmt.Errorf("Requested Volume Revision is %d, but current revision is %d", req.Revision, v.SuperBlock.CompactRevision) - } - - content, err := storage.ReadNeedleBlob(v.DataFile(), int64(req.Offset)*types.NeedlePaddingSize, req.Size, v.Version()) - if err != nil { - return nil, fmt.Errorf("read offset:%d size:%d", req.Offset, req.Size) - } - - id, err := types.ParseNeedleId(req.NeedleId) - if err != nil { - return nil, fmt.Errorf("parsing needle id %s: %v", req.NeedleId, err) - } - n := new(storage.Needle) - n.ParseNeedleHeader(content) - if id != n.Id { - return nil, fmt.Errorf("Expected file entry id %d, but found %d", id, n.Id) - } - - if err != nil { - glog.Errorf("sync volume %d data: %v", req.VolumdId, err) - } - - resp.FileContent = content - - return resp, nil - -} diff --git a/weed/server/volume_grpc_tail.go b/weed/server/volume_grpc_tail.go new file mode 100644 index 000000000..c26d6ed8f --- /dev/null +++ b/weed/server/volume_grpc_tail.go @@ -0,0 +1,136 @@ +package weed_server + +import ( + "context" + "fmt" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" +) + +func (vs *VolumeServer) VolumeTailSender(req *volume_server_pb.VolumeTailSenderRequest, stream volume_server_pb.VolumeServer_VolumeTailSenderServer) error { + + v := vs.store.GetVolume(needle.VolumeId(req.VolumeId)) + if v == nil { + return fmt.Errorf("not found volume id %d", req.VolumeId) + } + + defer glog.V(1).Infof("tailing volume %d finished", v.Id) + + lastTimestampNs := req.SinceNs + drainingSeconds := req.IdleTimeoutSeconds + + for { + lastProcessedTimestampNs, err := sendNeedlesSince(stream, v, lastTimestampNs) + if err != nil { + glog.Infof("sendNeedlesSince: %v", err) + return fmt.Errorf("streamFollow: %v", err) + } + time.Sleep(2 * time.Second) + + if req.IdleTimeoutSeconds == 0 { + lastTimestampNs = lastProcessedTimestampNs + continue + } + if lastProcessedTimestampNs == lastTimestampNs { + drainingSeconds-- + if drainingSeconds <= 0 { + return nil + } + glog.V(1).Infof("tailing volume %d drains requests with %d seconds remaining", v.Id, drainingSeconds) + } else { + lastTimestampNs = lastProcessedTimestampNs + drainingSeconds = req.IdleTimeoutSeconds + glog.V(1).Infof("tailing volume %d resets draining wait time to %d seconds", v.Id, drainingSeconds) + } + + } + +} + +func sendNeedlesSince(stream volume_server_pb.VolumeServer_VolumeTailSenderServer, v *storage.Volume, lastTimestampNs uint64) (lastProcessedTimestampNs uint64, err error) { + + foundOffset, isLastOne, err := v.BinarySearchByAppendAtNs(lastTimestampNs) + if err != nil { + return 0, fmt.Errorf("fail to locate by appendAtNs %d: %s", lastTimestampNs, err) + } + + // log.Printf("reading ts %d offset %d isLast %v", lastTimestampNs, foundOffset, isLastOne) + + if isLastOne { + // need to heart beat to the client to ensure the connection health + sendErr := stream.Send(&volume_server_pb.VolumeTailSenderResponse{IsLastChunk: true}) + return lastTimestampNs, sendErr + } + + scanner := &VolumeFileScanner4Tailing{ + stream: 
stream, + } + + err = storage.ScanVolumeFileFrom(v.Version(), v.DataBackend, foundOffset.ToAcutalOffset(), scanner) + + return scanner.lastProcessedTimestampNs, err + +} + +func (vs *VolumeServer) VolumeTailReceiver(ctx context.Context, req *volume_server_pb.VolumeTailReceiverRequest) (*volume_server_pb.VolumeTailReceiverResponse, error) { + + resp := &volume_server_pb.VolumeTailReceiverResponse{} + + v := vs.store.GetVolume(needle.VolumeId(req.VolumeId)) + if v == nil { + return resp, fmt.Errorf("receiver not found volume id %d", req.VolumeId) + } + + defer glog.V(1).Infof("receive tailing volume %d finished", v.Id) + + return resp, operation.TailVolumeFromSource(req.SourceVolumeServer, vs.grpcDialOption, v.Id, req.SinceNs, int(req.IdleTimeoutSeconds), func(n *needle.Needle) error { + _, _, err := vs.store.WriteVolumeNeedle(v.Id, n) + return err + }) + +} + +// generate the volume idx +type VolumeFileScanner4Tailing struct { + stream volume_server_pb.VolumeServer_VolumeTailSenderServer + lastProcessedTimestampNs uint64 +} + +func (scanner *VolumeFileScanner4Tailing) VisitSuperBlock(superBlock super_block.SuperBlock) error { + return nil + +} +func (scanner *VolumeFileScanner4Tailing) ReadNeedleBody() bool { + return true +} + +func (scanner *VolumeFileScanner4Tailing) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { + isLastChunk := false + + // need to send body by chunks + for i := 0; i < len(needleBody); i += BufferSizeLimit { + stopOffset := i + BufferSizeLimit + if stopOffset >= len(needleBody) { + isLastChunk = true + stopOffset = len(needleBody) + } + + sendErr := scanner.stream.Send(&volume_server_pb.VolumeTailSenderResponse{ + NeedleHeader: needleHeader, + NeedleBody: needleBody[i:stopOffset], + IsLastChunk: isLastChunk, + }) + if sendErr != nil { + return sendErr + } + } + + scanner.lastProcessedTimestampNs = n.AppendAtNs + return nil +} diff --git a/weed/server/volume_grpc_tier_download.go b/weed/server/volume_grpc_tier_download.go new file mode 100644 index 000000000..7b3982e40 --- /dev/null +++ b/weed/server/volume_grpc_tier_download.go @@ -0,0 +1,85 @@ +package weed_server + +import ( + "fmt" + "time" + + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +// VolumeTierMoveDatFromRemote copy dat file from a remote tier to local volume server +func (vs *VolumeServer) VolumeTierMoveDatFromRemote(req *volume_server_pb.VolumeTierMoveDatFromRemoteRequest, stream volume_server_pb.VolumeServer_VolumeTierMoveDatFromRemoteServer) error { + + // find existing volume + v := vs.store.GetVolume(needle.VolumeId(req.VolumeId)) + if v == nil { + return fmt.Errorf("volume %d not found", req.VolumeId) + } + + // verify the collection + if v.Collection != req.Collection { + return fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection) + } + + // locate the disk file + storageName, storageKey := v.RemoteStorageNameKey() + if storageName == "" || storageKey == "" { + return fmt.Errorf("volume %d is already on local disk", req.VolumeId) + } + + // check whether the local .dat already exists + _, ok := v.DataBackend.(*backend.DiskFile) + if ok { + return fmt.Errorf("volume %d is already on local disk", req.VolumeId) + } + + // check valid storage backend type + backendStorage, found := backend.BackendStorages[storageName] + if !found { + var keys []string + for key := range backend.BackendStorages { + keys = 
append(keys, key) + } + return fmt.Errorf("remote storage %s not found from suppported: %v", storageName, keys) + } + + startTime := time.Now() + fn := func(progressed int64, percentage float32) error { + now := time.Now() + if now.Sub(startTime) < time.Second { + return nil + } + startTime = now + return stream.Send(&volume_server_pb.VolumeTierMoveDatFromRemoteResponse{ + Processed: progressed, + ProcessedPercentage: percentage, + }) + } + // copy the data file + _, err := backendStorage.DownloadFile(v.FileName()+".dat", storageKey, fn) + if err != nil { + return fmt.Errorf("backend %s copy file %s: %v", storageName, v.FileName()+".dat", err) + } + + if req.KeepRemoteDatFile { + return nil + } + + // remove remote file + if err := backendStorage.DeleteFile(storageKey); err != nil { + return fmt.Errorf("volume %d fail to delete remote file %s: %v", v.Id, storageKey, err) + } + + // forget remote file + v.GetVolumeInfo().Files = v.GetVolumeInfo().Files[1:] + if err := v.SaveVolumeInfo(); err != nil { + return fmt.Errorf("volume %d fail to save remote file info: %v", v.Id, err) + } + + v.DataBackend.Close() + v.DataBackend = nil + + return nil +} diff --git a/weed/server/volume_grpc_tier_upload.go b/weed/server/volume_grpc_tier_upload.go new file mode 100644 index 000000000..c9694df59 --- /dev/null +++ b/weed/server/volume_grpc_tier_upload.go @@ -0,0 +1,100 @@ +package weed_server + +import ( + "fmt" + "os" + "time" + + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +// VolumeTierMoveDatToRemote copy dat file to a remote tier +func (vs *VolumeServer) VolumeTierMoveDatToRemote(req *volume_server_pb.VolumeTierMoveDatToRemoteRequest, stream volume_server_pb.VolumeServer_VolumeTierMoveDatToRemoteServer) error { + + // find existing volume + v := vs.store.GetVolume(needle.VolumeId(req.VolumeId)) + if v == nil { + return fmt.Errorf("volume %d not found", req.VolumeId) + } + + // verify the collection + if v.Collection != req.Collection { + return fmt.Errorf("existing collection:%v unexpected input: %v", v.Collection, req.Collection) + } + + // locate the disk file + diskFile, ok := v.DataBackend.(*backend.DiskFile) + if !ok { + return fmt.Errorf("volume %d is not on local disk", req.VolumeId) + } + + // check valid storage backend type + backendStorage, found := backend.BackendStorages[req.DestinationBackendName] + if !found { + var keys []string + for key := range backend.BackendStorages { + keys = append(keys, key) + } + return fmt.Errorf("destination %s not found, suppported: %v", req.DestinationBackendName, keys) + } + + // check whether the existing backend storage is the same as requested + // if same, skip + backendType, backendId := backend.BackendNameToTypeId(req.DestinationBackendName) + for _, remoteFile := range v.GetVolumeInfo().GetFiles() { + if remoteFile.BackendType == backendType && remoteFile.BackendId == backendId { + return fmt.Errorf("destination %s already exists", req.DestinationBackendName) + } + } + + startTime := time.Now() + fn := func(progressed int64, percentage float32) error { + now := time.Now() + if now.Sub(startTime) < time.Second { + return nil + } + startTime = now + return stream.Send(&volume_server_pb.VolumeTierMoveDatToRemoteResponse{ + Processed: progressed, + ProcessedPercentage: percentage, + }) + } + + // remember the file original source + attributes := make(map[string]string) + attributes["volumeId"] = v.Id.String() + 
attributes["collection"] = v.Collection + attributes["ext"] = ".dat" + // copy the data file + key, size, err := backendStorage.CopyFile(diskFile.File, attributes, fn) + if err != nil { + return fmt.Errorf("backend %s copy file %s: %v", req.DestinationBackendName, diskFile.Name(), err) + } + + // save the remote file to volume tier info + v.GetVolumeInfo().Files = append(v.GetVolumeInfo().GetFiles(), &volume_server_pb.RemoteFile{ + BackendType: backendType, + BackendId: backendId, + Key: key, + Offset: 0, + FileSize: uint64(size), + ModifiedTime: uint64(time.Now().Unix()), + Extension: ".dat", + }) + + if err := v.SaveVolumeInfo(); err != nil { + return fmt.Errorf("volume %d fail to save remote file info: %v", v.Id, err) + } + + if err := v.LoadRemoteFile(); err != nil { + return fmt.Errorf("volume %d fail to load remote file: %v", v.Id, err) + } + + if !req.KeepLocalDatFile { + os.Remove(v.FileName() + ".dat") + } + + return nil +} diff --git a/weed/server/volume_grpc_vacuum.go b/weed/server/volume_grpc_vacuum.go index f0c87b582..24f982241 100644 --- a/weed/server/volume_grpc_vacuum.go +++ b/weed/server/volume_grpc_vacuum.go @@ -5,19 +5,19 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) func (vs *VolumeServer) VacuumVolumeCheck(ctx context.Context, req *volume_server_pb.VacuumVolumeCheckRequest) (*volume_server_pb.VacuumVolumeCheckResponse, error) { resp := &volume_server_pb.VacuumVolumeCheckResponse{} - garbageRatio, err := vs.store.CheckCompactVolume(storage.VolumeId(req.VolumdId)) + garbageRatio, err := vs.store.CheckCompactVolume(needle.VolumeId(req.VolumeId)) resp.GarbageRatio = garbageRatio if err != nil { - glog.V(3).Infof("check volume %d: %v", req.VolumdId, err) + glog.V(3).Infof("check volume %d: %v", req.VolumeId, err) } return resp, err @@ -28,12 +28,12 @@ func (vs *VolumeServer) VacuumVolumeCompact(ctx context.Context, req *volume_ser resp := &volume_server_pb.VacuumVolumeCompactResponse{} - err := vs.store.CompactVolume(storage.VolumeId(req.VolumdId), req.Preallocate) + err := vs.store.CompactVolume(needle.VolumeId(req.VolumeId), req.Preallocate, vs.compactionBytePerSecond) if err != nil { - glog.Errorf("compact volume %d: %v", req.VolumdId, err) + glog.Errorf("compact volume %d: %v", req.VolumeId, err) } else { - glog.V(1).Infof("compact volume %d", req.VolumdId) + glog.V(1).Infof("compact volume %d", req.VolumeId) } return resp, err @@ -44,12 +44,12 @@ func (vs *VolumeServer) VacuumVolumeCommit(ctx context.Context, req *volume_serv resp := &volume_server_pb.VacuumVolumeCommitResponse{} - err := vs.store.CommitCompactVolume(storage.VolumeId(req.VolumdId)) + err := vs.store.CommitCompactVolume(needle.VolumeId(req.VolumeId)) if err != nil { - glog.Errorf("commit volume %d: %v", req.VolumdId, err) + glog.Errorf("commit volume %d: %v", req.VolumeId, err) } else { - glog.V(1).Infof("commit volume %d", req.VolumdId) + glog.V(1).Infof("commit volume %d", req.VolumeId) } return resp, err @@ -60,12 +60,12 @@ func (vs *VolumeServer) VacuumVolumeCleanup(ctx context.Context, req *volume_ser resp := &volume_server_pb.VacuumVolumeCleanupResponse{} - err := vs.store.CommitCleanupVolume(storage.VolumeId(req.VolumdId)) + err := vs.store.CommitCleanupVolume(needle.VolumeId(req.VolumeId)) if err != nil { - glog.Errorf("cleanup volume %d: %v", req.VolumdId, err) + glog.Errorf("cleanup volume %d: %v", req.VolumeId, err) } else 
{ - glog.V(1).Infof("cleanup volume %d", req.VolumdId) + glog.V(1).Infof("cleanup volume %d", req.VolumeId) } return resp, err diff --git a/weed/server/volume_server.go b/weed/server/volume_server.go index 0914e81b0..2d716edc1 100644 --- a/weed/server/volume_server.go +++ b/weed/server/volume_server.go @@ -1,25 +1,36 @@ package weed_server import ( + "fmt" "net/http" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/storage" ) type VolumeServer struct { - MasterNodes []string - currentMaster string - pulseSeconds int - dataCenter string - rack string - store *storage.Store - guard *security.Guard + SeedMasterNodes []string + currentMaster string + pulseSeconds int + dataCenter string + rack string + store *storage.Store + guard *security.Guard + grpcDialOption grpc.DialOption - needleMapKind storage.NeedleMapType - FixJpgOrientation bool - ReadRedirect bool + needleMapKind storage.NeedleMapType + FixJpgOrientation bool + ReadRedirect bool + compactionBytePerSecond int64 + MetricsAddress string + MetricsIntervalSec int + fileSizeLimitBytes int64 } func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, @@ -30,26 +41,48 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, dataCenter string, rack string, whiteList []string, fixJpgOrientation bool, - readRedirect bool) *VolumeServer { + readRedirect bool, + compactionMBPerSecond int, + fileSizeLimitMB int, +) *VolumeServer { + + v := util.GetViper() + signingKey := v.GetString("jwt.signing.key") + v.SetDefault("jwt.signing.expires_after_seconds", 10) + expiresAfterSec := v.GetInt("jwt.signing.expires_after_seconds") + enableUiAccess := v.GetBool("access.ui") + + readSigningKey := v.GetString("jwt.signing.read.key") + v.SetDefault("jwt.signing.read.expires_after_seconds", 60) + readExpiresAfterSec := v.GetInt("jwt.signing.read.expires_after_seconds") + vs := &VolumeServer{ - pulseSeconds: pulseSeconds, - dataCenter: dataCenter, - rack: rack, - needleMapKind: needleMapKind, - FixJpgOrientation: fixJpgOrientation, - ReadRedirect: readRedirect, + pulseSeconds: pulseSeconds, + dataCenter: dataCenter, + rack: rack, + needleMapKind: needleMapKind, + FixJpgOrientation: fixJpgOrientation, + ReadRedirect: readRedirect, + grpcDialOption: security.LoadClientTLS(util.GetViper(), "grpc.volume"), + compactionBytePerSecond: int64(compactionMBPerSecond) * 1024 * 1024, + fileSizeLimitBytes: int64(fileSizeLimitMB) * 1024 * 1024, } - vs.MasterNodes = masterNodes - vs.store = storage.NewStore(port, ip, publicUrl, folders, maxCounts, vs.needleMapKind) + vs.SeedMasterNodes = masterNodes + vs.store = storage.NewStore(vs.grpcDialOption, port, ip, publicUrl, folders, maxCounts, vs.needleMapKind) - vs.guard = security.NewGuard(whiteList, "") + vs.guard = security.NewGuard(whiteList, signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec) handleStaticResources(adminMux) - adminMux.HandleFunc("/ui/index.html", vs.uiStatusHandler) - adminMux.HandleFunc("/status", vs.guard.WhiteList(vs.statusHandler)) - adminMux.HandleFunc("/stats/counter", vs.guard.WhiteList(statsCounterHandler)) - adminMux.HandleFunc("/stats/memory", vs.guard.WhiteList(statsMemoryHandler)) - adminMux.HandleFunc("/stats/disk", vs.guard.WhiteList(vs.statsDiskHandler)) + if signingKey == "" || enableUiAccess { + // only expose the volume server details for safe environments 
+ adminMux.HandleFunc("/ui/index.html", vs.uiStatusHandler) + adminMux.HandleFunc("/status", vs.guard.WhiteList(vs.statusHandler)) + /* + adminMux.HandleFunc("/stats/counter", vs.guard.WhiteList(statsCounterHandler)) + adminMux.HandleFunc("/stats/memory", vs.guard.WhiteList(statsMemoryHandler)) + adminMux.HandleFunc("/stats/disk", vs.guard.WhiteList(vs.statsDiskHandler)) + */ + } adminMux.HandleFunc("/", vs.privateStoreHandler) if publicMux != adminMux { // separated admin and public port @@ -58,6 +91,11 @@ func NewVolumeServer(adminMux, publicMux *http.ServeMux, ip string, } go vs.heartbeat() + hostAddress := fmt.Sprintf("%s:%d", ip, port) + go stats.LoopPushingMetric("volumeServer", hostAddress, stats.VolumeServerGather, + func() (addr string, intervalSeconds int) { + return vs.MetricsAddress, vs.MetricsIntervalSec + }) return vs } @@ -67,7 +105,3 @@ func (vs *VolumeServer) Shutdown() { vs.store.Close() glog.V(0).Infoln("Shut down successfully!") } - -func (vs *VolumeServer) jwt(fileId string) security.EncodedJwt { - return security.GenJwt(vs.guard.SecretKey, fileId) -} diff --git a/weed/server/volume_server_handlers.go b/weed/server/volume_server_handlers.go index 77b1274fd..14ad27d42 100644 --- a/weed/server/volume_server_handlers.go +++ b/weed/server/volume_server_handlers.go @@ -2,7 +2,10 @@ package weed_server import ( "net/http" + "strings" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/stats" ) @@ -45,3 +48,47 @@ func (vs *VolumeServer) publicReadOnlyHandler(w http.ResponseWriter, r *http.Req vs.GetOrHeadHandler(w, r) } } + +func (vs *VolumeServer) maybeCheckJwtAuthorization(r *http.Request, vid, fid string, isWrite bool) bool { + + var signingKey security.SigningKey + + if isWrite { + if len(vs.guard.SigningKey) == 0 { + return true + } else { + signingKey = vs.guard.SigningKey + } + } else { + if len(vs.guard.ReadSigningKey) == 0 { + return true + } else { + signingKey = vs.guard.ReadSigningKey + } + } + + tokenStr := security.GetJwt(r) + if tokenStr == "" { + glog.V(1).Infof("missing jwt from %s", r.RemoteAddr) + return false + } + + token, err := security.DecodeJwt(signingKey, tokenStr) + if err != nil { + glog.V(1).Infof("jwt verification error from %s: %v", r.RemoteAddr, err) + return false + } + if !token.Valid { + glog.V(1).Infof("jwt invalid from %s: %v", r.RemoteAddr, tokenStr) + return false + } + + if sc, ok := token.Claims.(*security.SeaweedFileIdClaims); ok { + if sepIndex := strings.LastIndex(fid, "_"); sepIndex > 0 { + fid = fid[:sepIndex] + } + return sc.Fid == vid+","+fid + } + glog.V(1).Infof("unexpected jwt from %s: %v", r.RemoteAddr, tokenStr) + return false +} diff --git a/weed/server/volume_server_handlers_admin.go b/weed/server/volume_server_handlers_admin.go index 25b6582f7..89bc051c5 100644 --- a/weed/server/volume_server_handlers_admin.go +++ b/weed/server/volume_server_handlers_admin.go @@ -12,7 +12,14 @@ import ( func (vs *VolumeServer) statusHandler(w http.ResponseWriter, r *http.Request) { m := make(map[string]interface{}) m["Version"] = util.VERSION - m["Volumes"] = vs.store.Status() + var ds []*volume_server_pb.DiskStatus + for _, loc := range vs.store.Locations { + if dir, e := filepath.Abs(loc.Directory); e == nil { + ds = append(ds, stats.NewDiskStatus(dir)) + } + } + m["DiskStatuses"] = ds + m["Volumes"] = vs.store.VolumeInfos() writeJsonQuiet(w, r, http.StatusOK, m) } diff --git a/weed/server/volume_server_handlers_read.go 
b/weed/server/volume_server_handlers_read.go index 4da13883e..6e603d158 100644 --- a/weed/server/volume_server_handlers_read.go +++ b/weed/server/volume_server_handlers_read.go @@ -2,31 +2,43 @@ package weed_server import ( "bytes" + "encoding/json" + "errors" + "fmt" "io" "mime" - "mime/multipart" "net/http" "net/url" - "path" + "path/filepath" "strconv" "strings" "time" - "encoding/json" - "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/images" "github.com/chrislusf/seaweedfs/weed/operation" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/util" ) var fileNameEscaper = strings.NewReplacer("\\", "\\\\", "\"", "\\\"") func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) { - n := new(storage.Needle) + + stats.VolumeServerRequestCounter.WithLabelValues("get").Inc() + start := time.Now() + defer func() { stats.VolumeServerRequestHistogram.WithLabelValues("get").Observe(time.Since(start).Seconds()) }() + + n := new(needle.Needle) vid, fid, filename, ext, _ := parseURLPath(r.URL.Path) - volumeId, err := storage.NewVolumeId(vid) + + if !vs.maybeCheckJwtAuthorization(r, vid, fid, false) { + writeJsonError(w, r, http.StatusUnauthorized, errors.New("wrong jwt")) + return + } + + volumeId, err := needle.NewVolumeId(vid) if err != nil { glog.V(2).Infoln("parsing error:", err, r.URL.Path) w.WriteHeader(http.StatusBadRequest) @@ -39,8 +51,10 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) return } - glog.V(4).Infoln("volume", volumeId, "reading", n) - if !vs.store.HasVolume(volumeId) { + // glog.V(4).Infoln("volume", volumeId, "reading", n) + hasVolume := vs.store.HasVolume(volumeId) + _, hasEcVolume := vs.store.FindEcVolume(volumeId) + if !hasVolume && !hasEcVolume { if !vs.ReadRedirect { glog.V(2).Infoln("volume is not local:", err, r.URL.Path) w.WriteHeader(http.StatusNotFound) @@ -50,7 +64,7 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) glog.V(2).Infoln("volume", volumeId, "found on", lookupResult, "error", err) if err == nil && len(lookupResult.Locations) > 0 { u, _ := url.Parse(util.NormalizeUrl(lookupResult.Locations[0].PublicUrl)) - u.Path = r.URL.Path + u.Path = fmt.Sprintf("%s/%s,%s", u.Path, vid, fid) arg := url.Values{} if c := r.FormValue("collection"); c != "" { arg.Set("collection", c) @@ -65,15 +79,20 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) return } cookie := n.Cookie - count, e := vs.store.ReadVolumeNeedle(volumeId, n) - glog.V(4).Infoln("read bytes", count, "error", e) - if e != nil || count < 0 { - glog.V(0).Infof("read %s error: %v", r.URL.Path, e) + var count int + if hasVolume { + count, err = vs.store.ReadVolumeNeedle(volumeId, n) + } else if hasEcVolume { + count, err = vs.store.ReadEcShardNeedle(volumeId, n) + } + // glog.V(4).Infoln("read bytes", count, "error", err) + if err != nil || count < 0 { + glog.V(0).Infof("read %s isNormalVolume %v error: %v", r.URL.Path, hasVolume, err) w.WriteHeader(http.StatusNotFound) return } if n.Cookie != cookie { - glog.V(0).Infoln("request", r.URL.Path, "with unmaching cookie seen:", cookie, "expected:", n.Cookie, "from", r.RemoteAddr, "agent", r.UserAgent()) + glog.V(0).Infof("request %s with cookie:%x expected:%x from %s agent %s", r.URL.Path, cookie, n.Cookie, r.RemoteAddr, r.UserAgent()) w.WriteHeader(http.StatusNotFound) return 
} @@ -112,7 +131,7 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) if n.NameSize > 0 && filename == "" { filename = string(n.Name) if ext == "" { - ext = path.Ext(filename) + ext = filepath.Ext(filename) } } mtype := "" @@ -128,7 +147,7 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") { w.Header().Set("Content-Encoding", "gzip") } else { - if n.Data, err = operation.UnGzipData(n.Data); err != nil { + if n.Data, err = util.UnGzipData(n.Data); err != nil { glog.V(0).Infoln("ungzip error:", err, r.URL.Path) } } @@ -142,7 +161,7 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) } } -func (vs *VolumeServer) tryHandleChunkedFile(n *storage.Needle, fileName string, w http.ResponseWriter, r *http.Request) (processed bool) { +func (vs *VolumeServer) tryHandleChunkedFile(n *needle.Needle, fileName string, w http.ResponseWriter, r *http.Request) (processed bool) { if !n.IsChunkedManifest() || r.URL.Query().Get("cm") == "false" { return false } @@ -156,7 +175,7 @@ func (vs *VolumeServer) tryHandleChunkedFile(n *storage.Needle, fileName string, fileName = chunkManifest.Name } - ext := path.Ext(fileName) + ext := filepath.Ext(fileName) mType := "" if chunkManifest.Mime != "" { @@ -203,113 +222,28 @@ func conditionallyResizeImages(originalDataReaderSeeker io.ReadSeeker, ext strin func writeResponseContent(filename, mimeType string, rs io.ReadSeeker, w http.ResponseWriter, r *http.Request) error { totalSize, e := rs.Seek(0, 2) if mimeType == "" { - if ext := path.Ext(filename); ext != "" { + if ext := filepath.Ext(filename); ext != "" { mimeType = mime.TypeByExtension(ext) } } if mimeType != "" { w.Header().Set("Content-Type", mimeType) } - if filename != "" { - contentDisposition := "inline" - if r.FormValue("dl") != "" { - if dl, _ := strconv.ParseBool(r.FormValue("dl")); dl { - contentDisposition = "attachment" - } - } - w.Header().Set("Content-Disposition", contentDisposition+`; filename="`+fileNameEscaper.Replace(filename)+`"`) - } w.Header().Set("Accept-Ranges", "bytes") + if r.Method == "HEAD" { w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) return nil } - rangeReq := r.Header.Get("Range") - if rangeReq == "" { - w.Header().Set("Content-Length", strconv.FormatInt(totalSize, 10)) - if _, e = rs.Seek(0, 0); e != nil { - return e - } - _, e = io.Copy(w, rs) - return e - } - //the rest is dealing with partial content request - //mostly copy from src/pkg/net/http/fs.go - ranges, err := parseRange(rangeReq, totalSize) - if err != nil { - http.Error(w, err.Error(), http.StatusRequestedRangeNotSatisfiable) - return nil - } - if sumRangesSize(ranges) > totalSize { - // The total number of bytes in all the ranges - // is larger than the size of the file by - // itself, so this is probably an attack, or a - // dumb client. Ignore the range request. - return nil - } - if len(ranges) == 0 { - return nil - } - if len(ranges) == 1 { - // RFC 2616, Section 14.16: - // "When an HTTP message includes the content of a single - // range (for example, a response to a request for a - // single range, or to a request for a set of ranges - // that overlap without any holes), this content is - // transmitted with a Content-Range header, and a - // Content-Length header showing the number of bytes - // actually transferred. - // ... 
- // A response to a request for a single range MUST NOT - // be sent using the multipart/byteranges media type." - ra := ranges[0] - w.Header().Set("Content-Length", strconv.FormatInt(ra.length, 10)) - w.Header().Set("Content-Range", ra.contentRange(totalSize)) - w.WriteHeader(http.StatusPartialContent) - if _, e = rs.Seek(ra.start, 0); e != nil { + adjustHeadersAfterHEAD(w, r, filename) + + processRangeRequst(r, w, totalSize, mimeType, func(writer io.Writer, offset int64, size int64) error { + if _, e = rs.Seek(offset, 0); e != nil { return e } - - _, e = io.CopyN(w, rs, ra.length) + _, e = io.CopyN(writer, rs, size) return e - } - // process multiple ranges - for _, ra := range ranges { - if ra.start > totalSize { - http.Error(w, "Out of Range", http.StatusRequestedRangeNotSatisfiable) - return nil - } - } - sendSize := rangesMIMESize(ranges, mimeType, totalSize) - pr, pw := io.Pipe() - mw := multipart.NewWriter(pw) - w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary()) - sendContent := pr - defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish. - go func() { - for _, ra := range ranges { - part, e := mw.CreatePart(ra.mimeHeader(mimeType, totalSize)) - if e != nil { - pw.CloseWithError(e) - return - } - if _, e = rs.Seek(ra.start, 0); e != nil { - pw.CloseWithError(e) - return - } - if _, e = io.CopyN(part, rs, ra.length); e != nil { - pw.CloseWithError(e) - return - } - } - mw.Close() - pw.Close() - }() - if w.Header().Get("Content-Encoding") == "" { - w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10)) - } - w.WriteHeader(http.StatusPartialContent) - _, e = io.CopyN(w, sendContent, sendSize) - return e + }) + return nil } diff --git a/weed/server/volume_server_handlers_ui.go b/weed/server/volume_server_handlers_ui.go index b3d9a21fd..8d35c9c8b 100644 --- a/weed/server/volume_server_handlers_ui.go +++ b/weed/server/volume_server_handlers_ui.go @@ -8,6 +8,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" ui "github.com/chrislusf/seaweedfs/weed/server/volume_server_ui" "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage" "github.com/chrislusf/seaweedfs/weed/util" ) @@ -20,17 +21,30 @@ func (vs *VolumeServer) uiStatusHandler(w http.ResponseWriter, r *http.Request) ds = append(ds, stats.NewDiskStatus(dir)) } } + volumeInfos := vs.store.VolumeInfos() + var normalVolumeInfos, remoteVolumeInfos []*storage.VolumeInfo + for _, vinfo := range volumeInfos { + if vinfo.IsRemote() { + remoteVolumeInfos = append(remoteVolumeInfos, vinfo) + } else { + normalVolumeInfos = append(normalVolumeInfos, vinfo) + } + } args := struct { - Version string - Masters []string - Volumes interface{} - DiskStatuses interface{} - Stats interface{} - Counters *stats.ServerStats + Version string + Masters []string + Volumes interface{} + EcVolumes interface{} + RemoteVolumes interface{} + DiskStatuses interface{} + Stats interface{} + Counters *stats.ServerStats }{ util.VERSION, - vs.MasterNodes, - vs.store.Status(), + vs.SeedMasterNodes, + normalVolumeInfos, + vs.store.EcVolumes(), + remoteVolumeInfos, ds, infos, serverStats, diff --git a/weed/server/volume_server_handlers_write.go b/weed/server/volume_server_handlers_write.go index 790d109cf..101be4c43 100644 --- a/weed/server/volume_server_handlers_write.go +++ b/weed/server/volume_server_handlers_write.go @@ -10,55 +10,99 @@ import ( "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" - 
"github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/topology" ) func (vs *VolumeServer) PostHandler(w http.ResponseWriter, r *http.Request) { + + stats.VolumeServerRequestCounter.WithLabelValues("post").Inc() + start := time.Now() + defer func() { + stats.VolumeServerRequestHistogram.WithLabelValues("post").Observe(time.Since(start).Seconds()) + }() + if e := r.ParseForm(); e != nil { glog.V(0).Infoln("form parse error:", e) writeJsonError(w, r, http.StatusBadRequest, e) return } - vid, _, _, _, _ := parseURLPath(r.URL.Path) - volumeId, ve := storage.NewVolumeId(vid) + + vid, fid, _, _, _ := parseURLPath(r.URL.Path) + volumeId, ve := needle.NewVolumeId(vid) if ve != nil { glog.V(0).Infoln("NewVolumeId error:", ve) writeJsonError(w, r, http.StatusBadRequest, ve) return } - needle, ne := storage.NewNeedle(r, vs.FixJpgOrientation) + + if !vs.maybeCheckJwtAuthorization(r, vid, fid, true) { + writeJsonError(w, r, http.StatusUnauthorized, errors.New("wrong jwt")) + return + } + + needle, originalSize, ne := needle.CreateNeedleFromRequest(r, vs.FixJpgOrientation, vs.fileSizeLimitBytes) if ne != nil { writeJsonError(w, r, http.StatusBadRequest, ne) return } ret := operation.UploadResult{} - size, errorStatus := topology.ReplicatedWrite(vs.GetMaster(), - vs.store, volumeId, needle, r) + _, isUnchanged, writeError := topology.ReplicatedWrite(vs.GetMaster(), vs.store, volumeId, needle, r) + + // http 204 status code does not allow body + if writeError == nil && isUnchanged { + setEtag(w, needle.Etag()) + w.WriteHeader(http.StatusNoContent) + return + } + httpStatus := http.StatusCreated - if errorStatus != "" { + if writeError != nil { httpStatus = http.StatusInternalServerError - ret.Error = errorStatus + ret.Error = writeError.Error() } if needle.HasName() { ret.Name = string(needle.Name) } - ret.Size = size - setEtag(w, needle.Etag()) + ret.Size = uint32(originalSize) + ret.ETag = needle.Etag() + ret.Mime = string(needle.Mime) + setEtag(w, ret.ETag) writeJsonQuiet(w, r, httpStatus, ret) } func (vs *VolumeServer) DeleteHandler(w http.ResponseWriter, r *http.Request) { - n := new(storage.Needle) + + stats.VolumeServerRequestCounter.WithLabelValues("delete").Inc() + start := time.Now() + defer func() { + stats.VolumeServerRequestHistogram.WithLabelValues("delete").Observe(time.Since(start).Seconds()) + }() + + n := new(needle.Needle) vid, fid, _, _, _ := parseURLPath(r.URL.Path) - volumeId, _ := storage.NewVolumeId(vid) + volumeId, _ := needle.NewVolumeId(vid) n.ParsePath(fid) + if !vs.maybeCheckJwtAuthorization(r, vid, fid, true) { + writeJsonError(w, r, http.StatusUnauthorized, errors.New("wrong jwt")) + return + } + // glog.V(2).Infof("volume %s deleting %s", vid, n) cookie := n.Cookie + ecVolume, hasEcVolume := vs.store.FindEcVolume(volumeId) + + if hasEcVolume { + count, err := vs.store.DeleteEcShardNeedle(ecVolume, n, cookie) + writeDeleteResult(err, count, w, r) + return + } + _, ok := vs.store.ReadVolumeNeedle(volumeId, n) if ok != nil { m := make(map[string]uint32) @@ -82,7 +126,7 @@ func (vs *VolumeServer) DeleteHandler(w http.ResponseWriter, r *http.Request) { return } // make sure all chunks had deleted before delete manifest - if e := chunkManifest.DeleteChunks(vs.GetMaster()); e != nil { + if e := chunkManifest.DeleteChunks(vs.GetMaster(), vs.grpcDialOption); e != nil { writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Delete chunks error: 
%v", e)) return } @@ -99,6 +143,11 @@ func (vs *VolumeServer) DeleteHandler(w http.ResponseWriter, r *http.Request) { _, err := topology.ReplicatedDelete(vs.GetMaster(), vs.store, volumeId, n, r) + writeDeleteResult(err, count, w, r) + +} + +func writeDeleteResult(err error, count int64, w http.ResponseWriter, r *http.Request) { if err == nil { m := make(map[string]int64) m["size"] = count @@ -106,7 +155,6 @@ func (vs *VolumeServer) DeleteHandler(w http.ResponseWriter, r *http.Request) { } else { writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Deletion Failed: %v", err)) } - } func setEtag(w http.ResponseWriter, etag string) { diff --git a/weed/server/volume_server_ui/templates.go b/weed/server/volume_server_ui/templates.go index 75482174e..1c1394369 100644 --- a/weed/server/volume_server_ui/templates.go +++ b/weed/server/volume_server_ui/templates.go @@ -1,11 +1,29 @@ package master_ui import ( + "fmt" "html/template" "strconv" "strings" ) +func bytesToHumanReadble(b uint64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%d B", b) + } + div, exp := uint64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.2f %ciB", float64(b)/float64(div), "KMGTPE"[exp]) +} + +func percentFrom(total uint64, part_of uint64) string { + return fmt.Sprintf("%.2f", (float64(part_of)/float64(total))*100) +} + func join(data []int64) string { var ret []string for _, d := range data { @@ -15,7 +33,9 @@ func join(data []int64) string { } var funcMap = template.FuncMap{ - "join": join, + "join": join, + "bytesToHumanReadble": bytesToHumanReadble, + "percentFrom": percentFrom, } var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOCTYPE html> @@ -49,7 +69,7 @@ var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOC <div class="container"> <div class="page-header"> <h1> - <img src="/seaweedfsstatic/seaweed50x50.png"></img> + <a href="https://github.com/chrislusf/seaweedfs"><img src="/seaweedfsstatic/seaweed50x50.png"></img></a> SeaweedFS <small>{{ .Version }}</small> </h1> </div> @@ -57,13 +77,25 @@ var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOC <div class="row"> <div class="col-sm-6"> <h2>Disk Stats</h2> - <table class="table table-condensed table-striped"> + <table class="table table-striped"> + <thead> + <tr> + <th>Path</th> + <th>Total</th> + <th>Free</th> + <th>Usage</th> + </tr> + </thead> + <tbody> {{ range .DiskStatuses }} <tr> - <th>{{ .Dir }}</th> - <td>{{ .Free }} Bytes Free</td> + <td>{{ .Dir }}</td> + <td>{{ bytesToHumanReadble .All }}</td> + <td>{{ bytesToHumanReadble .Free }}</td> + <td>{{ percentFrom .All .Used}}%</td> </tr> {{ end }} + </tbody> </table> </div> @@ -107,10 +139,11 @@ var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOC <tr> <th>Id</th> <th>Collection</th> - <th>Size</th> + <th>Data Size</th> <th>Files</th> <th>Trash</th> <th>TTL</th> + <th>ReadOnly</th> </tr> </thead> <tbody> @@ -122,6 +155,63 @@ var StatusTpl = template.Must(template.New("status").Funcs(funcMap).Parse(`<!DOC <td>{{ .FileCount }}</td> <td>{{ .DeleteCount }} / {{.DeletedByteCount}} Bytes</td> <td>{{ .Ttl }}</td> + <td>{{ .ReadOnly }}</td> + </tr> + {{ end }} + </tbody> + </table> + </div> + + <div class="row"> + <h2>Remote Volumes</h2> + <table class="table table-striped"> + <thead> + <tr> + <th>Id</th> + <th>Collection</th> + <th>Size</th> + <th>Files</th> + <th>Trash</th> + <th>Remote</th> + <th>Key</th> + </tr> + 
</thead> + <tbody> + {{ range .RemoteVolumes }} + <tr> + <td><code>{{ .Id }}</code></td> + <td>{{ .Collection }}</td> + <td>{{ .Size }} Bytes</td> + <td>{{ .FileCount }}</td> + <td>{{ .DeleteCount }} / {{.DeletedByteCount}} Bytes</td> + <td>{{ .RemoteStorageName }}</td> + <td>{{ .RemoteStorageKey }}</td> + </tr> + {{ end }} + </tbody> + </table> + </div> + + <div class="row"> + <h2>Erasure Coding Shards</h2> + <table class="table table-striped"> + <thead> + <tr> + <th>Id</th> + <th>Collection</th> + <th>Shard Size</th> + <th>Shards</th> + <th>CreatedAt</th> + </tr> + </thead> + <tbody> + {{ range .EcVolumes }} + <tr> + <td><code>{{ .VolumeId }}</code></td> + <td>{{ .Collection }}</td> + <td>{{ .ShardSize }} Bytes</td> + <td>{{ .ShardIdList }}</td> + <td>{{ .CreatedAt.Format "02 Jan 06 15:04 -0700" }}</td> </tr> {{ end }} </tbody> diff --git a/weed/server/webdav_server.go b/weed/server/webdav_server.go new file mode 100644 index 000000000..1fb0912c5 --- /dev/null +++ b/weed/server/webdav_server.go @@ -0,0 +1,586 @@ +package weed_server + +import ( + "context" + "fmt" + "io" + "os" + "path" + "strings" + "time" + + "golang.org/x/net/webdav" + "google.golang.org/grpc" + + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/security" +) + +type WebDavOption struct { + Filer string + FilerGrpcAddress string + DomainName string + BucketsPath string + GrpcDialOption grpc.DialOption + Collection string + Uid uint32 + Gid uint32 + Cipher bool +} + +type WebDavServer struct { + option *WebDavOption + secret security.SigningKey + filer *filer2.Filer + grpcDialOption grpc.DialOption + Handler *webdav.Handler +} + +func NewWebDavServer(option *WebDavOption) (ws *WebDavServer, err error) { + + fs, _ := NewWebDavFileSystem(option) + + ws = &WebDavServer{ + option: option, + grpcDialOption: security.LoadClientTLS(util.GetViper(), "grpc.filer"), + Handler: &webdav.Handler{ + FileSystem: fs, + LockSystem: webdav.NewMemLS(), + }, + } + + return ws, nil +} + +// adapted from https://github.com/mattn/davfs/blob/master/plugin/mysql/mysql.go + +type WebDavFileSystem struct { + option *WebDavOption + secret security.SigningKey + filer *filer2.Filer + grpcDialOption grpc.DialOption +} + +type FileInfo struct { + name string + size int64 + mode os.FileMode + modifiledTime time.Time + isDirectory bool +} + +func (fi *FileInfo) Name() string { return fi.name } +func (fi *FileInfo) Size() int64 { return fi.size } +func (fi *FileInfo) Mode() os.FileMode { return fi.mode } +func (fi *FileInfo) ModTime() time.Time { return fi.modifiledTime } +func (fi *FileInfo) IsDir() bool { return fi.isDirectory } +func (fi *FileInfo) Sys() interface{} { return nil } + +type WebDavFile struct { + fs *WebDavFileSystem + name string + isDirectory bool + off int64 + entry *filer_pb.Entry + entryViewCache []filer2.VisibleInterval +} + +func NewWebDavFileSystem(option *WebDavOption) (webdav.FileSystem, error) { + return &WebDavFileSystem{ + option: option, + }, nil +} + +func (fs *WebDavFileSystem) WithFilerClient(fn func(filer_pb.SeaweedFilerClient) error) error { + + return pb.WithCachedGrpcClient(func(grpcConnection *grpc.ClientConn) error { + client := filer_pb.NewSeaweedFilerClient(grpcConnection) + return fn(client) + }, fs.option.FilerGrpcAddress, 
fs.option.GrpcDialOption) + +} +func (fs *WebDavFileSystem) AdjustedUrl(hostAndPort string) string { + return hostAndPort +} + +func clearName(name string) (string, error) { + slashed := strings.HasSuffix(name, "/") + name = path.Clean(name) + if !strings.HasSuffix(name, "/") && slashed { + name += "/" + } + if !strings.HasPrefix(name, "/") { + return "", os.ErrInvalid + } + return name, nil +} + +func (fs *WebDavFileSystem) Mkdir(ctx context.Context, fullDirPath string, perm os.FileMode) error { + + glog.V(2).Infof("WebDavFileSystem.Mkdir %v", fullDirPath) + + if !strings.HasSuffix(fullDirPath, "/") { + fullDirPath += "/" + } + + var err error + if fullDirPath, err = clearName(fullDirPath); err != nil { + return err + } + + _, err = fs.stat(ctx, fullDirPath) + if err == nil { + return os.ErrExist + } + + return fs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + dir, name := filer2.FullPath(fullDirPath).DirAndName() + request := &filer_pb.CreateEntryRequest{ + Directory: dir, + Entry: &filer_pb.Entry{ + Name: name, + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32(perm | os.ModeDir), + Uid: fs.option.Uid, + Gid: fs.option.Gid, + }, + }, + } + + glog.V(1).Infof("mkdir: %v", request) + if err := filer_pb.CreateEntry(client, request); err != nil { + return fmt.Errorf("mkdir %s/%s: %v", dir, name, err) + } + + return nil + }) +} + +func (fs *WebDavFileSystem) OpenFile(ctx context.Context, fullFilePath string, flag int, perm os.FileMode) (webdav.File, error) { + + glog.V(2).Infof("WebDavFileSystem.OpenFile %v %x", fullFilePath, flag) + + var err error + if fullFilePath, err = clearName(fullFilePath); err != nil { + return nil, err + } + + if flag&os.O_CREATE != 0 { + // file should not have / suffix. 
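+	// Create path (descriptive note): if an entry already exists at this path,
+	// O_EXCL fails with os.ErrExist; otherwise the existing entry is removed
+	// first, so O_CREATE effectively truncates by replacing the entry and its chunks.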
+ if strings.HasSuffix(fullFilePath, "/") { + return nil, os.ErrInvalid + } + _, err = fs.stat(ctx, fullFilePath) + if err == nil { + if flag&os.O_EXCL != 0 { + return nil, os.ErrExist + } + fs.removeAll(ctx, fullFilePath) + } + + dir, name := filer2.FullPath(fullFilePath).DirAndName() + err = fs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + if err := filer_pb.CreateEntry(client, &filer_pb.CreateEntryRequest{ + Directory: dir, + Entry: &filer_pb.Entry{ + Name: name, + IsDirectory: perm&os.ModeDir > 0, + Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32(perm), + Uid: fs.option.Uid, + Gid: fs.option.Gid, + Collection: fs.option.Collection, + Replication: "000", + TtlSec: 0, + }, + }, + }); err != nil { + return fmt.Errorf("create %s: %v", fullFilePath, err) + } + return nil + }) + if err != nil { + return nil, err + } + return &WebDavFile{ + fs: fs, + name: fullFilePath, + isDirectory: false, + }, nil + } + + fi, err := fs.stat(ctx, fullFilePath) + if err != nil { + return nil, os.ErrNotExist + } + if !strings.HasSuffix(fullFilePath, "/") && fi.IsDir() { + fullFilePath += "/" + } + + return &WebDavFile{ + fs: fs, + name: fullFilePath, + isDirectory: false, + }, nil + +} + +func (fs *WebDavFileSystem) removeAll(ctx context.Context, fullFilePath string) error { + var err error + if fullFilePath, err = clearName(fullFilePath); err != nil { + return err + } + + fi, err := fs.stat(ctx, fullFilePath) + if err != nil { + return err + } + + if fi.IsDir() { + //_, err = fs.db.Exec(`delete from filesystem where fullFilePath like $1 escape '\'`, strings.Replace(fullFilePath, `%`, `\%`, -1)+`%`) + } else { + //_, err = fs.db.Exec(`delete from filesystem where fullFilePath = ?`, fullFilePath) + } + dir, name := filer2.FullPath(fullFilePath).DirAndName() + err = fs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.DeleteEntryRequest{ + Directory: dir, + Name: name, + IsDeleteData: true, + } + + glog.V(3).Infof("removing entry: %v", request) + _, err := client.DeleteEntry(ctx, request) + if err != nil { + return fmt.Errorf("remove %s: %v", fullFilePath, err) + } + + return nil + }) + return err +} + +func (fs *WebDavFileSystem) RemoveAll(ctx context.Context, name string) error { + + glog.V(2).Infof("WebDavFileSystem.RemoveAll %v", name) + + return fs.removeAll(ctx, name) +} + +func (fs *WebDavFileSystem) Rename(ctx context.Context, oldName, newName string) error { + + glog.V(2).Infof("WebDavFileSystem.Rename %v to %v", oldName, newName) + + var err error + if oldName, err = clearName(oldName); err != nil { + return err + } + if newName, err = clearName(newName); err != nil { + return err + } + + of, err := fs.stat(ctx, oldName) + if err != nil { + return os.ErrExist + } + if of.IsDir() { + if strings.HasSuffix(oldName, "/") { + oldName = strings.TrimRight(oldName, "/") + } + if strings.HasSuffix(newName, "/") { + newName = strings.TrimRight(newName, "/") + } + } + + _, err = fs.stat(ctx, newName) + if err == nil { + return os.ErrExist + } + + oldDir, oldBaseName := filer2.FullPath(oldName).DirAndName() + newDir, newBaseName := filer2.FullPath(newName).DirAndName() + + return fs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.AtomicRenameEntryRequest{ + OldDirectory: oldDir, + OldName: oldBaseName, + NewDirectory: newDir, + NewName: newBaseName, + } + + _, err := client.AtomicRenameEntry(ctx, request) + if err != nil { + return 
fmt.Errorf("renaming %s/%s => %s/%s: %v", oldDir, oldBaseName, newDir, newBaseName, err) + } + + return nil + + }) +} + +func (fs *WebDavFileSystem) stat(ctx context.Context, fullFilePath string) (os.FileInfo, error) { + var err error + if fullFilePath, err = clearName(fullFilePath); err != nil { + return nil, err + } + + fullpath := filer2.FullPath(fullFilePath) + + var fi FileInfo + entry, err := filer2.GetEntry(fs, fullpath) + if entry == nil { + return nil, os.ErrNotExist + } + if err != nil { + return nil, err + } + fi.size = int64(filer2.TotalSize(entry.GetChunks())) + fi.name = string(fullpath) + fi.mode = os.FileMode(entry.Attributes.FileMode) + fi.modifiledTime = time.Unix(entry.Attributes.Mtime, 0) + fi.isDirectory = entry.IsDirectory + + if fi.name == "/" { + fi.modifiledTime = time.Now() + fi.isDirectory = true + } + return &fi, nil +} + +func (fs *WebDavFileSystem) Stat(ctx context.Context, name string) (os.FileInfo, error) { + + glog.V(2).Infof("WebDavFileSystem.Stat %v", name) + + return fs.stat(ctx, name) +} + +func (f *WebDavFile) Write(buf []byte) (int, error) { + + glog.V(2).Infof("WebDavFileSystem.Write %v", f.name) + + dir, _ := filer2.FullPath(f.name).DirAndName() + + var err error + ctx := context.Background() + if f.entry == nil { + f.entry, err = filer2.GetEntry(f.fs, filer2.FullPath(f.name)) + } + + if f.entry == nil { + return 0, err + } + if err != nil { + return 0, err + } + + var fileId, host string + var auth security.EncodedJwt + var collection, replication string + + if err = f.fs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.AssignVolumeRequest{ + Count: 1, + Replication: "", + Collection: f.fs.option.Collection, + ParentPath: dir, + } + + resp, err := client.AssignVolume(ctx, request) + if err != nil { + glog.V(0).Infof("assign volume failure %v: %v", request, err) + return err + } + if resp.Error != "" { + return fmt.Errorf("assign volume failure %v: %v", request, resp.Error) + } + + fileId, host, auth = resp.FileId, resp.Url, security.EncodedJwt(resp.Auth) + collection, replication = resp.Collection, resp.Replication + + return nil + }); err != nil { + return 0, fmt.Errorf("filerGrpcAddress assign volume: %v", err) + } + + fileUrl := fmt.Sprintf("http://%s/%s", host, fileId) + uploadResult, err := operation.UploadData(fileUrl, f.name, f.fs.option.Cipher, buf, false, "", nil, auth) + if err != nil { + glog.V(0).Infof("upload data %v to %s: %v", f.name, fileUrl, err) + return 0, fmt.Errorf("upload data: %v", err) + } + if uploadResult.Error != "" { + glog.V(0).Infof("upload failure %v to %s: %v", f.name, fileUrl, err) + return 0, fmt.Errorf("upload result: %v", uploadResult.Error) + } + + chunk := &filer_pb.FileChunk{ + FileId: fileId, + Offset: f.off, + Size: uint64(len(buf)), + Mtime: time.Now().UnixNano(), + ETag: uploadResult.ETag, + CipherKey: uploadResult.CipherKey, + IsGzipped: uploadResult.Gzip > 0, + } + + f.entry.Chunks = append(f.entry.Chunks, chunk) + + err = f.fs.WithFilerClient(func(client filer_pb.SeaweedFilerClient) error { + f.entry.Attributes.Mtime = time.Now().Unix() + f.entry.Attributes.Collection = collection + f.entry.Attributes.Replication = replication + + request := &filer_pb.UpdateEntryRequest{ + Directory: dir, + Entry: f.entry, + } + + if _, err := client.UpdateEntry(ctx, request); err != nil { + return fmt.Errorf("update %s: %v", f.name, err) + } + + return nil + }) + + if err == nil { + glog.V(3).Infof("WebDavFileSystem.Write %v: written [%d,%d)", f.name, f.off, 
f.off+int64(len(buf))) + f.off += int64(len(buf)) + } + + return len(buf), err +} + +func (f *WebDavFile) Close() error { + + glog.V(2).Infof("WebDavFileSystem.Close %v", f.name) + + if f.entry != nil { + f.entry = nil + f.entryViewCache = nil + } + + return nil +} + +func (f *WebDavFile) Read(p []byte) (readSize int, err error) { + + glog.V(2).Infof("WebDavFileSystem.Read %v", f.name) + + if f.entry == nil { + f.entry, err = filer2.GetEntry(f.fs, filer2.FullPath(f.name)) + } + if f.entry == nil { + return 0, err + } + if err != nil { + return 0, err + } + if len(f.entry.Chunks) == 0 { + return 0, io.EOF + } + if f.entryViewCache == nil { + f.entryViewCache = filer2.NonOverlappingVisibleIntervals(f.entry.Chunks) + } + chunkViews := filer2.ViewFromVisibleIntervals(f.entryViewCache, f.off, len(p)) + + totalRead, err := filer2.ReadIntoBuffer(f.fs, filer2.FullPath(f.name), p, chunkViews, f.off) + if err != nil { + return 0, err + } + readSize = int(totalRead) + + glog.V(3).Infof("WebDavFileSystem.Read %v: [%d,%d)", f.name, f.off, f.off+totalRead) + + f.off += totalRead + if readSize == 0 { + return 0, io.EOF + } + + return +} + +func (f *WebDavFile) Readdir(count int) (ret []os.FileInfo, err error) { + + glog.V(2).Infof("WebDavFileSystem.Readdir %v count %d", f.name, count) + + dir, _ := filer2.FullPath(f.name).DirAndName() + + err = filer2.ReadDirAllEntries(f.fs, filer2.FullPath(dir), "", func(entry *filer_pb.Entry, isLast bool) { + fi := FileInfo{ + size: int64(filer2.TotalSize(entry.GetChunks())), + name: entry.Name, + mode: os.FileMode(entry.Attributes.FileMode), + modifiledTime: time.Unix(entry.Attributes.Mtime, 0), + isDirectory: entry.IsDirectory, + } + + if !strings.HasSuffix(fi.name, "/") && fi.IsDir() { + fi.name += "/" + } + glog.V(4).Infof("entry: %v", fi.name) + ret = append(ret, &fi) + }) + + old := f.off + if old >= int64(len(ret)) { + if count > 0 { + return nil, io.EOF + } + return nil, nil + } + if count > 0 { + f.off += int64(count) + if f.off > int64(len(ret)) { + f.off = int64(len(ret)) + } + } else { + f.off = int64(len(ret)) + old = 0 + } + + return ret[old:f.off], nil +} + +func (f *WebDavFile) Seek(offset int64, whence int) (int64, error) { + + glog.V(2).Infof("WebDavFile.Seek %v %v %v", f.name, offset, whence) + + ctx := context.Background() + + var err error + switch whence { + case 0: + f.off = 0 + case 2: + if fi, err := f.fs.stat(ctx, f.name); err != nil { + return 0, err + } else { + f.off = fi.Size() + } + } + f.off += offset + return f.off, err +} + +func (f *WebDavFile) Stat() (os.FileInfo, error) { + + glog.V(2).Infof("WebDavFile.Stat %v", f.name) + + ctx := context.Background() + + return f.fs.stat(ctx, f.name) +} diff --git a/weed/shell/command_bucket_create.go b/weed/shell/command_bucket_create.go new file mode 100644 index 000000000..3546528aa --- /dev/null +++ b/weed/shell/command_bucket_create.go @@ -0,0 +1,88 @@ +package shell + +import ( + "context" + "flag" + "fmt" + "io" + "os" + "time" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func init() { + Commands = append(Commands, &commandBucketCreate{}) +} + +type commandBucketCreate struct { +} + +func (c *commandBucketCreate) Name() string { + return "bucket.create" +} + +func (c *commandBucketCreate) Help() string { + return `create a bucket with a given name + + Example: + bucket.create -name <bucket_name> -replication 001 +` +} + +func (c *commandBucketCreate) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + bucketCommand := flag.NewFlagSet(c.Name(), 
flag.ContinueOnError) + bucketName := bucketCommand.String("name", "", "bucket name") + replication := bucketCommand.String("replication", "", "replication setting for the bucket") + if err = bucketCommand.Parse(args); err != nil { + return nil + } + + if *bucketName == "" { + return fmt.Errorf("empty bucket name") + } + + filerServer, filerPort, _, parseErr := commandEnv.parseUrl(findInputDirectory(bucketCommand.Args())) + if parseErr != nil { + return parseErr + } + + err = commandEnv.withFilerClient(filerServer, filerPort, func(client filer_pb.SeaweedFilerClient) error { + + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer %s:%d configuration: %v", filerServer, filerPort, err) + } + filerBucketsPath := resp.DirBuckets + + println("create bucket under", filerBucketsPath) + + entry := &filer_pb.Entry{ + Name: *bucketName, + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{ + Mtime: time.Now().Unix(), + Crtime: time.Now().Unix(), + FileMode: uint32(0777 | os.ModeDir), + Collection: *bucketName, + Replication: *replication, + }, + } + + if err := filer_pb.CreateEntry(client, &filer_pb.CreateEntryRequest{ + Directory: filerBucketsPath, + Entry: entry, + }); err != nil { + return err + } + + println("created bucket", *bucketName) + + return nil + + }) + + return err + +} diff --git a/weed/shell/command_bucket_delete.go b/weed/shell/command_bucket_delete.go new file mode 100644 index 000000000..c57ce7221 --- /dev/null +++ b/weed/shell/command_bucket_delete.go @@ -0,0 +1,71 @@ +package shell + +import ( + "context" + "flag" + "fmt" + "io" + + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func init() { + Commands = append(Commands, &commandBucketDelete{}) +} + +type commandBucketDelete struct { +} + +func (c *commandBucketDelete) Name() string { + return "bucket.delete" +} + +func (c *commandBucketDelete) Help() string { + return `delete a bucket by a given name + + bucket.delete -name <bucket_name> +` +} + +func (c *commandBucketDelete) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + bucketCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + bucketName := bucketCommand.String("name", "", "bucket name") + if err = bucketCommand.Parse(args); err != nil { + return nil + } + + if *bucketName == "" { + return fmt.Errorf("empty bucket name") + } + + filerServer, filerPort, _, parseErr := commandEnv.parseUrl(findInputDirectory(bucketCommand.Args())) + if parseErr != nil { + return parseErr + } + + err = commandEnv.withFilerClient(filerServer, filerPort, func(client filer_pb.SeaweedFilerClient) error { + + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer %s:%d configuration: %v", filerServer, filerPort, err) + } + filerBucketsPath := resp.DirBuckets + + if _, err := client.DeleteEntry(context.Background(), &filer_pb.DeleteEntryRequest{ + Directory: filerBucketsPath, + Name: *bucketName, + IsDeleteData: false, + IsRecursive: true, + IgnoreRecursiveError: true, + }); err != nil { + return err + } + + return nil + + }) + + return err + +} diff --git a/weed/shell/command_bucket_list.go b/weed/shell/command_bucket_list.go new file mode 100644 index 000000000..5eb5972ce --- /dev/null +++ b/weed/shell/command_bucket_list.go @@ -0,0 +1,81 @@ +package shell + +import ( + "context" + "flag" + "fmt" + "io" + "math" + + 
"github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func init() { + Commands = append(Commands, &commandBucketList{}) +} + +type commandBucketList struct { +} + +func (c *commandBucketList) Name() string { + return "bucket.list" +} + +func (c *commandBucketList) Help() string { + return `list all buckets + +` +} + +func (c *commandBucketList) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + bucketCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + if err = bucketCommand.Parse(args); err != nil { + return nil + } + + filerServer, filerPort, _, parseErr := commandEnv.parseUrl(findInputDirectory(bucketCommand.Args())) + if parseErr != nil { + return parseErr + } + + err = commandEnv.withFilerClient(filerServer, filerPort, func(client filer_pb.SeaweedFilerClient) error { + + resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{}) + if err != nil { + return fmt.Errorf("get filer %s:%d configuration: %v", filerServer, filerPort, err) + } + filerBucketsPath := resp.DirBuckets + + stream, err := client.ListEntries(context.Background(), &filer_pb.ListEntriesRequest{ + Directory: filerBucketsPath, + Limit: math.MaxUint32, + }) + if err != nil { + return fmt.Errorf("list buckets under %v: %v", filerBucketsPath, err) + } + + for { + resp, recvErr := stream.Recv() + if recvErr != nil { + if recvErr == io.EOF { + break + } else { + return recvErr + } + } + + if resp.Entry.Attributes.Replication == "" || resp.Entry.Attributes.Replication == "000" { + fmt.Fprintf(writer, " %s\n", resp.Entry.Name) + } else { + fmt.Fprintf(writer, " %s\t\t\treplication: %s\n", resp.Entry.Name, resp.Entry.Attributes.Replication) + } + } + + return nil + + }) + + return err + +} diff --git a/weed/shell/command_collection_delete.go b/weed/shell/command_collection_delete.go new file mode 100644 index 000000000..4b3d7f0be --- /dev/null +++ b/weed/shell/command_collection_delete.go @@ -0,0 +1,50 @@ +package shell + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "io" +) + +func init() { + Commands = append(Commands, &commandCollectionDelete{}) +} + +type commandCollectionDelete struct { +} + +func (c *commandCollectionDelete) Name() string { + return "collection.delete" +} + +func (c *commandCollectionDelete) Help() string { + return `delete specified collection + + collection.delete <collection_name> + +` +} + +func (c *commandCollectionDelete) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + if len(args) == 0 { + return nil + } + + collectionName := args[0] + + err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error { + _, err = client.CollectionDelete(context.Background(), &master_pb.CollectionDeleteRequest{ + Name: collectionName, + }) + return err + }) + if err != nil { + return + } + + fmt.Fprintf(writer, "collection %s is deleted.\n", collectionName) + + return nil +} diff --git a/weed/shell/command_collection_list.go b/weed/shell/command_collection_list.go new file mode 100644 index 000000000..2a114e61b --- /dev/null +++ b/weed/shell/command_collection_list.go @@ -0,0 +1,58 @@ +package shell + +import ( + "context" + "fmt" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "io" +) + +func init() { + Commands = append(Commands, &commandCollectionList{}) +} + +type commandCollectionList struct { +} + +func (c *commandCollectionList) Name() string { + return "collection.list" +} + +func (c *commandCollectionList) Help() string { + return 
`list all collections` +} + +func (c *commandCollectionList) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + collections, err := ListCollectionNames(commandEnv, true, true) + + if err != nil { + return err + } + + for _, c := range collections { + fmt.Fprintf(writer, "collection:\"%s\"\n", c) + } + + fmt.Fprintf(writer, "Total %d collections.\n", len(collections)) + + return nil +} + +func ListCollectionNames(commandEnv *CommandEnv, includeNormalVolumes, includeEcVolumes bool) (collections []string, err error) { + var resp *master_pb.CollectionListResponse + err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error { + resp, err = client.CollectionList(context.Background(), &master_pb.CollectionListRequest{ + IncludeNormalVolumes: includeNormalVolumes, + IncludeEcVolumes: includeEcVolumes, + }) + return err + }) + if err != nil { + return + } + for _, c := range resp.Collections { + collections = append(collections, c.Name) + } + return +} diff --git a/weed/shell/command_ec_balance.go b/weed/shell/command_ec_balance.go new file mode 100644 index 000000000..299d44fed --- /dev/null +++ b/weed/shell/command_ec_balance.go @@ -0,0 +1,515 @@ +package shell + +import ( + "flag" + "fmt" + "io" + "sort" + + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +func init() { + Commands = append(Commands, &commandEcBalance{}) +} + +type commandEcBalance struct { +} + +func (c *commandEcBalance) Name() string { + return "ec.balance" +} + +func (c *commandEcBalance) Help() string { + return `balance all ec shards among all racks and volume servers + + ec.balance [-c EACH_COLLECTION|<collection_name>] [-force] [-dataCenter <data_center>] + + Algorithm: + + For each type of volume server (different max volume count limit){ + for each collection: + balanceEcVolumes(collectionName) + for each rack: + balanceEcRack(rack) + } + + func balanceEcVolumes(collectionName){ + for each volume: + doDeduplicateEcShards(volumeId) + + tracks rack~shardCount mapping + for each volume: + doBalanceEcShardsAcrossRacks(volumeId) + + for each volume: + doBalanceEcShardsWithinRacks(volumeId) + } + + // spread ec shards into more racks + func doBalanceEcShardsAcrossRacks(volumeId){ + tracks rack~volumeIdShardCount mapping + averageShardsPerEcRack = totalShardNumber / numRacks // totalShardNumber is 14 for now, later could varies for each dc + ecShardsToMove = select overflown ec shards from racks with ec shard counts > averageShardsPerEcRack + for each ecShardsToMove { + destRack = pickOneRack(rack~shardCount, rack~volumeIdShardCount, averageShardsPerEcRack) + destVolumeServers = volume servers on the destRack + pickOneEcNodeAndMoveOneShard(destVolumeServers) + } + } + + func doBalanceEcShardsWithinRacks(volumeId){ + racks = collect all racks that the volume id is on + for rack, shards := range racks + doBalanceEcShardsWithinOneRack(volumeId, shards, rack) + } + + // move ec shards + func doBalanceEcShardsWithinOneRack(volumeId, shards, rackId){ + tracks volumeServer~volumeIdShardCount mapping + averageShardCount = len(shards) / numVolumeServers + volumeServersOverAverage = volume servers with volumeId's ec shard counts > averageShardsPerEcRack + ecShardsToMove = select overflown ec shards from volumeServersOverAverage + for each ecShardsToMove { + destVolumeServer = pickOneVolumeServer(volumeServer~shardCount, volumeServer~volumeIdShardCount, averageShardCount) + 
pickOneEcNodeAndMoveOneShard(destVolumeServers)
+        }
+    }
+
+    // move ec shards while keeping shard distribution for the same volume unchanged or more even
+    func balanceEcRack(rack){
+        averageShardCount = total shards / numVolumeServers
+        for hasMovedOneEcShard {
+            sort all volume servers ordered by the number of local ec shards
+            pick the volume server A with the lowest number of ec shards x
+            pick the volume server B with the highest number of ec shards y
+            if y > averageShardCount and x + 1 <= averageShardCount {
+                if B has an ec shard with volume id v that A does not have {
+                    move one ec shard v from B to A
+                    hasMovedOneEcShard = true
+                }
+            }
+        }
+    }
+
+`
+}
+
+func (c *commandEcBalance) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	balanceCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	collection := balanceCommand.String("collection", "EACH_COLLECTION", "collection name, or \"EACH_COLLECTION\" for each collection")
+	dc := balanceCommand.String("dataCenter", "", "only apply the balancing for this dataCenter")
+	applyBalancing := balanceCommand.Bool("force", false, "apply the balancing plan")
+	if err = balanceCommand.Parse(args); err != nil {
+		return nil
+	}
+
+	// collect all ec nodes
+	allEcNodes, totalFreeEcSlots, err := collectEcNodes(commandEnv, *dc)
+	if err != nil {
+		return err
+	}
+	if totalFreeEcSlots < 1 {
+		return fmt.Errorf("no free ec shard slots. only %d left", totalFreeEcSlots)
+	}
+
+	racks := collectRacks(allEcNodes)
+
+	if *collection == "EACH_COLLECTION" {
+		collections, err := ListCollectionNames(commandEnv, false, true)
+		if err != nil {
+			return err
+		}
+		fmt.Printf("balanceEcVolumes collections %+v\n", len(collections))
+		for _, c := range collections {
+			fmt.Printf("balanceEcVolumes collection %+v\n", c)
+			if err = balanceEcVolumes(commandEnv, c, allEcNodes, racks, *applyBalancing); err != nil {
+				return err
+			}
+		}
+	} else {
+		if err = balanceEcVolumes(commandEnv, *collection, allEcNodes, racks, *applyBalancing); err != nil {
+			return err
+		}
+	}
+
+	if err := balanceEcRacks(commandEnv, racks, *applyBalancing); err != nil {
+		return fmt.Errorf("balance ec racks: %v", err)
+	}
+
+	return nil
+}
+
+func collectRacks(allEcNodes []*EcNode) map[RackId]*EcRack {
+	// collect racks info
+	racks := make(map[RackId]*EcRack)
+	for _, ecNode := range allEcNodes {
+		if racks[ecNode.rack] == nil {
+			racks[ecNode.rack] = &EcRack{
+				ecNodes: make(map[EcNodeId]*EcNode),
+			}
+		}
+		racks[ecNode.rack].ecNodes[EcNodeId(ecNode.info.Id)] = ecNode
+		racks[ecNode.rack].freeEcSlot += ecNode.freeEcSlot
+	}
+	return racks
+}
+
+func balanceEcVolumes(commandEnv *CommandEnv, collection string, allEcNodes []*EcNode, racks map[RackId]*EcRack, applyBalancing bool) error {
+
+	fmt.Printf("balanceEcVolumes %s\n", collection)
+
+	if err := deleteDuplicatedEcShards(commandEnv, allEcNodes, collection, applyBalancing); err != nil {
+		return fmt.Errorf("delete duplicated collection %s ec shards: %v", collection, err)
+	}
+
+	if err := balanceEcShardsAcrossRacks(commandEnv, allEcNodes, racks, collection, applyBalancing); err != nil {
+		return fmt.Errorf("balance across racks collection %s ec shards: %v", collection, err)
+	}
+
+	if err := balanceEcShardsWithinRacks(commandEnv, allEcNodes, racks, collection, applyBalancing); err != nil {
+		return fmt.Errorf("balance within racks collection %s ec shards: %v", collection, err)
+	}
+
+	return nil
+}
+
+func deleteDuplicatedEcShards(commandEnv *CommandEnv, allEcNodes []*EcNode, collection string, 
applyBalancing bool) error { + // vid => []ecNode + vidLocations := collectVolumeIdToEcNodes(allEcNodes) + // deduplicate ec shards + for vid, locations := range vidLocations { + if err := doDeduplicateEcShards(commandEnv, collection, vid, locations, applyBalancing); err != nil { + return err + } + } + return nil +} + +func doDeduplicateEcShards(commandEnv *CommandEnv, collection string, vid needle.VolumeId, locations []*EcNode, applyBalancing bool) error { + + // check whether this volume has ecNodes that are over average + shardToLocations := make([][]*EcNode, erasure_coding.TotalShardsCount) + for _, ecNode := range locations { + shardBits := findEcVolumeShards(ecNode, vid) + for _, shardId := range shardBits.ShardIds() { + shardToLocations[shardId] = append(shardToLocations[shardId], ecNode) + } + } + for shardId, ecNodes := range shardToLocations { + if len(ecNodes) <= 1 { + continue + } + sortEcNodesByFreeslotsAscending(ecNodes) + fmt.Printf("ec shard %d.%d has %d copies, keeping %v\n", vid, shardId, len(ecNodes), ecNodes[0].info.Id) + if !applyBalancing { + continue + } + + duplicatedShardIds := []uint32{uint32(shardId)} + for _, ecNode := range ecNodes[1:] { + if err := unmountEcShards(commandEnv.option.GrpcDialOption, vid, ecNode.info.Id, duplicatedShardIds); err != nil { + return err + } + if err := sourceServerDeleteEcShards(commandEnv.option.GrpcDialOption, collection, vid, ecNode.info.Id, duplicatedShardIds); err != nil { + return err + } + ecNode.deleteEcVolumeShards(vid, duplicatedShardIds) + } + } + return nil +} + +func balanceEcShardsAcrossRacks(commandEnv *CommandEnv, allEcNodes []*EcNode, racks map[RackId]*EcRack, collection string, applyBalancing bool) error { + // collect vid => []ecNode, since previous steps can change the locations + vidLocations := collectVolumeIdToEcNodes(allEcNodes) + // spread the ec shards evenly + for vid, locations := range vidLocations { + if err := doBalanceEcShardsAcrossRacks(commandEnv, collection, vid, locations, racks, applyBalancing); err != nil { + return err + } + } + return nil +} + +func doBalanceEcShardsAcrossRacks(commandEnv *CommandEnv, collection string, vid needle.VolumeId, locations []*EcNode, racks map[RackId]*EcRack, applyBalancing bool) error { + + // calculate average number of shards an ec rack should have for one volume + averageShardsPerEcRack := ceilDivide(erasure_coding.TotalShardsCount, len(racks)) + + // see the volume's shards are in how many racks, and how many in each rack + rackToShardCount := groupByCount(locations, func(ecNode *EcNode) (id string, count int) { + shardBits := findEcVolumeShards(ecNode, vid) + return string(ecNode.rack), shardBits.ShardIdCount() + }) + rackEcNodesWithVid := groupBy(locations, func(ecNode *EcNode) string { + return string(ecNode.rack) + }) + + // ecShardsToMove = select overflown ec shards from racks with ec shard counts > averageShardsPerEcRack + ecShardsToMove := make(map[erasure_coding.ShardId]*EcNode) + for rackId, count := range rackToShardCount { + if count > averageShardsPerEcRack { + possibleEcNodes := rackEcNodesWithVid[rackId] + for shardId, ecNode := range pickNEcShardsToMoveFrom(possibleEcNodes, vid, count-averageShardsPerEcRack) { + ecShardsToMove[shardId] = ecNode + } + } + } + + for shardId, ecNode := range ecShardsToMove { + rackId := pickOneRack(racks, rackToShardCount, averageShardsPerEcRack) + if rackId == "" { + fmt.Printf("ec shard %d.%d at %s can not find a destination rack\n", vid, shardId, ecNode.info.Id) + continue + } + var possibleDestinationEcNodes 
[]*EcNode + for _, n := range racks[rackId].ecNodes { + possibleDestinationEcNodes = append(possibleDestinationEcNodes, n) + } + err := pickOneEcNodeAndMoveOneShard(commandEnv, averageShardsPerEcRack, ecNode, collection, vid, shardId, possibleDestinationEcNodes, applyBalancing) + if err != nil { + return err + } + rackToShardCount[string(rackId)] += 1 + rackToShardCount[string(ecNode.rack)] -= 1 + racks[rackId].freeEcSlot -= 1 + racks[ecNode.rack].freeEcSlot += 1 + } + + return nil +} + +func pickOneRack(rackToEcNodes map[RackId]*EcRack, rackToShardCount map[string]int, averageShardsPerEcRack int) RackId { + + // TODO later may need to add some randomness + + for rackId, rack := range rackToEcNodes { + if rackToShardCount[string(rackId)] >= averageShardsPerEcRack { + continue + } + + if rack.freeEcSlot <= 0 { + continue + } + + return rackId + } + + return "" +} + +func balanceEcShardsWithinRacks(commandEnv *CommandEnv, allEcNodes []*EcNode, racks map[RackId]*EcRack, collection string, applyBalancing bool) error { + // collect vid => []ecNode, since previous steps can change the locations + vidLocations := collectVolumeIdToEcNodes(allEcNodes) + + // spread the ec shards evenly + for vid, locations := range vidLocations { + + // see the volume's shards are in how many racks, and how many in each rack + rackToShardCount := groupByCount(locations, func(ecNode *EcNode) (id string, count int) { + shardBits := findEcVolumeShards(ecNode, vid) + return string(ecNode.rack), shardBits.ShardIdCount() + }) + rackEcNodesWithVid := groupBy(locations, func(ecNode *EcNode) string { + return string(ecNode.rack) + }) + + for rackId, _ := range rackToShardCount { + + var possibleDestinationEcNodes []*EcNode + for _, n := range racks[RackId(rackId)].ecNodes { + possibleDestinationEcNodes = append(possibleDestinationEcNodes, n) + } + sourceEcNodes := rackEcNodesWithVid[rackId] + averageShardsPerEcNode := ceilDivide(rackToShardCount[rackId], len(possibleDestinationEcNodes)) + if err := doBalanceEcShardsWithinOneRack(commandEnv, averageShardsPerEcNode, collection, vid, sourceEcNodes, possibleDestinationEcNodes, applyBalancing); err != nil { + return err + } + } + } + return nil +} + +func doBalanceEcShardsWithinOneRack(commandEnv *CommandEnv, averageShardsPerEcNode int, collection string, vid needle.VolumeId, existingLocations, possibleDestinationEcNodes []*EcNode, applyBalancing bool) error { + + for _, ecNode := range existingLocations { + + shardBits := findEcVolumeShards(ecNode, vid) + overLimitCount := shardBits.ShardIdCount() - averageShardsPerEcNode + + for _, shardId := range shardBits.ShardIds() { + + if overLimitCount <= 0 { + break + } + + fmt.Printf("%s has %d overlimit, moving ec shard %d.%d\n", ecNode.info.Id, overLimitCount, vid, shardId) + + err := pickOneEcNodeAndMoveOneShard(commandEnv, averageShardsPerEcNode, ecNode, collection, vid, shardId, possibleDestinationEcNodes, applyBalancing) + if err != nil { + return err + } + + overLimitCount-- + } + } + + return nil +} + +func balanceEcRacks(commandEnv *CommandEnv, racks map[RackId]*EcRack, applyBalancing bool) error { + + // balance one rack for all ec shards + for _, ecRack := range racks { + if err := doBalanceEcRack(commandEnv, ecRack, applyBalancing); err != nil { + return err + } + } + return nil +} + +func doBalanceEcRack(commandEnv *CommandEnv, ecRack *EcRack, applyBalancing bool) error { + + if len(ecRack.ecNodes) <= 1 { + return nil + } + + var rackEcNodes []*EcNode + for _, node := range ecRack.ecNodes { + rackEcNodes = 
append(rackEcNodes, node) + } + + ecNodeIdToShardCount := groupByCount(rackEcNodes, func(node *EcNode) (id string, count int) { + for _, ecShardInfo := range node.info.EcShardInfos { + count += erasure_coding.ShardBits(ecShardInfo.EcIndexBits).ShardIdCount() + } + return node.info.Id, count + }) + + var totalShardCount int + for _, count := range ecNodeIdToShardCount { + totalShardCount += count + } + + averageShardCount := ceilDivide(totalShardCount, len(rackEcNodes)) + + hasMove := true + for hasMove { + hasMove = false + sort.Slice(rackEcNodes, func(i, j int) bool { + return rackEcNodes[i].freeEcSlot > rackEcNodes[j].freeEcSlot + }) + emptyNode, fullNode := rackEcNodes[0], rackEcNodes[len(rackEcNodes)-1] + emptyNodeShardCount, fullNodeShardCount := ecNodeIdToShardCount[emptyNode.info.Id], ecNodeIdToShardCount[fullNode.info.Id] + if fullNodeShardCount > averageShardCount && emptyNodeShardCount+1 <= averageShardCount { + + emptyNodeIds := make(map[uint32]bool) + for _, shards := range emptyNode.info.EcShardInfos { + emptyNodeIds[shards.Id] = true + } + for _, shards := range fullNode.info.EcShardInfos { + if _, found := emptyNodeIds[shards.Id]; !found { + for _, shardId := range erasure_coding.ShardBits(shards.EcIndexBits).ShardIds() { + + fmt.Printf("%s moves ec shards %d.%d to %s\n", fullNode.info.Id, shards.Id, shardId, emptyNode.info.Id) + + err := moveMountedShardToEcNode(commandEnv, fullNode, shards.Collection, needle.VolumeId(shards.Id), shardId, emptyNode, applyBalancing) + if err != nil { + return err + } + + ecNodeIdToShardCount[emptyNode.info.Id]++ + ecNodeIdToShardCount[fullNode.info.Id]-- + hasMove = true + break + } + break + } + } + } + } + + return nil +} + +func pickOneEcNodeAndMoveOneShard(commandEnv *CommandEnv, averageShardsPerEcNode int, existingLocation *EcNode, collection string, vid needle.VolumeId, shardId erasure_coding.ShardId, possibleDestinationEcNodes []*EcNode, applyBalancing bool) error { + + sortEcNodesByFreeslotsDecending(possibleDestinationEcNodes) + + for _, destEcNode := range possibleDestinationEcNodes { + if destEcNode.info.Id == existingLocation.info.Id { + continue + } + + if destEcNode.freeEcSlot <= 0 { + continue + } + if findEcVolumeShards(destEcNode, vid).ShardIdCount() >= averageShardsPerEcNode { + continue + } + + fmt.Printf("%s moves ec shard %d.%d to %s\n", existingLocation.info.Id, vid, shardId, destEcNode.info.Id) + + err := moveMountedShardToEcNode(commandEnv, existingLocation, collection, vid, shardId, destEcNode, applyBalancing) + if err != nil { + return err + } + + return nil + } + + return nil +} + +func pickNEcShardsToMoveFrom(ecNodes []*EcNode, vid needle.VolumeId, n int) map[erasure_coding.ShardId]*EcNode { + picked := make(map[erasure_coding.ShardId]*EcNode) + var candidateEcNodes []*CandidateEcNode + for _, ecNode := range ecNodes { + shardBits := findEcVolumeShards(ecNode, vid) + if shardBits.ShardIdCount() > 0 { + candidateEcNodes = append(candidateEcNodes, &CandidateEcNode{ + ecNode: ecNode, + shardCount: shardBits.ShardIdCount(), + }) + } + } + sort.Slice(candidateEcNodes, func(i, j int) bool { + return candidateEcNodes[i].shardCount > candidateEcNodes[j].shardCount + }) + for i := 0; i < n; i++ { + selectedEcNodeIndex := -1 + for i, candidateEcNode := range candidateEcNodes { + shardBits := findEcVolumeShards(candidateEcNode.ecNode, vid) + if shardBits > 0 { + selectedEcNodeIndex = i + for _, shardId := range shardBits.ShardIds() { + candidateEcNode.shardCount-- + picked[shardId] = candidateEcNode.ecNode + 
candidateEcNode.ecNode.deleteEcVolumeShards(vid, []uint32{uint32(shardId)}) + break + } + break + } + } + if selectedEcNodeIndex >= 0 { + ensureSortedEcNodes(candidateEcNodes, selectedEcNodeIndex, func(i, j int) bool { + return candidateEcNodes[i].shardCount > candidateEcNodes[j].shardCount + }) + } + + } + return picked +} + +func collectVolumeIdToEcNodes(allEcNodes []*EcNode) map[needle.VolumeId][]*EcNode { + vidLocations := make(map[needle.VolumeId][]*EcNode) + for _, ecNode := range allEcNodes { + for _, shardInfo := range ecNode.info.EcShardInfos { + vidLocations[needle.VolumeId(shardInfo.Id)] = append(vidLocations[needle.VolumeId(shardInfo.Id)], ecNode) + } + } + return vidLocations +} diff --git a/weed/shell/command_ec_common.go b/weed/shell/command_ec_common.go new file mode 100644 index 000000000..0db119d3c --- /dev/null +++ b/weed/shell/command_ec_common.go @@ -0,0 +1,337 @@ +package shell + +import ( + "context" + "fmt" + "math" + "sort" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "google.golang.org/grpc" +) + +func moveMountedShardToEcNode(commandEnv *CommandEnv, existingLocation *EcNode, collection string, vid needle.VolumeId, shardId erasure_coding.ShardId, destinationEcNode *EcNode, applyBalancing bool) (err error) { + + copiedShardIds := []uint32{uint32(shardId)} + + if applyBalancing { + + // ask destination node to copy shard and the ecx file from source node, and mount it + copiedShardIds, err = oneServerCopyAndMountEcShardsFromSource(commandEnv.option.GrpcDialOption, destinationEcNode, []uint32{uint32(shardId)}, vid, collection, existingLocation.info.Id) + if err != nil { + return err + } + + // unmount the to be deleted shards + err = unmountEcShards(commandEnv.option.GrpcDialOption, vid, existingLocation.info.Id, copiedShardIds) + if err != nil { + return err + } + + // ask source node to delete the shard, and maybe the ecx file + err = sourceServerDeleteEcShards(commandEnv.option.GrpcDialOption, collection, vid, existingLocation.info.Id, copiedShardIds) + if err != nil { + return err + } + + fmt.Printf("moved ec shard %d.%d %s => %s\n", vid, shardId, existingLocation.info.Id, destinationEcNode.info.Id) + + } + + destinationEcNode.addEcVolumeShards(vid, collection, copiedShardIds) + existingLocation.deleteEcVolumeShards(vid, copiedShardIds) + + return nil + +} + +func oneServerCopyAndMountEcShardsFromSource(grpcDialOption grpc.DialOption, + targetServer *EcNode, shardIdsToCopy []uint32, + volumeId needle.VolumeId, collection string, existingLocation string) (copiedShardIds []uint32, err error) { + + fmt.Printf("allocate %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id) + + err = operation.WithVolumeServerClient(targetServer.info.Id, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { + + if targetServer.info.Id != existingLocation { + + fmt.Printf("copy %d.%v %s => %s\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id) + _, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{ + VolumeId: uint32(volumeId), + Collection: collection, + ShardIds: shardIdsToCopy, + CopyEcxFile: true, + CopyEcjFile: true, + CopyVifFile: true, + SourceDataNode: 
existingLocation, + }) + if copyErr != nil { + return fmt.Errorf("copy %d.%v %s => %s : %v\n", volumeId, shardIdsToCopy, existingLocation, targetServer.info.Id, copyErr) + } + } + + fmt.Printf("mount %d.%v on %s\n", volumeId, shardIdsToCopy, targetServer.info.Id) + _, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{ + VolumeId: uint32(volumeId), + Collection: collection, + ShardIds: shardIdsToCopy, + }) + if mountErr != nil { + return fmt.Errorf("mount %d.%v on %s : %v\n", volumeId, shardIdsToCopy, targetServer.info.Id, mountErr) + } + + if targetServer.info.Id != existingLocation { + copiedShardIds = shardIdsToCopy + glog.V(0).Infof("%s ec volume %d deletes shards %+v", existingLocation, volumeId, copiedShardIds) + } + + return nil + }) + + if err != nil { + return + } + + return +} + +func eachDataNode(topo *master_pb.TopologyInfo, fn func(dc string, rack RackId, dn *master_pb.DataNodeInfo)) { + for _, dc := range topo.DataCenterInfos { + for _, rack := range dc.RackInfos { + for _, dn := range rack.DataNodeInfos { + fn(dc.Id, RackId(rack.Id), dn) + } + } + } +} + +func sortEcNodesByFreeslotsDecending(ecNodes []*EcNode) { + sort.Slice(ecNodes, func(i, j int) bool { + return ecNodes[i].freeEcSlot > ecNodes[j].freeEcSlot + }) +} + +func sortEcNodesByFreeslotsAscending(ecNodes []*EcNode) { + sort.Slice(ecNodes, func(i, j int) bool { + return ecNodes[i].freeEcSlot < ecNodes[j].freeEcSlot + }) +} + +type CandidateEcNode struct { + ecNode *EcNode + shardCount int +} + +// if the index node changed the freeEcSlot, need to keep every EcNode still sorted +func ensureSortedEcNodes(data []*CandidateEcNode, index int, lessThan func(i, j int) bool) { + for i := index - 1; i >= 0; i-- { + if lessThan(i+1, i) { + swap(data, i, i+1) + } else { + break + } + } + for i := index + 1; i < len(data); i++ { + if lessThan(i, i-1) { + swap(data, i, i-1) + } else { + break + } + } +} + +func swap(data []*CandidateEcNode, i, j int) { + t := data[i] + data[i] = data[j] + data[j] = t +} + +func countShards(ecShardInfos []*master_pb.VolumeEcShardInformationMessage) (count int) { + for _, ecShardInfo := range ecShardInfos { + shardBits := erasure_coding.ShardBits(ecShardInfo.EcIndexBits) + count += shardBits.ShardIdCount() + } + return +} + +func countFreeShardSlots(dn *master_pb.DataNodeInfo) (count int) { + return int(dn.MaxVolumeCount-dn.ActiveVolumeCount)*erasure_coding.DataShardsCount - countShards(dn.EcShardInfos) +} + +type RackId string +type EcNodeId string + +type EcNode struct { + info *master_pb.DataNodeInfo + dc string + rack RackId + freeEcSlot int +} + +type EcRack struct { + ecNodes map[EcNodeId]*EcNode + freeEcSlot int +} + +func collectEcNodes(commandEnv *CommandEnv, selectedDataCenter string) (ecNodes []*EcNode, totalFreeEcSlots int, err error) { + + // list all possible locations + var resp *master_pb.VolumeListResponse + err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error { + resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{}) + return err + }) + if err != nil { + return nil, 0, err + } + + // find out all volume servers with one slot left. 
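+	// Free slots are estimated by countFreeShardSlots above:
+	// (MaxVolumeCount - ActiveVolumeCount) * DataShardsCount - countShards(EcShardInfos).
+	// For example, assuming the 10+4 scheme implied by the 14 total shards in the
+	// ec.balance help text, a node with room for 4 more volumes that already holds
+	// 5 ec shards has 4*10 - 5 = 35 free ec shard slots.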
+	eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+		if selectedDataCenter != "" && selectedDataCenter != dc {
+			return
+		}
+
+		freeEcSlots := countFreeShardSlots(dn)
+		ecNodes = append(ecNodes, &EcNode{
+			info:       dn,
+			dc:         dc,
+			rack:       rack,
+			freeEcSlot: int(freeEcSlots),
+		})
+		totalFreeEcSlots += freeEcSlots
+	})
+
+	sortEcNodesByFreeslotsDecending(ecNodes)
+
+	return
+}
+
+func sourceServerDeleteEcShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation string, toBeDeletedShardIds []uint32) error {
+
+	fmt.Printf("delete %d.%v from %s\n", volumeId, toBeDeletedShardIds, sourceLocation)
+
+	return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, deleteErr := volumeServerClient.VolumeEcShardsDelete(context.Background(), &volume_server_pb.VolumeEcShardsDeleteRequest{
+			VolumeId:   uint32(volumeId),
+			Collection: collection,
+			ShardIds:   toBeDeletedShardIds,
+		})
+		return deleteErr
+	})
+
+}
+
+func unmountEcShards(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceLocation string, toBeUnmountedShardIds []uint32) error {
+
+	fmt.Printf("unmount %d.%v from %s\n", volumeId, toBeUnmountedShardIds, sourceLocation)
+
+	return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, deleteErr := volumeServerClient.VolumeEcShardsUnmount(context.Background(), &volume_server_pb.VolumeEcShardsUnmountRequest{
+			VolumeId: uint32(volumeId),
+			ShardIds: toBeUnmountedShardIds,
+		})
+		return deleteErr
+	})
+}
+
+func mountEcShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation string, toBeMountedShardIds []uint32) error {
+
+	fmt.Printf("mount %d.%v on %s\n", volumeId, toBeMountedShardIds, sourceLocation)
+
+	return operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, mountErr := volumeServerClient.VolumeEcShardsMount(context.Background(), &volume_server_pb.VolumeEcShardsMountRequest{
+			VolumeId:   uint32(volumeId),
+			Collection: collection,
+			ShardIds:   toBeMountedShardIds,
+		})
+		return mountErr
+	})
+}
+
+func ceilDivide(total, n int) int {
+	return int(math.Ceil(float64(total) / float64(n)))
+}
+
+func findEcVolumeShards(ecNode *EcNode, vid needle.VolumeId) erasure_coding.ShardBits {
+
+	for _, shardInfo := range ecNode.info.EcShardInfos {
+		if needle.VolumeId(shardInfo.Id) == vid {
+			return erasure_coding.ShardBits(shardInfo.EcIndexBits)
+		}
+	}
+
+	return 0
+}
+
+func (ecNode *EcNode) addEcVolumeShards(vid needle.VolumeId, collection string, shardIds []uint32) *EcNode {
+
+	foundVolume := false
+	for _, shardInfo := range ecNode.info.EcShardInfos {
+		if needle.VolumeId(shardInfo.Id) == vid {
+			oldShardBits := erasure_coding.ShardBits(shardInfo.EcIndexBits)
+			newShardBits := oldShardBits
+			for _, shardId := range shardIds {
+				newShardBits = newShardBits.AddShardId(erasure_coding.ShardId(shardId))
+			}
+			shardInfo.EcIndexBits = uint32(newShardBits)
+			ecNode.freeEcSlot -= newShardBits.ShardIdCount() - oldShardBits.ShardIdCount()
+			foundVolume = true
+			break
+		}
+	}
+
+	if !foundVolume {
+		var newShardBits erasure_coding.ShardBits
+		for _, shardId := range shardIds {
+			newShardBits = newShardBits.AddShardId(erasure_coding.ShardId(shardId))
+		}
+		ecNode.info.EcShardInfos = append(ecNode.info.EcShardInfos, 
&master_pb.VolumeEcShardInformationMessage{
+			Id:          uint32(vid),
+			Collection:  collection,
+			EcIndexBits: uint32(newShardBits),
+		})
+		ecNode.freeEcSlot -= len(shardIds)
+	}
+
+	return ecNode
+}
+
+func (ecNode *EcNode) deleteEcVolumeShards(vid needle.VolumeId, shardIds []uint32) *EcNode {
+
+	for _, shardInfo := range ecNode.info.EcShardInfos {
+		if needle.VolumeId(shardInfo.Id) == vid {
+			oldShardBits := erasure_coding.ShardBits(shardInfo.EcIndexBits)
+			newShardBits := oldShardBits
+			for _, shardId := range shardIds {
+				newShardBits = newShardBits.RemoveShardId(erasure_coding.ShardId(shardId))
+			}
+			shardInfo.EcIndexBits = uint32(newShardBits)
+			ecNode.freeEcSlot -= newShardBits.ShardIdCount() - oldShardBits.ShardIdCount()
+		}
+	}
+
+	return ecNode
+}
+
+func groupByCount(data []*EcNode, identifierFn func(*EcNode) (id string, count int)) map[string]int {
+	countMap := make(map[string]int)
+	for _, d := range data {
+		id, count := identifierFn(d)
+		countMap[id] += count
+	}
+	return countMap
+}
+
+func groupBy(data []*EcNode, identifierFn func(*EcNode) (id string)) map[string][]*EcNode {
+	groupMap := make(map[string][]*EcNode)
+	for _, d := range data {
+		id := identifierFn(d)
+		groupMap[id] = append(groupMap[id], d)
+	}
+	return groupMap
+}
diff --git a/weed/shell/command_ec_decode.go b/weed/shell/command_ec_decode.go
new file mode 100644
index 000000000..b69e403cb
--- /dev/null
+++ b/weed/shell/command_ec_decode.go
@@ -0,0 +1,264 @@
+package shell
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+
+	"google.golang.org/grpc"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+func init() {
+	Commands = append(Commands, &commandEcDecode{})
+}
+
+type commandEcDecode struct {
+}
+
+func (c *commandEcDecode) Name() string {
+	return "ec.decode"
+}
+
+func (c *commandEcDecode) Help() string {
+	return `decode an erasure coded volume into a normal volume
+
+	ec.decode [-collection=""] [-volumeId=<volume_id>]
+
+`
+}
+
+func (c *commandEcDecode) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	decodeCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	volumeId := decodeCommand.Int("volumeId", 0, "the volume id")
+	collection := decodeCommand.String("collection", "", "the collection name")
+	if err = decodeCommand.Parse(args); err != nil {
+		return nil
+	}
+
+	vid := needle.VolumeId(*volumeId)
+
+	// collect topology information
+	topologyInfo, err := collectTopologyInfo(commandEnv)
+	if err != nil {
+		return err
+	}
+
+	// volumeId is provided
+	if vid != 0 {
+		return doEcDecode(commandEnv, topologyInfo, *collection, vid)
+	}
+
+	// apply to all volumes in the collection
+	volumeIds := collectEcShardIds(topologyInfo, *collection)
+	fmt.Printf("ec decode volumes: %v\n", volumeIds)
+	for _, vid := range volumeIds {
+		if err = doEcDecode(commandEnv, topologyInfo, *collection, vid); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func doEcDecode(commandEnv *CommandEnv, topoInfo *master_pb.TopologyInfo, collection string, vid needle.VolumeId) (err error) {
+	// find volume location
+	nodeToEcIndexBits := collectEcNodeShardBits(topoInfo, vid)
+
+	fmt.Printf("ec volume %d shard locations: %+v\n", vid, nodeToEcIndexBits)
+
+	// collect ec shards to the server with most space
+	targetNodeLocation, err := 
+	targetNodeLocation, err := collectEcShards(commandEnv, nodeToEcIndexBits, collection, vid)
+	if err != nil {
+		return fmt.Errorf("collectEcShards for volume %d: %v", vid, err)
+	}
+
+	// generate a normal volume
+	err = generateNormalVolume(commandEnv.option.GrpcDialOption, needle.VolumeId(vid), collection, targetNodeLocation)
+	if err != nil {
+		return fmt.Errorf("generate normal volume %d on %s: %v", vid, targetNodeLocation, err)
+	}
+
+	// delete the previous ec shards
+	err = mountVolumeAndDeleteEcShards(commandEnv.option.GrpcDialOption, collection, targetNodeLocation, nodeToEcIndexBits, vid)
+	if err != nil {
+		return fmt.Errorf("delete ec shards for volume %d: %v", vid, err)
+	}
+
+	return nil
+}
+
+func mountVolumeAndDeleteEcShards(grpcDialOption grpc.DialOption, collection, targetNodeLocation string, nodeToEcIndexBits map[string]erasure_coding.ShardBits, vid needle.VolumeId) error {
+
+	// mount volume
+	if err := operation.WithVolumeServerClient(targetNodeLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, mountErr := volumeServerClient.VolumeMount(context.Background(), &volume_server_pb.VolumeMountRequest{
+			VolumeId: uint32(vid),
+		})
+		return mountErr
+	}); err != nil {
+		return fmt.Errorf("mountVolumeAndDeleteEcShards mount volume %d on %s: %v", vid, targetNodeLocation, err)
+	}
+
+	// unmount ec shards
+	for location, ecIndexBits := range nodeToEcIndexBits {
+		fmt.Printf("unmount ec volume %d on %s has shards: %+v\n", vid, location, ecIndexBits.ShardIds())
+		err := unmountEcShards(grpcDialOption, vid, location, ecIndexBits.ToUint32Slice())
+		if err != nil {
+			return fmt.Errorf("mountVolumeAndDeleteEcShards unmount ec volume %d on %s: %v", vid, location, err)
+		}
+	}
+	// delete ec shards
+	for location, ecIndexBits := range nodeToEcIndexBits {
+		fmt.Printf("delete ec volume %d on %s has shards: %+v\n", vid, location, ecIndexBits.ShardIds())
+		err := sourceServerDeleteEcShards(grpcDialOption, collection, vid, location, ecIndexBits.ToUint32Slice())
+		if err != nil {
+			return fmt.Errorf("mountVolumeAndDeleteEcShards delete ec volume %d on %s: %v", vid, location, err)
+		}
+	}
+
+	return nil
+}
+
+func generateNormalVolume(grpcDialOption grpc.DialOption, vid needle.VolumeId, collection string, sourceVolumeServer string) error {
+
+	fmt.Printf("generateNormalVolume from ec volume %d on %s\n", vid, sourceVolumeServer)
+
+	err := operation.WithVolumeServerClient(sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, genErr := volumeServerClient.VolumeEcShardsToVolume(context.Background(), &volume_server_pb.VolumeEcShardsToVolumeRequest{
+			VolumeId:   uint32(vid),
+			Collection: collection,
+		})
+		return genErr
+	})
+
+	return err
+
+}
+
+func collectEcShards(commandEnv *CommandEnv, nodeToEcIndexBits map[string]erasure_coding.ShardBits, collection string, vid needle.VolumeId) (targetNodeLocation string, err error) {
+
+	maxShardCount := 0
+	var existingEcIndexBits erasure_coding.ShardBits
+	for loc, ecIndexBits := range nodeToEcIndexBits {
+		toBeCopiedShardCount := ecIndexBits.MinusParityShards().ShardIdCount()
+		if toBeCopiedShardCount > maxShardCount {
+			maxShardCount = toBeCopiedShardCount
+			targetNodeLocation = loc
+			existingEcIndexBits = ecIndexBits
+		}
+	}
+
+	fmt.Printf("collectEcShards: ec volume %d collect shards to %s from: %+v\n", vid, targetNodeLocation, nodeToEcIndexBits)
+
+	var copiedEcIndexBits erasure_coding.ShardBits
+	for loc, ecIndexBits := range nodeToEcIndexBits {
+		if loc == targetNodeLocation {
+			continue
+		}
+
+		needToCopyEcIndexBits := ecIndexBits.Minus(existingEcIndexBits).MinusParityShards()
+		if needToCopyEcIndexBits.ShardIdCount() == 0 {
+			continue
+		}
+
+		err = operation.WithVolumeServerClient(targetNodeLocation, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+
+			fmt.Printf("copy %d.%v %s => %s\n", vid, needToCopyEcIndexBits.ShardIds(), loc, targetNodeLocation)
+
+			_, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{
+				VolumeId:       uint32(vid),
+				Collection:     collection,
+				ShardIds:       needToCopyEcIndexBits.ToUint32Slice(),
+				CopyEcxFile:    false,
+				CopyEcjFile:    true,
+				CopyVifFile:    true,
+				SourceDataNode: loc,
+			})
+			if copyErr != nil {
+				return fmt.Errorf("copy %d.%v %s => %s : %v", vid, needToCopyEcIndexBits.ShardIds(), loc, targetNodeLocation, copyErr)
+			}
+
+			return nil
+		})
+
+		if err != nil {
+			break
+		}
+
+		copiedEcIndexBits = copiedEcIndexBits.Plus(needToCopyEcIndexBits)
+
+	}
+
+	nodeToEcIndexBits[targetNodeLocation] = existingEcIndexBits.Plus(copiedEcIndexBits)
+
+	return targetNodeLocation, err
+
+}
+
+func collectTopologyInfo(commandEnv *CommandEnv) (topoInfo *master_pb.TopologyInfo, err error) {
+
+	var resp *master_pb.VolumeListResponse
+	err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
+		resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
+		return err
+	})
+	if err != nil {
+		return
+	}
+
+	return resp.TopologyInfo, nil
+
+}
+
+func collectEcShardInfos(topoInfo *master_pb.TopologyInfo, selectedCollection string, vid needle.VolumeId) (ecShardInfos []*master_pb.VolumeEcShardInformationMessage) {
+
+	eachDataNode(topoInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+		for _, v := range dn.EcShardInfos {
+			if v.Collection == selectedCollection && v.Id == uint32(vid) {
+				ecShardInfos = append(ecShardInfos, v)
+			}
+		}
+	})
+
+	return
+}
+
+func collectEcShardIds(topoInfo *master_pb.TopologyInfo, selectedCollection string) (vids []needle.VolumeId) {
+
+	vidMap := make(map[uint32]bool)
+	eachDataNode(topoInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+		for _, v := range dn.EcShardInfos {
+			if v.Collection == selectedCollection {
+				vidMap[v.Id] = true
+			}
+		}
+	})
+
+	for vid := range vidMap {
+		vids = append(vids, needle.VolumeId(vid))
+	}
+
+	return
+}
+
+func collectEcNodeShardBits(topoInfo *master_pb.TopologyInfo, vid needle.VolumeId) map[string]erasure_coding.ShardBits {
+
+	nodeToEcIndexBits := make(map[string]erasure_coding.ShardBits)
+	eachDataNode(topoInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+		for _, v := range dn.EcShardInfos {
+			if v.Id == uint32(vid) {
+				nodeToEcIndexBits[dn.Id] = erasure_coding.ShardBits(v.EcIndexBits)
+			}
+		}
+	})
+
+	return nodeToEcIndexBits
+}
diff --git a/weed/shell/command_ec_encode.go b/weed/shell/command_ec_encode.go
new file mode 100644
index 000000000..6efb05488
--- /dev/null
+++ b/weed/shell/command_ec_encode.go
@@ -0,0 +1,287 @@
+package shell
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+	"sync"
+	"time"
+
+	"google.golang.org/grpc"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"github.com/chrislusf/seaweedfs/weed/wdclient"
+)
+
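+// Example (hypothetical shell session; the flags are the ones registered in Do
+// below): encode every volume in collection "pictures" that is at least 95% full
+// and has seen no writes for an hour:
+//
+//	> ec.encode -collection=pictures -fullPercent=95 -quietFor=1h
+//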
+func init() {
+	Commands = append(Commands, &commandEcEncode{})
+}
+
+type commandEcEncode struct {
+}
+
+func (c *commandEcEncode) Name() string {
+	return "ec.encode"
+}
+
+func (c *commandEcEncode) Help() string {
+	return `apply erasure coding to a volume
+
+	ec.encode [-collection=""] [-fullPercent=95] [-quietFor=1h]
+	ec.encode [-collection=""] [-volumeId=<volume_id>]
+
+	This command will:
+	1. freeze one volume
+	2. apply erasure coding to the volume
+	3. move the encoded shards to multiple volume servers
+
+	The erasure coding is 10.4, i.e. 10 data shards plus 4 parity shards. So ideally you
+	have more than 14 volume servers, and you can afford to lose 4 volume servers.
+
+	If the number of volume servers is small, the worst case is that you only have 4 volume
+	servers, and the shards are spread as 4,4,3,3, respectively. You can afford to lose one volume server.
+
+	If you have fewer than 4 volume servers, erasure coding still lets you tolerate up to
+	4 corrupted shard files.
+
+`
+}
+
+func (c *commandEcEncode) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	encodeCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	volumeId := encodeCommand.Int("volumeId", 0, "the volume id")
+	collection := encodeCommand.String("collection", "", "the collection name")
+	fullPercentage := encodeCommand.Float64("fullPercent", 95, "select volumes that reach this percentage of the max volume size")
+	quietPeriod := encodeCommand.Duration("quietFor", time.Hour, "select volumes with no writes for this period")
+	if err = encodeCommand.Parse(args); err != nil {
+		return nil
+	}
+
+	vid := needle.VolumeId(*volumeId)
+
+	// volumeId is provided
+	if vid != 0 {
+		return doEcEncode(commandEnv, *collection, vid)
+	}
+
+	// apply to all volumes in the collection
+	volumeIds, err := collectVolumeIdsForEcEncode(commandEnv, *collection, *fullPercentage, *quietPeriod)
+	if err != nil {
+		return err
+	}
+	fmt.Printf("ec encode volumes: %v\n", volumeIds)
+	for _, vid := range volumeIds {
+		if err = doEcEncode(commandEnv, *collection, vid); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func doEcEncode(commandEnv *CommandEnv, collection string, vid needle.VolumeId) (err error) {
+	// find volume location
+	locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
+	if !found {
+		return fmt.Errorf("volume %d not found", vid)
+	}
+
+	// fmt.Printf("found ec %d shards on %v\n", vid, locations)
+
+	// mark the volume as readonly
+	err = markVolumeReadonly(commandEnv.option.GrpcDialOption, needle.VolumeId(vid), locations)
+	if err != nil {
+		return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
+	}
+
+	// generate ec shards
+	err = generateEcShards(commandEnv.option.GrpcDialOption, needle.VolumeId(vid), collection, locations[0].Url)
+	if err != nil {
+		return fmt.Errorf("generate ec shards for volume %d on %s: %v", vid, locations[0].Url, err)
+	}
+
+	// balance the ec shards across the current cluster
+	err = spreadEcShards(commandEnv, vid, collection, locations)
+	if err != nil {
+		return fmt.Errorf("spread ec shards for volume %d from %s: %v", vid, locations[0].Url, err)
+	}
+
+	return nil
+}
+
+func markVolumeReadonly(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, locations []wdclient.Location) error {
+
+	for _, location := range locations {
+
+		err := operation.WithVolumeServerClient(location.Url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+			_, markErr := volumeServerClient.VolumeMarkReadonly(context.Background(),
&volume_server_pb.VolumeMarkReadonlyRequest{ + VolumeId: uint32(volumeId), + }) + return markErr + }) + + if err != nil { + return err + } + + } + + return nil +} + +func generateEcShards(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, collection string, sourceVolumeServer string) error { + + err := operation.WithVolumeServerClient(sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { + _, genErr := volumeServerClient.VolumeEcShardsGenerate(context.Background(), &volume_server_pb.VolumeEcShardsGenerateRequest{ + VolumeId: uint32(volumeId), + Collection: collection, + }) + return genErr + }) + + return err + +} + +func spreadEcShards(commandEnv *CommandEnv, volumeId needle.VolumeId, collection string, existingLocations []wdclient.Location) (err error) { + + allEcNodes, totalFreeEcSlots, err := collectEcNodes(commandEnv, "") + if err != nil { + return err + } + + if totalFreeEcSlots < erasure_coding.TotalShardsCount { + return fmt.Errorf("not enough free ec shard slots. only %d left", totalFreeEcSlots) + } + allocatedDataNodes := allEcNodes + if len(allocatedDataNodes) > erasure_coding.TotalShardsCount { + allocatedDataNodes = allocatedDataNodes[:erasure_coding.TotalShardsCount] + } + + // calculate how many shards to allocate for these servers + allocatedEcIds := balancedEcDistribution(allocatedDataNodes) + + // ask the data nodes to copy from the source volume server + copiedShardIds, err := parallelCopyEcShardsFromSource(commandEnv.option.GrpcDialOption, allocatedDataNodes, allocatedEcIds, volumeId, collection, existingLocations[0]) + if err != nil { + return err + } + + // unmount the to be deleted shards + err = unmountEcShards(commandEnv.option.GrpcDialOption, volumeId, existingLocations[0].Url, copiedShardIds) + if err != nil { + return err + } + + // ask the source volume server to clean up copied ec shards + err = sourceServerDeleteEcShards(commandEnv.option.GrpcDialOption, collection, volumeId, existingLocations[0].Url, copiedShardIds) + if err != nil { + return fmt.Errorf("source delete copied ecShards %s %d.%v: %v", existingLocations[0].Url, volumeId, copiedShardIds, err) + } + + // ask the source volume server to delete the original volume + for _, location := range existingLocations { + err = deleteVolume(commandEnv.option.GrpcDialOption, volumeId, location.Url) + if err != nil { + return fmt.Errorf("deleteVolume %s volume %d: %v", location.Url, volumeId, err) + } + } + + return err + +} + +func parallelCopyEcShardsFromSource(grpcDialOption grpc.DialOption, targetServers []*EcNode, allocatedEcIds [][]uint32, volumeId needle.VolumeId, collection string, existingLocation wdclient.Location) (actuallyCopied []uint32, err error) { + + // parallelize + shardIdChan := make(chan []uint32, len(targetServers)) + var wg sync.WaitGroup + for i, server := range targetServers { + if len(allocatedEcIds[i]) <= 0 { + continue + } + + wg.Add(1) + go func(server *EcNode, allocatedEcShardIds []uint32) { + defer wg.Done() + copiedShardIds, copyErr := oneServerCopyAndMountEcShardsFromSource(grpcDialOption, server, + allocatedEcShardIds, volumeId, collection, existingLocation.Url) + if copyErr != nil { + err = copyErr + } else { + shardIdChan <- copiedShardIds + server.addEcVolumeShards(volumeId, collection, copiedShardIds) + } + }(server, allocatedEcIds[i]) + } + wg.Wait() + close(shardIdChan) + + if err != nil { + return nil, err + } + + for shardIds := range shardIdChan { + actuallyCopied = append(actuallyCopied, shardIds...) 
+	}
+
+	return
+}
+
+func balancedEcDistribution(servers []*EcNode) (allocated [][]uint32) {
+	allocated = make([][]uint32, len(servers))
+	allocatedShardIdIndex := uint32(0)
+	serverIndex := 0
+	for allocatedShardIdIndex < erasure_coding.TotalShardsCount {
+		if servers[serverIndex].freeEcSlot > 0 {
+			allocated[serverIndex] = append(allocated[serverIndex], allocatedShardIdIndex)
+			allocatedShardIdIndex++
+		}
+		serverIndex++
+		if serverIndex >= len(servers) {
+			serverIndex = 0
+		}
+	}
+
+	return allocated
+}
+
+func collectVolumeIdsForEcEncode(commandEnv *CommandEnv, selectedCollection string, fullPercentage float64, quietPeriod time.Duration) (vids []needle.VolumeId, err error) {
+
+	var resp *master_pb.VolumeListResponse
+	err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
+		resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
+		return err
+	})
+	if err != nil {
+		return
+	}
+
+	quietSeconds := int64(quietPeriod / time.Second)
+	nowUnixSeconds := time.Now().Unix()
+
+	fmt.Printf("ec encode volumes quiet for: %d seconds\n", quietSeconds)
+
+	vidMap := make(map[uint32]bool)
+	eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+		for _, v := range dn.VolumeInfos {
+			if v.Collection == selectedCollection && v.ModifiedAtSecond+quietSeconds < nowUnixSeconds {
+				if float64(v.Size) > fullPercentage/100*float64(resp.VolumeSizeLimitMb)*1024*1024 {
+					vidMap[v.Id] = true
+				}
+			}
+		}
+	})
+
+	for vid := range vidMap {
+		vids = append(vids, needle.VolumeId(vid))
+	}
+
+	return
+}
diff --git a/weed/shell/command_ec_rebuild.go b/weed/shell/command_ec_rebuild.go
new file mode 100644
index 000000000..d9d943e6d
--- /dev/null
+++ b/weed/shell/command_ec_rebuild.go
@@ -0,0 +1,267 @@
+package shell
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"google.golang.org/grpc"
+)
+
+func init() {
+	Commands = append(Commands, &commandEcRebuild{})
+}
+
+type commandEcRebuild struct {
+}
+
+func (c *commandEcRebuild) Name() string {
+	return "ec.rebuild"
+}
+
+func (c *commandEcRebuild) Help() string {
+	return `find and rebuild missing ec shards among volume servers
+
+	ec.rebuild [-collection EACH_COLLECTION|<collection_name>] [-force]
+
+	Algorithm:
+
+	for each collection {
+		rebuildEcVolumes()
+	}
+
+	func rebuildEcVolumes(){
+		for each ec volume {
+			if the volume already has all 14 shards {
+				continue
+			}
+			if the volume has fewer than 10 shards {
+				fail: it cannot be repaired
+			}
+			pick the volume server with the most free ec shard slots (it needs at least 14)
+			copy the available shards to it, rebuild the missing shards there, and mount them
+		}
+	}
+
+`
+}
+
+func (c *commandEcRebuild) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	fixCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	collection := fixCommand.String("collection", "EACH_COLLECTION", "collection name, or \"EACH_COLLECTION\" for each collection")
+	applyChanges := fixCommand.Bool("force", false, "apply the changes")
+	if err = fixCommand.Parse(args); err != nil {
+		return nil
+	}
+
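+	// Note: without -force, the copy/rebuild/mount steps in rebuildOneEcVolume are
+	// skipped and the command mostly reports what it would do.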
+ // collect all ec nodes + allEcNodes, _, err := collectEcNodes(commandEnv, "") + if err != nil { + return err + } + + if *collection == "EACH_COLLECTION" { + collections, err := ListCollectionNames(commandEnv, false, true) + if err != nil { + return err + } + fmt.Printf("rebuildEcVolumes collections %+v\n", len(collections)) + for _, c := range collections { + fmt.Printf("rebuildEcVolumes collection %+v\n", c) + if err = rebuildEcVolumes(commandEnv, allEcNodes, c, writer, *applyChanges); err != nil { + return err + } + } + } else { + if err = rebuildEcVolumes(commandEnv, allEcNodes, *collection, writer, *applyChanges); err != nil { + return err + } + } + + return nil +} + +func rebuildEcVolumes(commandEnv *CommandEnv, allEcNodes []*EcNode, collection string, writer io.Writer, applyChanges bool) error { + + fmt.Printf("rebuildEcVolumes %s\n", collection) + + // collect vid => each shard locations, similar to ecShardMap in topology.go + ecShardMap := make(EcShardMap) + for _, ecNode := range allEcNodes { + ecShardMap.registerEcNode(ecNode, collection) + } + + for vid, locations := range ecShardMap { + shardCount := locations.shardCount() + if shardCount == erasure_coding.TotalShardsCount { + continue + } + if shardCount < erasure_coding.DataShardsCount { + return fmt.Errorf("ec volume %d is unrepairable with %d shards\n", vid, shardCount) + } + + sortEcNodesByFreeslotsDecending(allEcNodes) + + if allEcNodes[0].freeEcSlot < erasure_coding.TotalShardsCount { + return fmt.Errorf("disk space is not enough") + } + + if err := rebuildOneEcVolume(commandEnv, allEcNodes[0], collection, vid, locations, writer, applyChanges); err != nil { + return err + } + } + + return nil +} + +func rebuildOneEcVolume(commandEnv *CommandEnv, rebuilder *EcNode, collection string, volumeId needle.VolumeId, locations EcShardLocations, writer io.Writer, applyChanges bool) error { + + fmt.Printf("rebuildOneEcVolume %s %d\n", collection, volumeId) + + // collect shard files to rebuilder local disk + var generatedShardIds []uint32 + copiedShardIds, _, err := prepareDataToRecover(commandEnv, rebuilder, collection, volumeId, locations, writer, applyChanges) + if err != nil { + return err + } + defer func() { + // clean up working files + + // ask the rebuilder to delete the copied shards + err = sourceServerDeleteEcShards(commandEnv.option.GrpcDialOption, collection, volumeId, rebuilder.info.Id, copiedShardIds) + if err != nil { + fmt.Fprintf(writer, "%s delete copied ec shards %s %d.%v\n", rebuilder.info.Id, collection, volumeId, copiedShardIds) + } + + }() + + if !applyChanges { + return nil + } + + // generate ec shards, and maybe ecx file + generatedShardIds, err = generateMissingShards(commandEnv.option.GrpcDialOption, collection, volumeId, rebuilder.info.Id) + if err != nil { + return err + } + + // mount the generated shards + err = mountEcShards(commandEnv.option.GrpcDialOption, collection, volumeId, rebuilder.info.Id, generatedShardIds) + if err != nil { + return err + } + + rebuilder.addEcVolumeShards(volumeId, collection, generatedShardIds) + + return nil +} + +func generateMissingShards(grpcDialOption grpc.DialOption, collection string, volumeId needle.VolumeId, sourceLocation string) (rebuiltShardIds []uint32, err error) { + + err = operation.WithVolumeServerClient(sourceLocation, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { + resp, rebultErr := volumeServerClient.VolumeEcShardsRebuild(context.Background(), &volume_server_pb.VolumeEcShardsRebuildRequest{ + VolumeId: 
uint32(volumeId), + Collection: collection, + }) + if rebultErr == nil { + rebuiltShardIds = resp.RebuiltShardIds + } + return rebultErr + }) + return +} + +func prepareDataToRecover(commandEnv *CommandEnv, rebuilder *EcNode, collection string, volumeId needle.VolumeId, locations EcShardLocations, writer io.Writer, applyBalancing bool) (copiedShardIds []uint32, localShardIds []uint32, err error) { + + needEcxFile := true + var localShardBits erasure_coding.ShardBits + for _, ecShardInfo := range rebuilder.info.EcShardInfos { + if ecShardInfo.Collection == collection && needle.VolumeId(ecShardInfo.Id) == volumeId { + needEcxFile = false + localShardBits = erasure_coding.ShardBits(ecShardInfo.EcIndexBits) + } + } + + for shardId, ecNodes := range locations { + + if len(ecNodes) == 0 { + fmt.Fprintf(writer, "missing shard %d.%d\n", volumeId, shardId) + continue + } + + if localShardBits.HasShardId(erasure_coding.ShardId(shardId)) { + localShardIds = append(localShardIds, uint32(shardId)) + fmt.Fprintf(writer, "use existing shard %d.%d\n", volumeId, shardId) + continue + } + + var copyErr error + if applyBalancing { + copyErr = operation.WithVolumeServerClient(rebuilder.info.Id, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { + _, copyErr := volumeServerClient.VolumeEcShardsCopy(context.Background(), &volume_server_pb.VolumeEcShardsCopyRequest{ + VolumeId: uint32(volumeId), + Collection: collection, + ShardIds: []uint32{uint32(shardId)}, + CopyEcxFile: needEcxFile, + CopyEcjFile: needEcxFile, + CopyVifFile: needEcxFile, + SourceDataNode: ecNodes[0].info.Id, + }) + return copyErr + }) + if copyErr == nil && needEcxFile { + needEcxFile = false + } + } + if copyErr != nil { + fmt.Fprintf(writer, "%s failed to copy %d.%d from %s: %v\n", rebuilder.info.Id, volumeId, shardId, ecNodes[0].info.Id, copyErr) + } else { + fmt.Fprintf(writer, "%s copied %d.%d from %s\n", rebuilder.info.Id, volumeId, shardId, ecNodes[0].info.Id) + copiedShardIds = append(copiedShardIds, uint32(shardId)) + } + + } + + if len(copiedShardIds)+len(localShardIds) >= erasure_coding.DataShardsCount { + return copiedShardIds, localShardIds, nil + } + + return nil, nil, fmt.Errorf("%d shards are not enough to recover volume %d", len(copiedShardIds)+len(localShardIds), volumeId) + +} + +type EcShardMap map[needle.VolumeId]EcShardLocations +type EcShardLocations [][]*EcNode + +func (ecShardMap EcShardMap) registerEcNode(ecNode *EcNode, collection string) { + for _, shardInfo := range ecNode.info.EcShardInfos { + if shardInfo.Collection == collection { + existing, found := ecShardMap[needle.VolumeId(shardInfo.Id)] + if !found { + existing = make([][]*EcNode, erasure_coding.TotalShardsCount) + ecShardMap[needle.VolumeId(shardInfo.Id)] = existing + } + for _, shardId := range erasure_coding.ShardBits(shardInfo.EcIndexBits).ShardIds() { + existing[shardId] = append(existing[shardId], ecNode) + } + } + } +} + +func (ecShardLocations EcShardLocations) shardCount() (count int) { + for _, locations := range ecShardLocations { + if len(locations) > 0 { + count++ + } + } + return +} diff --git a/weed/shell/command_ec_test.go b/weed/shell/command_ec_test.go new file mode 100644 index 000000000..4fddcbea5 --- /dev/null +++ b/weed/shell/command_ec_test.go @@ -0,0 +1,139 @@ +package shell + +import ( + "fmt" + "testing" + + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +func TestCommandEcDistribution(t *testing.T) { + + 
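+	// two nodes with ample free slots; balancedEcDistribution round-robins the
+	// 14 shard ids, so the expected allocation is 7 shards per node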
allEcNodes := []*EcNode{ + newEcNode("dc1", "rack1", "dn1", 100), + newEcNode("dc1", "rack2", "dn2", 100), + } + + allocated := balancedEcDistribution(allEcNodes) + + fmt.Printf("allocated: %+v", allocated) +} + +func TestCommandEcBalanceSmall(t *testing.T) { + + allEcNodes := []*EcNode{ + newEcNode("dc1", "rack1", "dn1", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}), + newEcNode("dc1", "rack2", "dn2", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}), + } + + racks := collectRacks(allEcNodes) + balanceEcVolumes(nil, "c1", allEcNodes, racks, false) +} + +func TestCommandEcBalanceNothingToMove(t *testing.T) { + + allEcNodes := []*EcNode{ + newEcNode("dc1", "rack1", "dn1", 100). + addEcVolumeAndShardsForTest(1, "c1", []uint32{0, 1, 2, 3, 4, 5, 6}). + addEcVolumeAndShardsForTest(2, "c1", []uint32{7, 8, 9, 10, 11, 12, 13}), + newEcNode("dc1", "rack1", "dn2", 100). + addEcVolumeAndShardsForTest(1, "c1", []uint32{7, 8, 9, 10, 11, 12, 13}). + addEcVolumeAndShardsForTest(2, "c1", []uint32{0, 1, 2, 3, 4, 5, 6}), + } + + racks := collectRacks(allEcNodes) + balanceEcVolumes(nil, "c1", allEcNodes, racks, false) +} + +func TestCommandEcBalanceAddNewServers(t *testing.T) { + + allEcNodes := []*EcNode{ + newEcNode("dc1", "rack1", "dn1", 100). + addEcVolumeAndShardsForTest(1, "c1", []uint32{0, 1, 2, 3, 4, 5, 6}). + addEcVolumeAndShardsForTest(2, "c1", []uint32{7, 8, 9, 10, 11, 12, 13}), + newEcNode("dc1", "rack1", "dn2", 100). + addEcVolumeAndShardsForTest(1, "c1", []uint32{7, 8, 9, 10, 11, 12, 13}). + addEcVolumeAndShardsForTest(2, "c1", []uint32{0, 1, 2, 3, 4, 5, 6}), + newEcNode("dc1", "rack1", "dn3", 100), + newEcNode("dc1", "rack1", "dn4", 100), + } + + racks := collectRacks(allEcNodes) + balanceEcVolumes(nil, "c1", allEcNodes, racks, false) +} + +func TestCommandEcBalanceAddNewRacks(t *testing.T) { + + allEcNodes := []*EcNode{ + newEcNode("dc1", "rack1", "dn1", 100). + addEcVolumeAndShardsForTest(1, "c1", []uint32{0, 1, 2, 3, 4, 5, 6}). + addEcVolumeAndShardsForTest(2, "c1", []uint32{7, 8, 9, 10, 11, 12, 13}), + newEcNode("dc1", "rack1", "dn2", 100). + addEcVolumeAndShardsForTest(1, "c1", []uint32{7, 8, 9, 10, 11, 12, 13}). + addEcVolumeAndShardsForTest(2, "c1", []uint32{0, 1, 2, 3, 4, 5, 6}), + newEcNode("dc1", "rack2", "dn3", 100), + newEcNode("dc1", "rack2", "dn4", 100), + } + + racks := collectRacks(allEcNodes) + balanceEcVolumes(nil, "c1", allEcNodes, racks, false) +} + +func TestCommandEcBalanceVolumeEvenButRackUneven(t *testing.T) { + + allEcNodes := []*EcNode{ + newEcNode("dc1", "rack1", "dn_shared", 100). + addEcVolumeAndShardsForTest(1, "c1", []uint32{0}). 
+ addEcVolumeAndShardsForTest(2, "c1", []uint32{0}), + + newEcNode("dc1", "rack1", "dn_a1", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{1}), + newEcNode("dc1", "rack1", "dn_a2", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{2}), + newEcNode("dc1", "rack1", "dn_a3", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{3}), + newEcNode("dc1", "rack1", "dn_a4", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{4}), + newEcNode("dc1", "rack1", "dn_a5", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{5}), + newEcNode("dc1", "rack1", "dn_a6", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{6}), + newEcNode("dc1", "rack1", "dn_a7", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{7}), + newEcNode("dc1", "rack1", "dn_a8", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{8}), + newEcNode("dc1", "rack1", "dn_a9", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{9}), + newEcNode("dc1", "rack1", "dn_a10", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{10}), + newEcNode("dc1", "rack1", "dn_a11", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{11}), + newEcNode("dc1", "rack1", "dn_a12", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{12}), + newEcNode("dc1", "rack1", "dn_a13", 100).addEcVolumeAndShardsForTest(1, "c1", []uint32{13}), + + newEcNode("dc1", "rack1", "dn_b1", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{1}), + newEcNode("dc1", "rack1", "dn_b2", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{2}), + newEcNode("dc1", "rack1", "dn_b3", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{3}), + newEcNode("dc1", "rack1", "dn_b4", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{4}), + newEcNode("dc1", "rack1", "dn_b5", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{5}), + newEcNode("dc1", "rack1", "dn_b6", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{6}), + newEcNode("dc1", "rack1", "dn_b7", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{7}), + newEcNode("dc1", "rack1", "dn_b8", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{8}), + newEcNode("dc1", "rack1", "dn_b9", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{9}), + newEcNode("dc1", "rack1", "dn_b10", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{10}), + newEcNode("dc1", "rack1", "dn_b11", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{11}), + newEcNode("dc1", "rack1", "dn_b12", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{12}), + newEcNode("dc1", "rack1", "dn_b13", 100).addEcVolumeAndShardsForTest(2, "c1", []uint32{13}), + + newEcNode("dc1", "rack1", "dn3", 100), + } + + racks := collectRacks(allEcNodes) + balanceEcVolumes(nil, "c1", allEcNodes, racks, false) + balanceEcRacks(nil, racks, false) +} + +func newEcNode(dc string, rack string, dataNodeId string, freeEcSlot int) *EcNode { + return &EcNode{ + info: &master_pb.DataNodeInfo{ + Id: dataNodeId, + }, + dc: dc, + rack: RackId(rack), + freeEcSlot: freeEcSlot, + } +} + +func (ecNode *EcNode) addEcVolumeAndShardsForTest(vid uint32, collection string, shardIds []uint32) *EcNode { + return ecNode.addEcVolumeShards(needle.VolumeId(vid), collection, shardIds) +} diff --git a/weed/shell/command_fs_cat.go b/weed/shell/command_fs_cat.go new file mode 100644 index 000000000..3db487979 --- /dev/null +++ b/weed/shell/command_fs_cat.go @@ -0,0 +1,61 @@ +package shell + +import ( + "fmt" + "io" + "math" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func init() { + Commands = append(Commands, &commandFsCat{}) +} + +type commandFsCat struct { +} + +func (c 
*commandFsCat) Name() string { + return "fs.cat" +} + +func (c *commandFsCat) Help() string { + return `stream the file content on to the screen + + fs.cat /dir/file_name + fs.cat http://<filer_server>:<port>/dir/file_name +` +} + +func (c *commandFsCat) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + input := findInputDirectory(args) + + filerServer, filerPort, path, err := commandEnv.parseUrl(input) + if err != nil { + return err + } + + if commandEnv.isDirectory(filerServer, filerPort, path) { + return fmt.Errorf("%s is a directory", path) + } + + dir, name := filer2.FullPath(path).DirAndName() + + return commandEnv.withFilerClient(filerServer, filerPort, func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.LookupDirectoryEntryRequest{ + Name: name, + Directory: dir, + } + respLookupEntry, err := filer_pb.LookupEntry(client, request) + if err != nil { + return err + } + + return filer2.StreamContent(commandEnv.MasterClient, writer, respLookupEntry.Entry.Chunks, 0, math.MaxInt32) + + }) + +} diff --git a/weed/shell/command_fs_cd.go b/weed/shell/command_fs_cd.go new file mode 100644 index 000000000..df42cd516 --- /dev/null +++ b/weed/shell/command_fs_cd.go @@ -0,0 +1,56 @@ +package shell + +import ( + "io" +) + +func init() { + Commands = append(Commands, &commandFsCd{}) +} + +type commandFsCd struct { +} + +func (c *commandFsCd) Name() string { + return "fs.cd" +} + +func (c *commandFsCd) Help() string { + return `change directory to http://<filer_server>:<port>/dir/ + + The full path can be too long to type. For example, + fs.ls http://<filer_server>:<port>/some/path/to/file_name + + can be simplified as + + fs.cd http://<filer_server>:<port>/some/path + fs.ls to/file_name +` +} + +func (c *commandFsCd) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + input := findInputDirectory(args) + + filerServer, filerPort, path, err := commandEnv.parseUrl(input) + if err != nil { + return err + } + + if path == "/" { + commandEnv.option.FilerHost = filerServer + commandEnv.option.FilerPort = filerPort + commandEnv.option.Directory = "/" + return nil + } + + err = commandEnv.checkDirectory(filerServer, filerPort, path) + + if err == nil { + commandEnv.option.FilerHost = filerServer + commandEnv.option.FilerPort = filerPort + commandEnv.option.Directory = path + } + + return err +} diff --git a/weed/shell/command_fs_du.go b/weed/shell/command_fs_du.go new file mode 100644 index 000000000..ca2f22b57 --- /dev/null +++ b/weed/shell/command_fs_du.go @@ -0,0 +1,105 @@ +package shell + +import ( + "fmt" + "io" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func init() { + Commands = append(Commands, &commandFsDu{}) +} + +type commandFsDu struct { +} + +func (c *commandFsDu) Name() string { + return "fs.du" +} + +func (c *commandFsDu) Help() string { + return `show disk usage + + fs.du http://<filer_server>:<port>/dir + fs.du http://<filer_server>:<port>/dir/file_name + fs.du http://<filer_server>:<port>/dir/file_prefix +` +} + +func (c *commandFsDu) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + filerServer, filerPort, path, err := commandEnv.parseUrl(findInputDirectory(args)) + if err != nil { + return err + } + + if commandEnv.isDirectory(filerServer, filerPort, path) { + path = path + "/" + } + + var blockCount, byteCount uint64 + dir, name := filer2.FullPath(path).DirAndName() + blockCount, 
byteCount, err = duTraverseDirectory(writer, commandEnv.getFilerClient(filerServer, filerPort), dir, name) + + if name == "" && err == nil { + fmt.Fprintf(writer, "block:%4d\tbyte:%10d\t%s\n", blockCount, byteCount, dir) + } + + return + +} + +func duTraverseDirectory(writer io.Writer, filerClient filer2.FilerClient, dir, name string) (blockCount, byteCount uint64, err error) { + + err = filer2.ReadDirAllEntries(filerClient, filer2.FullPath(dir), name, func(entry *filer_pb.Entry, isLast bool) { + if entry.IsDirectory { + subDir := fmt.Sprintf("%s/%s", dir, entry.Name) + if dir == "/" { + subDir = "/" + entry.Name + } + numBlock, numByte, err := duTraverseDirectory(writer, filerClient, subDir, "") + if err == nil { + blockCount += numBlock + byteCount += numByte + } + } else { + blockCount += uint64(len(entry.Chunks)) + byteCount += filer2.TotalSize(entry.Chunks) + } + + if name != "" && !entry.IsDirectory { + fmt.Fprintf(writer, "block:%4d\tbyte:%10d\t%s/%s\n", blockCount, byteCount, dir, name) + } + }) + return +} + +func (env *CommandEnv) withFilerClient(filerServer string, filerPort int64, fn func(filer_pb.SeaweedFilerClient) error) error { + + filerGrpcAddress := fmt.Sprintf("%s:%d", filerServer, filerPort+10000) + return pb.WithGrpcFilerClient(filerGrpcAddress, env.option.GrpcDialOption, fn) + +} + +type commandFilerClient struct { + env *CommandEnv + filerServer string + filerPort int64 +} + +func (env *CommandEnv) getFilerClient(filerServer string, filerPort int64) *commandFilerClient { + return &commandFilerClient{ + env: env, + filerServer: filerServer, + filerPort: filerPort, + } +} +func (c *commandFilerClient) WithFilerClient(fn func(filer_pb.SeaweedFilerClient) error) error { + return c.env.withFilerClient(c.filerServer, c.filerPort, fn) +} +func (c *commandFilerClient) AdjustedUrl(hostAndPort string) string { + return hostAndPort +} diff --git a/weed/shell/command_fs_ls.go b/weed/shell/command_fs_ls.go new file mode 100644 index 000000000..69ebe1b30 --- /dev/null +++ b/weed/shell/command_fs_ls.go @@ -0,0 +1,114 @@ +package shell + +import ( + "fmt" + "io" + "os" + "os/user" + "strconv" + "strings" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func init() { + Commands = append(Commands, &commandFsLs{}) +} + +type commandFsLs struct { +} + +func (c *commandFsLs) Name() string { + return "fs.ls" +} + +func (c *commandFsLs) Help() string { + return `list all files under a directory + + fs.ls [-l] [-a] /dir/ + fs.ls [-l] [-a] /dir/file_name + fs.ls [-l] [-a] /dir/file_prefix + fs.ls [-l] [-a] http://<filer_server>:<port>/dir/ + fs.ls [-l] [-a] http://<filer_server>:<port>/dir/file_name + fs.ls [-l] [-a] http://<filer_server>:<port>/dir/file_prefix +` +} + +func (c *commandFsLs) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + var isLongFormat, showHidden bool + for _, arg := range args { + if !strings.HasPrefix(arg, "-") { + break + } + for _, t := range arg { + switch t { + case 'a': + showHidden = true + case 'l': + isLongFormat = true + } + } + } + + input := findInputDirectory(args) + + filerServer, filerPort, path, err := commandEnv.parseUrl(input) + if err != nil { + return err + } + + if commandEnv.isDirectory(filerServer, filerPort, path) { + path = path + "/" + } + + dir, name := filer2.FullPath(path).DirAndName() + entryCount := 0 + + err = filer2.ReadDirAllEntries(commandEnv.getFilerClient(filerServer, filerPort), filer2.FullPath(dir), name, func(entry *filer_pb.Entry, isLast bool) 
{ + + if !showHidden && strings.HasPrefix(entry.Name, ".") { + return + } + + entryCount++ + + if isLongFormat { + fileMode := os.FileMode(entry.Attributes.FileMode) + userName, groupNames := entry.Attributes.UserName, entry.Attributes.GroupName + if userName == "" { + if user, userErr := user.LookupId(strconv.Itoa(int(entry.Attributes.Uid))); userErr == nil { + userName = user.Username + } + } + groupName := "" + if len(groupNames) > 0 { + groupName = groupNames[0] + } + if groupName == "" { + if group, groupErr := user.LookupGroupId(strconv.Itoa(int(entry.Attributes.Gid))); groupErr == nil { + groupName = group.Name + } + } + + if dir == "/" { + // just for printing + dir = "" + } + fmt.Fprintf(writer, "%s %3d %s %s %6d %s/%s\n", + fileMode, len(entry.Chunks), + userName, groupName, + filer2.TotalSize(entry.Chunks), dir, entry.Name) + } else { + fmt.Fprintf(writer, "%s\n", entry.Name) + } + + }) + + if isLongFormat && err == nil { + fmt.Fprintf(writer, "total %d\n", entryCount) + } + + return +} diff --git a/weed/shell/command_fs_meta_cat.go b/weed/shell/command_fs_meta_cat.go new file mode 100644 index 000000000..cd1ffb6fd --- /dev/null +++ b/weed/shell/command_fs_meta_cat.go @@ -0,0 +1,72 @@ +package shell + +import ( + "fmt" + "io" + + "github.com/golang/protobuf/jsonpb" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" +) + +func init() { + Commands = append(Commands, &commandFsMetaCat{}) +} + +type commandFsMetaCat struct { +} + +func (c *commandFsMetaCat) Name() string { + return "fs.meta.cat" +} + +func (c *commandFsMetaCat) Help() string { + return `print out the meta data content for a file or directory + + fs.meta.cat /dir/ + fs.meta.cat /dir/file_name + fs.meta.cat http://<filer_server>:<port>/dir/ + fs.meta.cat http://<filer_server>:<port>/dir/file_name +` +} + +func (c *commandFsMetaCat) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + input := findInputDirectory(args) + + filerServer, filerPort, path, err := commandEnv.parseUrl(input) + if err != nil { + return err + } + + dir, name := filer2.FullPath(path).DirAndName() + + return commandEnv.withFilerClient(filerServer, filerPort, func(client filer_pb.SeaweedFilerClient) error { + + request := &filer_pb.LookupDirectoryEntryRequest{ + Name: name, + Directory: dir, + } + respLookupEntry, err := filer_pb.LookupEntry(client, request) + if err != nil { + return err + } + + m := jsonpb.Marshaler{ + EmitDefaults: true, + Indent: " ", + } + + text, marshalErr := m.MarshalToString(respLookupEntry.Entry) + if marshalErr != nil { + return fmt.Errorf("marshal meta: %v", marshalErr) + } + + fmt.Fprintf(writer, "%s\n", text) + + return nil + + }) + +} diff --git a/weed/shell/command_fs_meta_load.go b/weed/shell/command_fs_meta_load.go new file mode 100644 index 000000000..ed92d8011 --- /dev/null +++ b/weed/shell/command_fs_meta_load.go @@ -0,0 +1,106 @@ +package shell + +import ( + "fmt" + "io" + "os" + + "github.com/golang/protobuf/proto" + + "github.com/chrislusf/seaweedfs/weed/filer2" + "github.com/chrislusf/seaweedfs/weed/pb/filer_pb" + "github.com/chrislusf/seaweedfs/weed/util" +) + +func init() { + Commands = append(Commands, &commandFsMetaLoad{}) +} + +type commandFsMetaLoad struct { +} + +func (c *commandFsMetaLoad) Name() string { + return "fs.meta.load" +} + +func (c *commandFsMetaLoad) Help() string { + return `load saved filer meta data to restore the directory and file structure + + fs.meta.load <filer_host>-<port>-<time>.meta + +` +} + +func (c 
*commandFsMetaLoad) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	if len(args) == 0 {
+		fmt.Fprintf(writer, "missing a metadata file\n")
+		return nil
+	}
+
+	filerServer, filerPort, path, err := commandEnv.parseUrl(findInputDirectory(nil))
+	if err != nil {
+		return err
+	}
+
+	fileName := args[len(args)-1]
+
+	src, err := os.OpenFile(fileName, os.O_RDONLY, 0644)
+	if err != nil {
+		return err
+	}
+	defer src.Close()
+
+	var dirCount, fileCount uint64
+
+	err = commandEnv.withFilerClient(filerServer, filerPort, func(client filer_pb.SeaweedFilerClient) error {
+
+		sizeBuf := make([]byte, 4)
+
+		for {
+			if n, err := src.Read(sizeBuf); n != 4 {
+				if err == io.EOF {
+					return nil
+				}
+				return err
+			}
+
+			size := util.BytesToUint32(sizeBuf)
+
+			data := make([]byte, int(size))
+
+			// use io.ReadFull: a plain Read may legally return fewer bytes than requested
+			if _, err := io.ReadFull(src, data); err != nil {
+				return err
+			}
+
+			fullEntry := &filer_pb.FullEntry{}
+			if err = proto.Unmarshal(data, fullEntry); err != nil {
+				return err
+			}
+
+			if err := filer_pb.CreateEntry(client, &filer_pb.CreateEntryRequest{
+				Directory: fullEntry.Dir,
+				Entry:     fullEntry.Entry,
+			}); err != nil {
+				return err
+			}
+
+			fmt.Fprintf(writer, "load %s\n", filer2.FullPath(fullEntry.Dir).Child(fullEntry.Entry.Name))
+
+			if fullEntry.Entry.IsDirectory {
+				dirCount++
+			} else {
+				fileCount++
+			}
+
+		}
+
+	})
+
+	if err == nil {
+		fmt.Fprintf(writer, "\ntotal %d directories, %d files", dirCount, fileCount)
+		fmt.Fprintf(writer, "\n%s is loaded to http://%s:%d%s\n", fileName, filerServer, filerPort, path)
+	}
+
+	return err
+}
diff --git a/weed/shell/command_fs_meta_notify.go b/weed/shell/command_fs_meta_notify.go
new file mode 100644
index 000000000..099e04506
--- /dev/null
+++ b/weed/shell/command_fs_meta_notify.go
@@ -0,0 +1,74 @@
+package shell
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/chrislusf/seaweedfs/weed/filer2"
+	"github.com/chrislusf/seaweedfs/weed/notification"
+	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
+	"github.com/chrislusf/seaweedfs/weed/util"
+)
+
+func init() {
+	Commands = append(Commands, &commandFsMetaNotify{})
+}
+
+type commandFsMetaNotify struct {
+}
+
+func (c *commandFsMetaNotify) Name() string {
+	return "fs.meta.notify"
+}
+
+func (c *commandFsMetaNotify) Help() string {
+	return `recursively send directory and file meta data to the notification message queue
+
+	fs.meta.notify	# send meta data from current directory to the notification message queue
+
+	The message queue will use it to trigger replication from this filer.
+
+`
+}
+
+func (c *commandFsMetaNotify) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	filerServer, filerPort, path, err := commandEnv.parseUrl(findInputDirectory(args))
+	if err != nil {
+		return err
+	}
+
+	util.LoadConfiguration("notification", true)
+	v := util.GetViper()
+	notification.LoadConfiguration(v, "notification.")
+
+	var dirCount, fileCount uint64
+
+	err = doTraverseBFS(writer, commandEnv.getFilerClient(filerServer, filerPort), filer2.FullPath(path), func(parentPath filer2.FullPath, entry *filer_pb.Entry) {
+
+		if entry.IsDirectory {
+			dirCount++
+		} else {
+			fileCount++
+		}
+
+		notifyErr := notification.Queue.SendMessage(
+			string(parentPath.Child(entry.Name)),
+			&filer_pb.EventNotification{
+				NewEntry: entry,
+			},
+		)
+
+		if notifyErr != nil {
+			fmt.Fprintf(writer, "failed to notify new entry event for %s: %v\n", parentPath.Child(entry.Name), notifyErr)
+		}
+
+	})
+
+	if err == nil {
+		fmt.Fprintf(writer, "\ntotal notified %d directories, %d files\n", dirCount, fileCount)
+	}
+
+	return err
+
+}
diff --git a/weed/shell/command_fs_meta_save.go b/weed/shell/command_fs_meta_save.go
new file mode 100644
index 000000000..b51fdd0f6
--- /dev/null
+++ b/weed/shell/command_fs_meta_save.go
@@ -0,0 +1,179 @@
+package shell
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/golang/protobuf/proto"
+
+	"github.com/chrislusf/seaweedfs/weed/filer2"
+	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
+	"github.com/chrislusf/seaweedfs/weed/util"
+)
+
+func init() {
+	Commands = append(Commands, &commandFsMetaSave{})
+}
+
+type commandFsMetaSave struct {
+}
+
+func (c *commandFsMetaSave) Name() string {
+	return "fs.meta.save"
+}
+
+func (c *commandFsMetaSave) Help() string {
+	return `save all directory and file meta data to a local file for metadata backup.
+
+	fs.meta.save /               # save from the root
+	fs.meta.save -v -o t.meta /  # save from the root, output to t.meta file.
+	fs.meta.save /path/to/save   # save from the directory /path/to/save
+	fs.meta.save .               # save from current directory
+	fs.meta.save                 # save from current directory
+
+	The meta data will be saved into a local <filer_host>-<port>-<time>.meta file.
+	The saved meta data can later be loaded by the fs.meta.load command.
+
+	This assumes there are no deletions, so this is different from taking a snapshot.
+ +` +} + +func (c *commandFsMetaSave) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + fsMetaSaveCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + verbose := fsMetaSaveCommand.Bool("v", false, "print out each processed files") + outputFileName := fsMetaSaveCommand.String("o", "", "output the meta data to this file") + if err = fsMetaSaveCommand.Parse(args); err != nil { + return nil + } + + filerServer, filerPort, path, parseErr := commandEnv.parseUrl(findInputDirectory(fsMetaSaveCommand.Args())) + if parseErr != nil { + return parseErr + } + + t := time.Now() + fileName := *outputFileName + if fileName == "" { + fileName = fmt.Sprintf("%s-%d-%4d%02d%02d-%02d%02d%02d.meta", + filerServer, filerPort, t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second()) + } + + dst, openErr := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if openErr != nil { + return fmt.Errorf("failed to create file %s: %v", fileName, openErr) + } + defer dst.Close() + + var wg sync.WaitGroup + wg.Add(1) + outputChan := make(chan []byte, 1024) + go func() { + sizeBuf := make([]byte, 4) + for b := range outputChan { + util.Uint32toBytes(sizeBuf, uint32(len(b))) + dst.Write(sizeBuf) + dst.Write(b) + } + wg.Done() + }() + + var dirCount, fileCount uint64 + + err = doTraverseBFS(writer, commandEnv.getFilerClient(filerServer, filerPort), filer2.FullPath(path), func(parentPath filer2.FullPath, entry *filer_pb.Entry) { + + protoMessage := &filer_pb.FullEntry{ + Dir: string(parentPath), + Entry: entry, + } + + bytes, err := proto.Marshal(protoMessage) + if err != nil { + fmt.Fprintf(writer, "marshall error: %v\n", err) + return + } + + outputChan <- bytes + + if entry.IsDirectory { + atomic.AddUint64(&dirCount, 1) + } else { + atomic.AddUint64(&fileCount, 1) + } + + if *verbose { + println(parentPath.Child(entry.Name)) + } + + }) + + close(outputChan) + + wg.Wait() + + if err == nil { + fmt.Fprintf(writer, "total %d directories, %d files\n", dirCount, fileCount) + fmt.Fprintf(writer, "meta data for http://%s:%d%s is saved to %s\n", filerServer, filerPort, path, fileName) + } + + return err + +} +func doTraverseBFS(writer io.Writer, filerClient filer2.FilerClient, parentPath filer2.FullPath, fn func(parentPath filer2.FullPath, entry *filer_pb.Entry)) (err error) { + + K := 5 + + var jobQueueWg sync.WaitGroup + queue := util.NewQueue() + jobQueueWg.Add(1) + queue.Enqueue(parentPath) + var isTerminating bool + + for i := 0; i < K; i++ { + go func() { + for { + if isTerminating { + break + } + t := queue.Dequeue() + if t == nil { + time.Sleep(329 * time.Millisecond) + continue + } + dir := t.(filer2.FullPath) + processErr := processOneDirectory(writer, filerClient, dir, queue, &jobQueueWg, fn) + if processErr != nil { + err = processErr + } + jobQueueWg.Done() + } + }() + } + jobQueueWg.Wait() + isTerminating = true + return +} + +func processOneDirectory(writer io.Writer, filerClient filer2.FilerClient, parentPath filer2.FullPath, queue *util.Queue, jobQueueWg *sync.WaitGroup, fn func(parentPath filer2.FullPath, entry *filer_pb.Entry)) (err error) { + + return filer2.ReadDirAllEntries(filerClient, parentPath, "", func(entry *filer_pb.Entry, isLast bool) { + + fn(parentPath, entry) + + if entry.IsDirectory { + subDir := fmt.Sprintf("%s/%s", parentPath, entry.Name) + if parentPath == "/" { + subDir = "/" + entry.Name + } + jobQueueWg.Add(1) + queue.Enqueue(filer2.FullPath(subDir)) + } + }) + +} diff --git a/weed/shell/command_fs_mv.go b/weed/shell/command_fs_mv.go 
new file mode 100644
index 000000000..85275058e
--- /dev/null
+++ b/weed/shell/command_fs_mv.go
@@ -0,0 +1,91 @@
+package shell
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"path/filepath"
+
+	"github.com/chrislusf/seaweedfs/weed/filer2"
+	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
+)
+
+func init() {
+	Commands = append(Commands, &commandFsMv{})
+}
+
+type commandFsMv struct {
+}
+
+func (c *commandFsMv) Name() string {
+	return "fs.mv"
+}
+
+func (c *commandFsMv) Help() string {
+	return `move or rename a file or a folder
+
+	fs.mv <source entry> <destination entry>
+
+	fs.mv /dir/file_name /dir2/filename2
+	fs.mv /dir/file_name /dir2
+
+	fs.mv /dir/dir2 /dir3/dir4/
+	fs.mv /dir/dir2 /dir3/new_dir
+
+`
+}
+
+func (c *commandFsMv) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	filerServer, filerPort, sourcePath, err := commandEnv.parseUrl(args[0])
+	if err != nil {
+		return err
+	}
+
+	_, _, destinationPath, err := commandEnv.parseUrl(args[1])
+	if err != nil {
+		return err
+	}
+
+	sourceDir, sourceName := filer2.FullPath(sourcePath).DirAndName()
+
+	destinationDir, destinationName := filer2.FullPath(destinationPath).DirAndName()
+
+	return commandEnv.withFilerClient(filerServer, filerPort, func(client filer_pb.SeaweedFilerClient) error {
+
+		// collect destination entry info
+		destinationRequest := &filer_pb.LookupDirectoryEntryRequest{
+			Name:      destinationName,
+			Directory: destinationDir,
+		}
+		respDestinationLookupEntry, err := filer_pb.LookupEntry(client, destinationRequest)
+
+		var targetDir, targetName string
+
+		// moving a file or folder
+		if err == nil && respDestinationLookupEntry.Entry.IsDirectory {
+			// to a directory
+			targetDir = filepath.ToSlash(filepath.Join(destinationDir, destinationName))
+			targetName = sourceName
+		} else {
+			// to a file or folder
+			targetDir = destinationDir
+			targetName = destinationName
+		}
+
+		request := &filer_pb.AtomicRenameEntryRequest{
+			OldDirectory: sourceDir,
+			OldName:      sourceName,
+			NewDirectory: targetDir,
+			NewName:      targetName,
+		}
+
+		_, err = client.AtomicRenameEntry(context.Background(), request)
+
+		fmt.Fprintf(writer, "move: %s => %s\n", sourcePath, filer2.NewFullPath(targetDir, targetName))
+
+		return err
+
+	})
+
+}
diff --git a/weed/shell/command_fs_pwd.go b/weed/shell/command_fs_pwd.go
new file mode 100644
index 000000000..084a5e90a
--- /dev/null
+++ b/weed/shell/command_fs_pwd.go
@@ -0,0 +1,32 @@
+package shell
+
+import (
+	"fmt"
+	"io"
+)
+
+func init() {
+	Commands = append(Commands, &commandFsPwd{})
+}
+
+type commandFsPwd struct {
+}
+
+func (c *commandFsPwd) Name() string {
+	return "fs.pwd"
+}
+
+func (c *commandFsPwd) Help() string {
+	return `print out current directory`
+}
+
+func (c *commandFsPwd) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	fmt.Fprintf(writer, "http://%s:%d%s\n",
+		commandEnv.option.FilerHost,
+		commandEnv.option.FilerPort,
+		commandEnv.option.Directory,
+	)
+
+	return nil
+}
diff --git a/weed/shell/command_fs_tree.go b/weed/shell/command_fs_tree.go
new file mode 100644
index 000000000..04530571c
--- /dev/null
+++ b/weed/shell/command_fs_tree.go
@@ -0,0 +1,112 @@
+package shell
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/chrislusf/seaweedfs/weed/filer2"
+	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
+)
+
+func init() {
+	Commands = append(Commands, &commandFsTree{})
+}
+
+type commandFsTree struct {
+}
+
+func (c *commandFsTree) Name() string {
+	return "fs.tree"
+}
+
+func (c *commandFsTree) Help() string {
+	return 
`recursively list all files under a directory
+
+	fs.tree http://<filer_server>:<port>/dir/
+`
+}
+
+func (c *commandFsTree) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	filerServer, filerPort, path, err := commandEnv.parseUrl(findInputDirectory(args))
+	if err != nil {
+		return err
+	}
+
+	dir, name := filer2.FullPath(path).DirAndName()
+
+	dirCount, fCount, terr := treeTraverseDirectory(writer, commandEnv.getFilerClient(filerServer, filerPort), filer2.FullPath(dir), name, newPrefix(), -1)
+
+	if terr == nil {
+		fmt.Fprintf(writer, "%d directories, %d files\n", dirCount, fCount)
+	}
+
+	return terr
+
+}
+
+func treeTraverseDirectory(writer io.Writer, filerClient filer2.FilerClient, dir filer2.FullPath, name string, prefix *Prefix, level int) (directoryCount, fileCount int64, err error) {
+
+	prefix.addMarker(level)
+
+	err = filer2.ReadDirAllEntries(filerClient, dir, name, func(entry *filer_pb.Entry, isLast bool) {
+		if level < 0 && name != "" {
+			if entry.Name != name {
+				return
+			}
+		}
+
+		fmt.Fprintf(writer, "%s%s\n", prefix.getPrefix(level, isLast), entry.Name)
+
+		if entry.IsDirectory {
+			directoryCount++
+			subDir := dir.Child(entry.Name)
+			dirCount, fCount, terr := treeTraverseDirectory(writer, filerClient, subDir, "", prefix, level+1)
+			directoryCount += dirCount
+			fileCount += fCount
+			err = terr
+		} else {
+			fileCount++
+		}
+
+	})
+	return
+}
+
+type Prefix struct {
+	markers map[int]bool
+}
+
+func newPrefix() *Prefix {
+	return &Prefix{
+		markers: make(map[int]bool),
+	}
+}
+func (p *Prefix) addMarker(marker int) {
+	p.markers[marker] = true
+}
+func (p *Prefix) removeMarker(marker int) {
+	delete(p.markers, marker)
+}
+func (p *Prefix) getPrefix(level int, isLastChild bool) string {
+	var sb strings.Builder
+	if level < 0 {
+		return ""
+	}
+	for i := 0; i < level; i++ {
+		if _, ok := p.markers[i]; ok {
+			sb.WriteString("│")
+		} else {
+			sb.WriteString(" ")
+		}
+		sb.WriteString(" ")
+	}
+	if isLastChild {
+		sb.WriteString("└──")
+		p.removeMarker(level)
+	} else {
+		sb.WriteString("├──")
+	}
+	return sb.String()
+}
diff --git a/weed/shell/command_volume_balance.go b/weed/shell/command_volume_balance.go
new file mode 100644
index 000000000..349f52f1c
--- /dev/null
+++ b/weed/shell/command_volume_balance.go
@@ -0,0 +1,253 @@
+package shell
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"sort"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeBalance{})
+}
+
+type commandVolumeBalance struct {
+}
+
+func (c *commandVolumeBalance) Name() string {
+	return "volume.balance"
+}
+
+func (c *commandVolumeBalance) Help() string {
+	return `balance all volumes among volume servers
+
+	volume.balance [-collection ALL_COLLECTIONS|EACH_COLLECTION|<collection_name>] [-force] [-dataCenter=<data_center_name>]
+
+	Algorithm:
+
+	For each type of volume server (different max volume count limit){
+		for each collection {
+			balanceWritableVolumes()
+			balanceReadOnlyVolumes()
+		}
+	}
+
+	func balanceWritableVolumes(){
+		idealWritableVolumes = totalWritableVolumes / numVolumeServers
+		for hasMovedOneVolume {
+			sort all volume servers ordered by the number of local writable volumes
+			pick the volume server A with the lowest number of writable volumes x
+			pick the volume server B with the highest number of writable volumes y
+			if y > idealWritableVolumes and x +1 <= idealWritableVolumes {
+				if B has a writable volume id v that A does not have {
+					move writable volume v from B to A
+				}
+			}
+		}
+	}
+	func balanceReadOnlyVolumes(){
+		//similar to balanceWritableVolumes
+	}
+
+`
+}
+
+func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	balanceCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	collection := balanceCommand.String("collection", "EACH_COLLECTION", "collection name, or use \"ALL_COLLECTIONS\" across collections, \"EACH_COLLECTION\" for each collection")
+	dc := balanceCommand.String("dataCenter", "", "only apply the balancing for this dataCenter")
+	applyBalancing := balanceCommand.Bool("force", false, "apply the balancing plan.")
+	if err = balanceCommand.Parse(args); err != nil {
+		return nil
+	}
+
+	var resp *master_pb.VolumeListResponse
+	err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
+		resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
+		return err
+	})
+	if err != nil {
+		return err
+	}
+
+	typeToNodes := collectVolumeServersByType(resp.TopologyInfo, *dc)
+
+	for maxVolumeCount, volumeServers := range typeToNodes {
+		if len(volumeServers) < 2 {
+			fmt.Printf("only 1 node is configured with max %d volumes, skipping balancing\n", maxVolumeCount)
+			continue
+		}
+		if *collection == "EACH_COLLECTION" {
+			collections, err := ListCollectionNames(commandEnv, true, false)
+			if err != nil {
+				return err
+			}
+			for _, c := range collections {
+				if err = balanceVolumeServers(commandEnv, volumeServers, resp.VolumeSizeLimitMb*1024*1024, c, *applyBalancing); err != nil {
+					return err
+				}
+			}
+		} else if *collection == "ALL_COLLECTIONS" {
+			if err = balanceVolumeServers(commandEnv, volumeServers, resp.VolumeSizeLimitMb*1024*1024, "ALL_COLLECTIONS", *applyBalancing); err != nil {
+				return err
+			}
+		} else {
+			if err = balanceVolumeServers(commandEnv, volumeServers, resp.VolumeSizeLimitMb*1024*1024, *collection, *applyBalancing); err != nil {
+				return err
+			}
+		}
+
+	}
+	return nil
+}
+
+func balanceVolumeServers(commandEnv *CommandEnv, nodes []*Node, volumeSizeLimit uint64, collection string, applyBalancing bool) error {
+
+	// balance writable volumes
+	for _, n := range nodes {
+		n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool {
+			if collection != "ALL_COLLECTIONS" {
+				if v.Collection != collection {
+					return false
+				}
+			}
+			return !v.ReadOnly && v.Size < volumeSizeLimit
+		})
+	}
+	if err := balanceSelectedVolume(commandEnv, nodes, sortWritableVolumes, applyBalancing); err != nil {
+		return err
+	}
+
+	// balance read-only volumes
+	for _, n := range nodes {
+		n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool {
+			if collection != "ALL_COLLECTIONS" {
+				if v.Collection != collection {
+					return false
+				}
+			}
+			return v.ReadOnly || v.Size >= volumeSizeLimit
+		})
+	}
+	if err := balanceSelectedVolume(commandEnv, nodes, sortReadOnlyVolumes, applyBalancing); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func collectVolumeServersByType(t *master_pb.TopologyInfo, selectedDataCenter string) (typeToNodes map[uint64][]*Node) {
+	typeToNodes = make(map[uint64][]*Node)
+	for _, dc := range t.DataCenterInfos {
+		if selectedDataCenter != "" && dc.Id != selectedDataCenter {
+			continue
+		}
+		for _, r := range dc.RackInfos {
+			for _, dn := range r.DataNodeInfos {
+				typeToNodes[dn.MaxVolumeCount] = append(typeToNodes[dn.MaxVolumeCount], &Node{
+					info: dn,
+					dc:   dc.Id,
+					rack: r.Id,
+				})
+			}
+		}
+	}
+	return
+}
+
+type Node struct {
+	info            *master_pb.DataNodeInfo
+	selectedVolumes 
map[uint32]*master_pb.VolumeInformationMessage + dc string + rack string +} + +func sortWritableVolumes(volumes []*master_pb.VolumeInformationMessage) { + sort.Slice(volumes, func(i, j int) bool { + return volumes[i].Size < volumes[j].Size + }) +} + +func sortReadOnlyVolumes(volumes []*master_pb.VolumeInformationMessage) { + sort.Slice(volumes, func(i, j int) bool { + return volumes[i].Id < volumes[j].Id + }) +} + +func balanceSelectedVolume(commandEnv *CommandEnv, nodes []*Node, sortCandidatesFn func(volumes []*master_pb.VolumeInformationMessage), applyBalancing bool) error { + selectedVolumeCount := 0 + for _, dn := range nodes { + selectedVolumeCount += len(dn.selectedVolumes) + } + + idealSelectedVolumes := ceilDivide(selectedVolumeCount, len(nodes)) + + hasMove := true + + for hasMove { + hasMove = false + sort.Slice(nodes, func(i, j int) bool { + // TODO sort by free volume slots??? + return len(nodes[i].selectedVolumes) < len(nodes[j].selectedVolumes) + }) + emptyNode, fullNode := nodes[0], nodes[len(nodes)-1] + if len(fullNode.selectedVolumes) > idealSelectedVolumes && len(emptyNode.selectedVolumes)+1 <= idealSelectedVolumes { + + // sort the volumes to move + var candidateVolumes []*master_pb.VolumeInformationMessage + for _, v := range fullNode.selectedVolumes { + candidateVolumes = append(candidateVolumes, v) + } + sortCandidatesFn(candidateVolumes) + + for _, v := range candidateVolumes { + if v.ReplicaPlacement > 0 { + if fullNode.dc != emptyNode.dc && fullNode.rack != emptyNode.rack { + // TODO this logic is too simple, but should work most of the time + // Need a correct algorithm to handle all different cases + continue + } + } + if _, found := emptyNode.selectedVolumes[v.Id]; !found { + if err := moveVolume(commandEnv, v, fullNode, emptyNode, applyBalancing); err == nil { + delete(fullNode.selectedVolumes, v.Id) + emptyNode.selectedVolumes[v.Id] = v + hasMove = true + break + } else { + return err + } + } + } + } + } + return nil +} + +func moveVolume(commandEnv *CommandEnv, v *master_pb.VolumeInformationMessage, fullNode *Node, emptyNode *Node, applyBalancing bool) error { + collectionPrefix := v.Collection + "_" + if v.Collection == "" { + collectionPrefix = "" + } + fmt.Fprintf(os.Stdout, "moving volume %s%d %s => %s\n", collectionPrefix, v.Id, fullNode.info.Id, emptyNode.info.Id) + if applyBalancing { + return LiveMoveVolume(commandEnv.option.GrpcDialOption, needle.VolumeId(v.Id), fullNode.info.Id, emptyNode.info.Id, 5*time.Second) + } + return nil +} + +func (node *Node) selectVolumes(fn func(v *master_pb.VolumeInformationMessage) bool) { + node.selectedVolumes = make(map[uint32]*master_pb.VolumeInformationMessage) + for _, v := range node.info.VolumeInfos { + if fn(v) { + node.selectedVolumes[v.Id] = v + } + } +} diff --git a/weed/shell/command_volume_configure_replication.go b/weed/shell/command_volume_configure_replication.go new file mode 100644 index 000000000..133ec62c6 --- /dev/null +++ b/weed/shell/command_volume_configure_replication.go @@ -0,0 +1,104 @@ +package shell + +import ( + "context" + "errors" + "flag" + "fmt" + "io" + + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" +) + +func init() { + Commands = append(Commands, &commandVolumeConfigureReplication{}) +} + +type commandVolumeConfigureReplication struct { +} + +func (c 
*commandVolumeConfigureReplication) Name() string {
+	return "volume.configure.replication"
+}
+
+func (c *commandVolumeConfigureReplication) Help() string {
+	return `change a volume's replication value
+
+	This command changes a volume's replication value. It should be followed by volume.fix.replication.
+
+`
+}
+
+func (c *commandVolumeConfigureReplication) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	configureReplicationCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	volumeIdInt := configureReplicationCommand.Int("volumeId", 0, "the volume id")
+	replicationString := configureReplicationCommand.String("replication", "", "the intended replication value")
+	if err = configureReplicationCommand.Parse(args); err != nil {
+		return nil
+	}
+
+	if *replicationString == "" {
+		return fmt.Errorf("empty replication value")
+	}
+
+	replicaPlacement, err := super_block.NewReplicaPlacementFromString(*replicationString)
+	if err != nil {
+		return fmt.Errorf("replication format: %v", err)
+	}
+	replicaPlacementInt32 := uint32(replicaPlacement.Byte())
+
+	var resp *master_pb.VolumeListResponse
+	err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
+		resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
+		return err
+	})
+	if err != nil {
+		return err
+	}
+
+	vid := needle.VolumeId(*volumeIdInt)
+
+	// find all data nodes with volumes that need the replication change
+	var allLocations []location
+	eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+		loc := newLocation(dc, string(rack), dn)
+		for _, v := range dn.VolumeInfos {
+			if v.Id == uint32(vid) && v.ReplicaPlacement != replicaPlacementInt32 {
+				allLocations = append(allLocations, loc)
+				continue
+			}
+		}
+	})
+
+	if len(allLocations) == 0 {
+		return fmt.Errorf("no volume needs change")
+	}
+
+	for _, dst := range allLocations {
+		err := operation.WithVolumeServerClient(dst.dataNode.Id, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+			resp, configureErr := volumeServerClient.VolumeConfigure(context.Background(), &volume_server_pb.VolumeConfigureRequest{
+				VolumeId:    uint32(vid),
+				Replication: replicaPlacement.String(),
+			})
+			if configureErr != nil {
+				return configureErr
+			}
+			if resp.Error != "" {
+				return errors.New(resp.Error)
+			}
+			return nil
+		})
+
+		if err != nil {
+			return err
+		}
+
+	}
+
+	return nil
+}
diff --git a/weed/shell/command_volume_copy.go b/weed/shell/command_volume_copy.go
new file mode 100644
index 000000000..aecc071ad
--- /dev/null
+++ b/weed/shell/command_volume_copy.go
@@ -0,0 +1,51 @@
+package shell
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeCopy{})
+}
+
+type commandVolumeCopy struct {
+}
+
+func (c *commandVolumeCopy) Name() string {
+	return "volume.copy"
+}
+
+func (c *commandVolumeCopy) Help() string {
+	return `copy a volume from one volume server to another volume server
+
+	volume.copy <source volume server host:port> <target volume server host:port> <volume id>
+
+	This command copies a volume from one volume server to another volume server.
+	Usually you will want to unmount the volume first before copying.
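+
+	e.g. (the host:port values and the volume id below are placeholders):
+	volume.copy 192.168.1.10:8080 192.168.1.11:8080 37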
+
+`
+}
+
+func (c *commandVolumeCopy) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	if len(args) != 3 {
+		fmt.Fprintf(writer, "received args: %+v\n", args)
+		return fmt.Errorf("need 3 args of <source volume server host:port> <target volume server host:port> <volume id>")
+	}
+	sourceVolumeServer, targetVolumeServer, volumeIdString := args[0], args[1], args[2]
+
+	volumeId, err := needle.NewVolumeId(volumeIdString)
+	if err != nil {
+		return fmt.Errorf("wrong volume id format %s: %v", volumeIdString, err)
+	}
+
+	if sourceVolumeServer == targetVolumeServer {
+		return fmt.Errorf("source and target volume servers are the same!")
+	}
+
+	_, err = copyVolume(commandEnv.option.GrpcDialOption, volumeId, sourceVolumeServer, targetVolumeServer)
+	return
+}
diff --git a/weed/shell/command_volume_delete.go b/weed/shell/command_volume_delete.go
new file mode 100644
index 000000000..5869b1621
--- /dev/null
+++ b/weed/shell/command_volume_delete.go
@@ -0,0 +1,46 @@
+package shell
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeDelete{})
+}
+
+type commandVolumeDelete struct {
+}
+
+func (c *commandVolumeDelete) Name() string {
+	return "volume.delete"
+}
+
+func (c *commandVolumeDelete) Help() string {
+	return `delete a live volume from one volume server
+
+	volume.delete <volume server host:port> <volume id>
+
+	This command deletes a volume from one volume server.
+
+`
+}
+
+func (c *commandVolumeDelete) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	if len(args) != 2 {
+		fmt.Fprintf(writer, "received args: %+v\n", args)
+		return fmt.Errorf("need 2 args of <volume server host:port> <volume id>")
+	}
+	sourceVolumeServer, volumeIdString := args[0], args[1]
+
+	volumeId, err := needle.NewVolumeId(volumeIdString)
+	if err != nil {
+		return fmt.Errorf("wrong volume id format %s: %v", volumeIdString, err)
+	}
+
+	return deleteVolume(commandEnv.option.GrpcDialOption, volumeId, sourceVolumeServer)
+
+}
diff --git a/weed/shell/command_volume_fix_replication.go b/weed/shell/command_volume_fix_replication.go
new file mode 100644
index 000000000..210f4819d
--- /dev/null
+++ b/weed/shell/command_volume_fix_replication.go
@@ -0,0 +1,200 @@
+package shell
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"math/rand"
+	"sort"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/super_block"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeFixReplication{})
+}
+
+type commandVolumeFixReplication struct {
+}
+
+func (c *commandVolumeFixReplication) Name() string {
+	return "volume.fix.replication"
+}
+
+func (c *commandVolumeFixReplication) Help() string {
+	return `add replicas to volumes that are missing replicas
+
+	This command finds all under-replicated volumes, and finds volume servers with free slots.
+	If the free slots satisfy the replication requirement, the volume content is copied over and mounted.
+
+	volume.fix.replication -n # do not take action
+	volume.fix.replication    # actually copy the volume files and mount the volume
+
+	Note:
+		* each time this will only add back one replica for one volume id. If multiple replicas
+		are missing, e.g. multiple volume servers are new, you may need to run this multiple times.
+		* do not run this again within seconds, since the new volume replica may take a few seconds
+		to register itself to the master.
+
+`
+}
+
+func (c *commandVolumeFixReplication) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	takeAction := true
+	if len(args) > 0 && args[0] == "-n" {
+		takeAction = false
+	}
+
+	var resp *master_pb.VolumeListResponse
+	err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error {
+		resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{})
+		return err
+	})
+	if err != nil {
+		return err
+	}
+
+	// find all volumes that need replication
+	// collect all data nodes
+	replicatedVolumeLocations := make(map[uint32][]location)
+	replicatedVolumeInfo := make(map[uint32]*master_pb.VolumeInformationMessage)
+	var allLocations []location
+	eachDataNode(resp.TopologyInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+		loc := newLocation(dc, string(rack), dn)
+		for _, v := range dn.VolumeInfos {
+			if v.ReplicaPlacement > 0 {
+				replicatedVolumeLocations[v.Id] = append(replicatedVolumeLocations[v.Id], loc)
+				replicatedVolumeInfo[v.Id] = v
+			}
+		}
+		allLocations = append(allLocations, loc)
+	})
+
+	// find all under-replicated volumes
+	underReplicatedVolumeLocations := make(map[uint32][]location)
+	for vid, locations := range replicatedVolumeLocations {
+		volumeInfo := replicatedVolumeInfo[vid]
+		replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(volumeInfo.ReplicaPlacement))
+		if replicaPlacement.GetCopyCount() > len(locations) {
+			underReplicatedVolumeLocations[vid] = locations
+		}
+	}
+
+	if len(underReplicatedVolumeLocations) == 0 {
+		return fmt.Errorf("no under replicated volumes")
+	}
+
+	if len(allLocations) == 0 {
+		return fmt.Errorf("no data nodes at all")
+	}
+
+	// find the most under-populated data nodes
+	keepDataNodesSorted(allLocations)
+
+	for vid, locations := range underReplicatedVolumeLocations {
+		volumeInfo := replicatedVolumeInfo[vid]
+		replicaPlacement, _ := super_block.NewReplicaPlacementFromByte(byte(volumeInfo.ReplicaPlacement))
+		foundNewLocation := false
+		for _, dst := range allLocations {
+			// check whether data nodes satisfy the constraints
+			if dst.dataNode.FreeVolumeCount > 0 && satisfyReplicaPlacement(replicaPlacement, locations, dst) {
+				// ask the volume server to replicate the volume
+				sourceNodes := underReplicatedVolumeLocations[vid]
+				sourceNode := sourceNodes[rand.Intn(len(sourceNodes))]
+				foundNewLocation = true
+				fmt.Fprintf(writer, "replicating volume %d %s from %s to dataNode %s ...\n", volumeInfo.Id, replicaPlacement, sourceNode.dataNode.Id, dst.dataNode.Id)
+
+				if !takeAction {
+					break
+				}
+
+				err := operation.WithVolumeServerClient(dst.dataNode.Id, commandEnv.option.GrpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+					_, replicateErr := volumeServerClient.VolumeCopy(context.Background(), &volume_server_pb.VolumeCopyRequest{
+						VolumeId:       volumeInfo.Id,
+						SourceDataNode: sourceNode.dataNode.Id,
+					})
+					return replicateErr
+				})
+
+				if err != nil {
+					return err
+				}
+
+				// adjust free volume count
+				dst.dataNode.FreeVolumeCount--
+				keepDataNodesSorted(allLocations)
+				break
+			}
+		}
+		if !foundNewLocation {
+			fmt.Fprintf(writer, "failed to place volume %d replica as %s, existing:%+v\n", volumeInfo.Id, replicaPlacement, locations)
+		}
+
+	}
+
+	return nil
+}
+
+func keepDataNodesSorted(dataNodes []location) {
+	sort.Slice(dataNodes, func(i, j int) bool {
+		return 
dataNodes[i].dataNode.FreeVolumeCount > dataNodes[j].dataNode.FreeVolumeCount
+	})
+}
+
+func satisfyReplicaPlacement(replicaPlacement *super_block.ReplicaPlacement, existingLocations []location, possibleLocation location) bool {
+
+	existingDataCenters := make(map[string]bool)
+	existingRacks := make(map[string]bool)
+	existingDataNodes := make(map[string]bool)
+	for _, loc := range existingLocations {
+		existingDataCenters[loc.DataCenter()] = true
+		existingRacks[loc.Rack()] = true
+		existingDataNodes[loc.String()] = true
+	}
+
+	if replicaPlacement.DiffDataCenterCount >= len(existingDataCenters) {
+		// check dc, good if different from any existing data centers
+		_, found := existingDataCenters[possibleLocation.DataCenter()]
+		return !found
+	} else if replicaPlacement.DiffRackCount >= len(existingRacks) {
+		// check rack, good if different from any existing racks
+		_, found := existingRacks[possibleLocation.Rack()]
+		return !found
+	} else if replicaPlacement.SameRackCount >= len(existingDataNodes) {
+		// check data node, good if different from any existing data nodes
+		_, found := existingDataNodes[possibleLocation.String()]
+		return !found
+	}
+
+	return false
+}
+
+type location struct {
+	dc       string
+	rack     string
+	dataNode *master_pb.DataNodeInfo
+}
+
+func newLocation(dc, rack string, dataNode *master_pb.DataNodeInfo) location {
+	return location{
+		dc:       dc,
+		rack:     rack,
+		dataNode: dataNode,
+	}
+}
+
+func (l location) String() string {
+	return fmt.Sprintf("%s %s %s", l.dc, l.rack, l.dataNode.Id)
+}
+
+func (l location) Rack() string {
+	return fmt.Sprintf("%s %s", l.dc, l.rack)
+}
+
+func (l location) DataCenter() string {
+	return l.dc
+}
diff --git a/weed/shell/command_volume_list.go b/weed/shell/command_volume_list.go
new file mode 100644
index 000000000..c5a9388fa
--- /dev/null
+++ b/weed/shell/command_volume_list.go
@@ -0,0 +1,133 @@
+package shell
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"sort"
+
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/erasure_coding"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeList{})
+}
+
+type commandVolumeList struct {
+}
+
+func (c *commandVolumeList) Name() string {
+	return "volume.list"
+}
+
+func (c *commandVolumeList) Help() string {
+	return `list all volumes
+
+	This command lists all volumes as a tree of dataCenter > rack > dataNode > volume.
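+
+	The output shape looks like the sketch below (all numbers are illustrative):
+
+	Topology volume:8/16 active:8 free:8 remote:0 volumeSizeLimit:30000 MB
+	  DataCenter dc1 volume:8/16 active:8 free:8 remote:0
+	    Rack rack1 volume:8/16 active:8 free:8 remote:0
+	      DataNode 127.0.0.1:8080 volume:8/16 active:8 free:8 remote:0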
+ +` +} + +func (c *commandVolumeList) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) { + + var resp *master_pb.VolumeListResponse + err = commandEnv.MasterClient.WithClient(func(client master_pb.SeaweedClient) error { + resp, err = client.VolumeList(context.Background(), &master_pb.VolumeListRequest{}) + return err + }) + if err != nil { + return err + } + + writeTopologyInfo(writer, resp.TopologyInfo, resp.VolumeSizeLimitMb) + return nil +} + +func writeTopologyInfo(writer io.Writer, t *master_pb.TopologyInfo, volumeSizeLimitMb uint64) statistics { + fmt.Fprintf(writer, "Topology volume:%d/%d active:%d free:%d remote:%d volumeSizeLimit:%d MB\n", t.VolumeCount, t.MaxVolumeCount, t.ActiveVolumeCount, t.FreeVolumeCount, t.RemoteVolumeCount, volumeSizeLimitMb) + sort.Slice(t.DataCenterInfos, func(i, j int) bool { + return t.DataCenterInfos[i].Id < t.DataCenterInfos[j].Id + }) + var s statistics + for _, dc := range t.DataCenterInfos { + s = s.plus(writeDataCenterInfo(writer, dc)) + } + fmt.Fprintf(writer, "%+v \n", s) + return s +} +func writeDataCenterInfo(writer io.Writer, t *master_pb.DataCenterInfo) statistics { + fmt.Fprintf(writer, " DataCenter %s volume:%d/%d active:%d free:%d remote:%d\n", t.Id, t.VolumeCount, t.MaxVolumeCount, t.ActiveVolumeCount, t.FreeVolumeCount, t.RemoteVolumeCount) + var s statistics + sort.Slice(t.RackInfos, func(i, j int) bool { + return t.RackInfos[i].Id < t.RackInfos[j].Id + }) + for _, r := range t.RackInfos { + s = s.plus(writeRackInfo(writer, r)) + } + fmt.Fprintf(writer, " DataCenter %s %+v \n", t.Id, s) + return s +} +func writeRackInfo(writer io.Writer, t *master_pb.RackInfo) statistics { + fmt.Fprintf(writer, " Rack %s volume:%d/%d active:%d free:%d remote:%d\n", t.Id, t.VolumeCount, t.MaxVolumeCount, t.ActiveVolumeCount, t.FreeVolumeCount, t.RemoteVolumeCount) + var s statistics + sort.Slice(t.DataNodeInfos, func(i, j int) bool { + return t.DataNodeInfos[i].Id < t.DataNodeInfos[j].Id + }) + for _, dn := range t.DataNodeInfos { + s = s.plus(writeDataNodeInfo(writer, dn)) + } + fmt.Fprintf(writer, " Rack %s %+v \n", t.Id, s) + return s +} +func writeDataNodeInfo(writer io.Writer, t *master_pb.DataNodeInfo) statistics { + fmt.Fprintf(writer, " DataNode %s volume:%d/%d active:%d free:%d remote:%d\n", t.Id, t.VolumeCount, t.MaxVolumeCount, t.ActiveVolumeCount, t.FreeVolumeCount, t.RemoteVolumeCount) + var s statistics + sort.Slice(t.VolumeInfos, func(i, j int) bool { + return t.VolumeInfos[i].Id < t.VolumeInfos[j].Id + }) + for _, vi := range t.VolumeInfos { + s = s.plus(writeVolumeInformationMessage(writer, vi)) + } + for _, ecShardInfo := range t.EcShardInfos { + fmt.Fprintf(writer, " ec volume id:%v collection:%v shards:%v\n", ecShardInfo.Id, ecShardInfo.Collection, erasure_coding.ShardBits(ecShardInfo.EcIndexBits).ShardIds()) + } + fmt.Fprintf(writer, " DataNode %s %+v \n", t.Id, s) + return s +} +func writeVolumeInformationMessage(writer io.Writer, t *master_pb.VolumeInformationMessage) statistics { + fmt.Fprintf(writer, " volume %+v \n", t) + return newStatistics(t) +} + +type statistics struct { + Size uint64 + FileCount uint64 + DeletedFileCount uint64 + DeletedBytes uint64 +} + +func newStatistics(t *master_pb.VolumeInformationMessage) statistics { + return statistics{ + Size: t.Size, + FileCount: t.FileCount, + DeletedFileCount: t.DeleteCount, + DeletedBytes: t.DeletedByteCount, + } +} + +func (s statistics) plus(t statistics) statistics { + return statistics{ + Size: s.Size + t.Size, + FileCount: s.FileCount + 
t.FileCount,
+		DeletedFileCount: s.DeletedFileCount + t.DeletedFileCount,
+		DeletedBytes:     s.DeletedBytes + t.DeletedBytes,
+	}
+}
+
+func (s statistics) String() string {
+	if s.DeletedFileCount > 0 {
+		return fmt.Sprintf("total size:%d file_count:%d deleted_file:%d deleted_bytes:%d", s.Size, s.FileCount, s.DeletedFileCount, s.DeletedBytes)
+	}
+	return fmt.Sprintf("total size:%d file_count:%d", s.Size, s.FileCount)
+}
diff --git a/weed/shell/command_volume_mount.go b/weed/shell/command_volume_mount.go
new file mode 100644
index 000000000..cffc7136b
--- /dev/null
+++ b/weed/shell/command_volume_mount.go
@@ -0,0 +1,59 @@
+package shell
+
+import (
+	"context"
+	"fmt"
+	"io"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"google.golang.org/grpc"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeMount{})
+}
+
+type commandVolumeMount struct {
+}
+
+func (c *commandVolumeMount) Name() string {
+	return "volume.mount"
+}
+
+func (c *commandVolumeMount) Help() string {
+	return `mount a volume on one volume server
+
+	volume.mount <volume server host:port> <volume id>
+
+	This command mounts a volume on one volume server.
+
+`
+}
+
+func (c *commandVolumeMount) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	if len(args) != 2 {
+		fmt.Fprintf(writer, "received args: %+v\n", args)
+		return fmt.Errorf("need 2 args of <volume server host:port> <volume id>")
+	}
+	sourceVolumeServer, volumeIdString := args[0], args[1]
+
+	volumeId, err := needle.NewVolumeId(volumeIdString)
+	if err != nil {
+		return fmt.Errorf("wrong volume id format %s: %v", volumeIdString, err)
+	}
+
+	return mountVolume(commandEnv.option.GrpcDialOption, volumeId, sourceVolumeServer)
+
+}
+
+func mountVolume(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceVolumeServer string) (err error) {
+	return operation.WithVolumeServerClient(sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, mountErr := volumeServerClient.VolumeMount(context.Background(), &volume_server_pb.VolumeMountRequest{
+			VolumeId: uint32(volumeId),
+		})
+		return mountErr
+	})
+}
diff --git a/weed/shell/command_volume_move.go b/weed/shell/command_volume_move.go
new file mode 100644
index 000000000..c25b953a5
--- /dev/null
+++ b/weed/shell/command_volume_move.go
@@ -0,0 +1,125 @@
+package shell
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"log"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"google.golang.org/grpc"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeMove{})
+}
+
+type commandVolumeMove struct {
+}
+
+func (c *commandVolumeMove) Name() string {
+	return "volume.move"
+}
+
+func (c *commandVolumeMove) Help() string {
+	return `move a live volume from one volume server to another volume server
+
+	volume.move <source volume server host:port> <target volume server host:port> <volume id>
+
+	This command moves a live volume from one volume server to another volume server. Here are the steps:
+
+	1. This command asks the target volume server to copy the source volume from the source volume server, remembering the last entry's timestamp.
+	2. This command asks the target volume server to mount the new volume.
+	   Now the master will mark this volume id as readonly.
+	3. This command asks the target volume server to tail the source volume for updates after the timestamp, for a few seconds to drain the in-flight requests.
+	4. This command asks the source volume server to unmount the source volume.
+	   Now the master will mark this volume id as writable.
+	5. This command asks the source volume server to delete the source volume.
+
+`
+}
+
+func (c *commandVolumeMove) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	if len(args) != 3 {
+		fmt.Fprintf(writer, "received args: %+v\n", args)
+		return fmt.Errorf("need 3 args of <source volume server host:port> <target volume server host:port> <volume id>")
+	}
+	sourceVolumeServer, targetVolumeServer, volumeIdString := args[0], args[1], args[2]
+
+	volumeId, err := needle.NewVolumeId(volumeIdString)
+	if err != nil {
+		return fmt.Errorf("wrong volume id format %s: %v", volumeIdString, err)
+	}
+
+	if sourceVolumeServer == targetVolumeServer {
+		return fmt.Errorf("source and target volume servers are the same!")
+	}
+
+	return LiveMoveVolume(commandEnv.option.GrpcDialOption, volumeId, sourceVolumeServer, targetVolumeServer, 5*time.Second)
+}
+
+// LiveMoveVolume moves one volume from one source volume server to one target volume server, with idleTimeout to drain the incoming requests.
+func LiveMoveVolume(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceVolumeServer, targetVolumeServer string, idleTimeout time.Duration) (err error) {
+
+	log.Printf("copying volume %d from %s to %s", volumeId, sourceVolumeServer, targetVolumeServer)
+	lastAppendAtNs, err := copyVolume(grpcDialOption, volumeId, sourceVolumeServer, targetVolumeServer)
+	if err != nil {
+		return fmt.Errorf("copy volume %d from %s to %s: %v", volumeId, sourceVolumeServer, targetVolumeServer, err)
+	}
+
+	log.Printf("tailing volume %d from %s to %s", volumeId, sourceVolumeServer, targetVolumeServer)
+	if err = tailVolume(grpcDialOption, volumeId, sourceVolumeServer, targetVolumeServer, lastAppendAtNs, idleTimeout); err != nil {
+		return fmt.Errorf("tail volume %d from %s to %s: %v", volumeId, sourceVolumeServer, targetVolumeServer, err)
+	}
+
+	log.Printf("deleting volume %d from %s", volumeId, sourceVolumeServer)
+	if err = deleteVolume(grpcDialOption, volumeId, sourceVolumeServer); err != nil {
+		return fmt.Errorf("delete volume %d from %s: %v", volumeId, sourceVolumeServer, err)
+	}
+
+	log.Printf("moved volume %d from %s to %s", volumeId, sourceVolumeServer, targetVolumeServer)
+	return nil
+}
+
+func copyVolume(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceVolumeServer, targetVolumeServer string) (lastAppendAtNs uint64, err error) {
+
+	err = operation.WithVolumeServerClient(targetVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		resp, replicateErr := volumeServerClient.VolumeCopy(context.Background(), &volume_server_pb.VolumeCopyRequest{
+			VolumeId:       uint32(volumeId),
+			SourceDataNode: sourceVolumeServer,
+		})
+		if replicateErr == nil {
+			lastAppendAtNs = resp.LastAppendAtNs
+		}
+		return replicateErr
+	})
+
+	return
+}
+
+func tailVolume(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceVolumeServer, targetVolumeServer string, lastAppendAtNs uint64, idleTimeout time.Duration) (err error) {
+
+	return operation.WithVolumeServerClient(targetVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, replicateErr := volumeServerClient.VolumeTailReceiver(context.Background(), 
&volume_server_pb.VolumeTailReceiverRequest{
+			VolumeId:           uint32(volumeId),
+			SinceNs:            lastAppendAtNs,
+			IdleTimeoutSeconds: uint32(idleTimeout.Seconds()),
+			SourceVolumeServer: sourceVolumeServer,
+		})
+		return replicateErr
+	})
+
+}
+
+func deleteVolume(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceVolumeServer string) (err error) {
+	return operation.WithVolumeServerClient(sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, deleteErr := volumeServerClient.VolumeDelete(context.Background(), &volume_server_pb.VolumeDeleteRequest{
+			VolumeId: uint32(volumeId),
+		})
+		return deleteErr
+	})
+}
diff --git a/weed/shell/command_volume_tier_download.go b/weed/shell/command_volume_tier_download.go
new file mode 100644
index 000000000..756dc4686
--- /dev/null
+++ b/weed/shell/command_volume_tier_download.go
@@ -0,0 +1,166 @@
+package shell
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+
+	"google.golang.org/grpc"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeTierDownload{})
+}
+
+type commandVolumeTierDownload struct {
+}
+
+func (c *commandVolumeTierDownload) Name() string {
+	return "volume.tier.download"
+}
+
+func (c *commandVolumeTierDownload) Help() string {
+	return `download the dat file of a volume from a remote tier
+
+	volume.tier.download [-collection=""]
+	volume.tier.download [-collection=""] -volumeId=<volume_id>
+
+	e.g.:
+	volume.tier.download
+	volume.tier.download -volumeId=7
+
+	This command will download the dat file of a volume from a remote tier to a volume server in the local cluster.
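+
+	After the download finishes, the volume is unmounted and then mounted again
+	on the same volume server, so that reads are served from the local dat file.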
+
+`
+}
+
+func (c *commandVolumeTierDownload) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	volumeId := tierCommand.Int("volumeId", 0, "the volume id")
+	collection := tierCommand.String("collection", "", "the collection name")
+	if err = tierCommand.Parse(args); err != nil {
+		return nil
+	}
+
+	vid := needle.VolumeId(*volumeId)
+
+	// collect topology information
+	topologyInfo, err := collectTopologyInfo(commandEnv)
+	if err != nil {
+		return err
+	}
+
+	// volumeId is provided
+	if vid != 0 {
+		return doVolumeTierDownload(commandEnv, writer, *collection, vid)
+	}
+
+	// apply to all volumes in the collection
+	// collect the volumes that have a remote storage copy
+	volumeIds := collectRemoteVolumes(topologyInfo, *collection)
+	fmt.Printf("tier download volumes: %v\n", volumeIds)
+	for _, vid := range volumeIds {
+		if err = doVolumeTierDownload(commandEnv, writer, *collection, vid); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func collectRemoteVolumes(topoInfo *master_pb.TopologyInfo, selectedCollection string) (vids []needle.VolumeId) {
+
+	vidMap := make(map[uint32]bool)
+	eachDataNode(topoInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
+		for _, v := range dn.VolumeInfos {
+			if v.Collection == selectedCollection && v.RemoteStorageKey != "" && v.RemoteStorageName != "" {
+				vidMap[v.Id] = true
+			}
+		}
+	})
+
+	for vid := range vidMap {
+		vids = append(vids, needle.VolumeId(vid))
+	}
+
+	return
+}
+
+func doVolumeTierDownload(commandEnv *CommandEnv, writer io.Writer, collection string, vid needle.VolumeId) (err error) {
+	// find volume location
+	locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
+	if !found {
+		return fmt.Errorf("volume %d not found", vid)
+	}
+
+	// TODO parallelize this
+	for _, loc := range locations {
+		// copy the .dat file from the remote tier to local
+		err = downloadDatFromRemoteTier(commandEnv.option.GrpcDialOption, writer, needle.VolumeId(vid), collection, loc.Url)
+		if err != nil {
+			return fmt.Errorf("download dat file for volume %d to %s: %v", vid, loc.Url, err)
+		}
+	}
+
+	return nil
+}
+
+func downloadDatFromRemoteTier(grpcDialOption grpc.DialOption, writer io.Writer, volumeId needle.VolumeId, collection string, targetVolumeServer string) error {
+
+	err := operation.WithVolumeServerClient(targetVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		stream, downloadErr := volumeServerClient.VolumeTierMoveDatFromRemote(context.Background(), &volume_server_pb.VolumeTierMoveDatFromRemoteRequest{
+			VolumeId:   uint32(volumeId),
+			Collection: collection,
+		})
+		// check the call error before reading from the stream, since stream is nil when the call fails
+		if downloadErr != nil {
+			return downloadErr
+		}
+
+		var lastProcessed int64
+		for {
+			resp, recvErr := stream.Recv()
+			if recvErr != nil {
+				if recvErr == io.EOF {
+					break
+				} else {
+					return recvErr
+				}
+			}
+
+			processingSpeed := float64(resp.Processed-lastProcessed) / 1024.0 / 1024.0
+
+			fmt.Fprintf(writer, "downloaded %.2f%%, %d bytes, %.2fMB/s\n", resp.ProcessedPercentage, resp.Processed, processingSpeed)
+
+			lastProcessed = resp.Processed
+		}
+
+		_, unmountErr := volumeServerClient.VolumeUnmount(context.Background(), &volume_server_pb.VolumeUnmountRequest{
+			VolumeId: uint32(volumeId),
+		})
+		if unmountErr != nil {
+			return unmountErr
+		}
+
+		_, mountErr := volumeServerClient.VolumeMount(context.Background(), &volume_server_pb.VolumeMountRequest{
+			VolumeId: uint32(volumeId),
+		})
+		if 
mountErr != nil {
+			return mountErr
+		}
+
+		return nil
+	})
+
+	return err
+
+}
diff --git a/weed/shell/command_volume_tier_upload.go b/weed/shell/command_volume_tier_upload.go
new file mode 100644
index 000000000..5131e8f85
--- /dev/null
+++ b/weed/shell/command_volume_tier_upload.go
@@ -0,0 +1,147 @@
+package shell
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+	"time"
+
+	"google.golang.org/grpc"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeTierUpload{})
+}
+
+type commandVolumeTierUpload struct {
+}
+
+func (c *commandVolumeTierUpload) Name() string {
+	return "volume.tier.upload"
+}
+
+func (c *commandVolumeTierUpload) Help() string {
+	return `upload the dat file of a volume to a remote tier
+
+	volume.tier.upload [-collection=""] [-fullPercent=95] [-quietFor=1h]
+	volume.tier.upload [-collection=""] -volumeId=<volume_id> -dest=<storage_backend> [-keepLocalDatFile]
+
+	e.g.:
+	volume.tier.upload -volumeId=7 -dest=s3
+	volume.tier.upload -volumeId=7 -dest=s3.default
+
+	The <storage_backend> is defined in master.toml.
+	For example, "s3.default" in [storage.backend.s3.default]
+
+	This command will move the dat file of a volume to a remote tier.
+
+	SeaweedFS enables scalable and fast local access to lots of files, while
+	cloud storage is slower but more cost-efficient. How can the two be combined?
+
+	Usually the data follows the 80/20 rule: only 20% of the data is frequently accessed.
+	We can offload the old volumes to the cloud.
+
+	With this, SeaweedFS can be both fast and scalable, with effectively infinite storage space.
+	Just add more local SeaweedFS volume servers to increase the throughput.
+
+	The index file is still local, and the same O(1) disk read is applied to the remote file.
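+
+	Note: before the dat file is uploaded, the volume is marked as readonly
+	on all of its replica locations.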
+
+`
+}
+
+func (c *commandVolumeTierUpload) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	volumeId := tierCommand.Int("volumeId", 0, "the volume id")
+	collection := tierCommand.String("collection", "", "the collection name")
+	fullPercentage := tierCommand.Float64("fullPercent", 95, "the volume reaches the percentage of max volume size")
+	quietPeriod := tierCommand.Duration("quietFor", 24*time.Hour, "select volumes with no writes for this period")
+	dest := tierCommand.String("dest", "", "the target tier name")
+	keepLocalDatFile := tierCommand.Bool("keepLocalDatFile", false, "whether to keep the local dat file")
+	if err = tierCommand.Parse(args); err != nil {
+		return nil
+	}
+
+	vid := needle.VolumeId(*volumeId)
+
+	// volumeId is provided
+	if vid != 0 {
+		return doVolumeTierUpload(commandEnv, writer, *collection, vid, *dest, *keepLocalDatFile)
+	}
+
+	// apply to all volumes in the collection
+	// reusing collectVolumeIdsForEcEncode for now
+	volumeIds, err := collectVolumeIdsForEcEncode(commandEnv, *collection, *fullPercentage, *quietPeriod)
+	if err != nil {
+		return err
+	}
+	fmt.Printf("tier upload volumes: %v\n", volumeIds)
+	for _, vid := range volumeIds {
+		if err = doVolumeTierUpload(commandEnv, writer, *collection, vid, *dest, *keepLocalDatFile); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func doVolumeTierUpload(commandEnv *CommandEnv, writer io.Writer, collection string, vid needle.VolumeId, dest string, keepLocalDatFile bool) (err error) {
+	// find volume location
+	locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
+	if !found {
+		return fmt.Errorf("volume %d not found", vid)
+	}
+
+	err = markVolumeReadonly(commandEnv.option.GrpcDialOption, needle.VolumeId(vid), locations)
+	if err != nil {
+		return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
+	}
+
+	// copy the .dat file to the remote tier
+	err = uploadDatToRemoteTier(commandEnv.option.GrpcDialOption, writer, needle.VolumeId(vid), collection, locations[0].Url, dest, keepLocalDatFile)
+	if err != nil {
+		return fmt.Errorf("copy dat file for volume %d on %s to %s: %v", vid, locations[0].Url, dest, err)
+	}
+
+	return nil
+}
+
+func uploadDatToRemoteTier(grpcDialOption grpc.DialOption, writer io.Writer, volumeId needle.VolumeId, collection string, sourceVolumeServer string, dest string, keepLocalDatFile bool) error {
+
+	err := operation.WithVolumeServerClient(sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		stream, copyErr := volumeServerClient.VolumeTierMoveDatToRemote(context.Background(), &volume_server_pb.VolumeTierMoveDatToRemoteRequest{
+			VolumeId:               uint32(volumeId),
+			Collection:             collection,
+			DestinationBackendName: dest,
+			KeepLocalDatFile:       keepLocalDatFile,
+		})
+		// check the call error before reading from the stream, since stream is nil when the call fails
+		if copyErr != nil {
+			return copyErr
+		}
+
+		var lastProcessed int64
+		for {
+			resp, recvErr := stream.Recv()
+			if recvErr != nil {
+				if recvErr == io.EOF {
+					break
+				} else {
+					return recvErr
+				}
+			}
+
+			processingSpeed := float64(resp.Processed-lastProcessed) / 1024.0 / 1024.0
+
+			fmt.Fprintf(writer, "copied %.2f%%, %d bytes, %.2fMB/s\n", resp.ProcessedPercentage, resp.Processed, processingSpeed)
+
+			lastProcessed = resp.Processed
+		}
+
+		return nil
+	})
+
+	return err
+
+}
diff --git a/weed/shell/command_volume_unmount.go b/weed/shell/command_volume_unmount.go
new file mode 100644
index 000000000..6e5bef485
--- /dev/null
+++ b/weed/shell/command_volume_unmount.go
@@ -0,0 
+1,59 @@
+package shell
+
+import (
+	"context"
+	"fmt"
+	"io"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"google.golang.org/grpc"
+)
+
+func init() {
+	Commands = append(Commands, &commandVolumeUnmount{})
+}
+
+type commandVolumeUnmount struct {
+}
+
+func (c *commandVolumeUnmount) Name() string {
+	return "volume.unmount"
+}
+
+func (c *commandVolumeUnmount) Help() string {
+	return `unmount a volume from one volume server
+
+	volume.unmount <volume server host:port> <volume id>
+
+	This command unmounts a volume from one volume server.
+
+`
+}
+
+func (c *commandVolumeUnmount) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
+
+	if len(args) != 2 {
+		fmt.Fprintf(writer, "received args: %+v\n", args)
+		return fmt.Errorf("need 2 args of <volume server host:port> <volume id>")
+	}
+	sourceVolumeServer, volumeIdString := args[0], args[1]
+
+	volumeId, err := needle.NewVolumeId(volumeIdString)
+	if err != nil {
+		return fmt.Errorf("wrong volume id format %s: %v", volumeIdString, err)
+	}
+
+	return unmountVolume(commandEnv.option.GrpcDialOption, volumeId, sourceVolumeServer)
+
+}
+
+func unmountVolume(grpcDialOption grpc.DialOption, volumeId needle.VolumeId, sourceVolumeServer string) (err error) {
+	return operation.WithVolumeServerClient(sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
+		_, unmountErr := volumeServerClient.VolumeUnmount(context.Background(), &volume_server_pb.VolumeUnmountRequest{
+			VolumeId: uint32(volumeId),
+		})
+		return unmountErr
+	})
+}
diff --git a/weed/shell/commands.go b/weed/shell/commands.go
new file mode 100644
index 000000000..b8832ad93
--- /dev/null
+++ b/weed/shell/commands.go
@@ -0,0 +1,118 @@
+package shell
+
+import (
+	"fmt"
+	"io"
+	"net/url"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"google.golang.org/grpc"
+
+	"github.com/chrislusf/seaweedfs/weed/filer2"
+	"github.com/chrislusf/seaweedfs/weed/pb/filer_pb"
+	"github.com/chrislusf/seaweedfs/weed/wdclient"
+)
+
+type ShellOptions struct {
+	Masters        *string
+	GrpcDialOption grpc.DialOption
+	// shell transient context
+	FilerHost string
+	FilerPort int64
+	Directory string
+}
+
+type CommandEnv struct {
+	env          map[string]string
+	MasterClient *wdclient.MasterClient
+	option       ShellOptions
+}
+
+type command interface {
+	Name() string
+	Help() string
+	Do([]string, *CommandEnv, io.Writer) error
+}
+
+var (
+	Commands = []command{}
+)
+
+func NewCommandEnv(options ShellOptions) *CommandEnv {
+	return &CommandEnv{
+		env:          make(map[string]string),
+		MasterClient: wdclient.NewMasterClient(options.GrpcDialOption, "shell", 0, strings.Split(*options.Masters, ",")),
+		option:       options,
+	}
+}
+
+func (ce *CommandEnv) parseUrl(input string) (filerServer string, filerPort int64, path string, err error) {
+	if strings.HasPrefix(input, "http") {
+		return parseFilerUrl(input)
+	}
+	if !strings.HasPrefix(input, "/") {
+		input = filepath.ToSlash(filepath.Join(ce.option.Directory, input))
+	}
+	return ce.option.FilerHost, ce.option.FilerPort, input, err
+}
+
+func (ce *CommandEnv) isDirectory(filerServer string, filerPort int64, path string) bool {
+
+	return ce.checkDirectory(filerServer, filerPort, path) == nil
+
+}
+
+func (ce *CommandEnv) checkDirectory(filerServer string, filerPort int64, path string) error {
+
+	dir, name := filer2.FullPath(path).DirAndName()
+
+	return ce.withFilerClient(filerServer, filerPort, 
func(client filer_pb.SeaweedFilerClient) error { + + resp, lookupErr := filer_pb.LookupEntry(client, &filer_pb.LookupDirectoryEntryRequest{ + Directory: dir, + Name: name, + }) + if lookupErr != nil { + return lookupErr + } + + if !resp.Entry.IsDirectory { + return fmt.Errorf("not a directory") + } + + return nil + }) + +} + +func parseFilerUrl(entryPath string) (filerServer string, filerPort int64, path string, err error) { + if strings.HasPrefix(entryPath, "http") { + var u *url.URL + u, err = url.Parse(entryPath) + if err != nil { + return + } + filerServer = u.Hostname() + portString := u.Port() + if portString != "" { + filerPort, err = strconv.ParseInt(portString, 10, 32) + } + path = u.Path + } else { + err = fmt.Errorf("path should have full url http://<filer_server>:<port>/path/to/dirOrFile : %s", entryPath) + } + return +} + +func findInputDirectory(args []string) (input string) { + input = "." + if len(args) > 0 { + input = args[len(args)-1] + if strings.HasPrefix(input, "-") { + input = "." + } + } + return input +} diff --git a/weed/shell/shell_liner.go b/weed/shell/shell_liner.go new file mode 100644 index 000000000..a4f17e0fa --- /dev/null +++ b/weed/shell/shell_liner.go @@ -0,0 +1,146 @@ +package shell + +import ( + "fmt" + "io" + "os" + "path" + "regexp" + "strings" + + "sort" + + "github.com/peterh/liner" +) + +var ( + line *liner.State + historyPath = path.Join(os.TempDir(), "weed-shell") +) + +func RunShell(options ShellOptions) { + + line = liner.NewLiner() + defer line.Close() + + line.SetCtrlCAborts(true) + + setCompletionHandler() + loadHistory() + + defer saveHistory() + + reg, _ := regexp.Compile(`'.*?'|".*?"|\S+`) + + commandEnv := NewCommandEnv(options) + + go commandEnv.MasterClient.KeepConnectedToMaster() + commandEnv.MasterClient.WaitUntilConnected() + + for { + cmd, err := line.Prompt("> ") + if err != nil { + if err != io.EOF { + fmt.Printf("%v\n", err) + } + return + } + + cmds := reg.FindAllString(cmd, -1) + if len(cmds) == 0 { + continue + } else { + line.AppendHistory(cmd) + + args := make([]string, len(cmds[1:])) + + for i := range args { + args[i] = strings.Trim(string(cmds[1+i]), "\"'") + } + + cmd := strings.ToLower(cmds[0]) + if cmd == "help" || cmd == "?" 
{ + printHelp(cmds) + } else if cmd == "exit" || cmd == "quit" { + return + } else { + foundCommand := false + for _, c := range Commands { + if c.Name() == cmd { + if err := c.Do(args, commandEnv, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + } + foundCommand = true + } + } + if !foundCommand { + fmt.Fprintf(os.Stderr, "unknown command: %v\n", cmd) + } + } + + } + } +} + +func printGenericHelp() { + msg := + `Type: "help <command>" for help on <command> +` + fmt.Print(msg) + + sort.Slice(Commands, func(i, j int) bool { + return strings.Compare(Commands[i].Name(), Commands[j].Name()) < 0 + }) + for _, c := range Commands { + helpTexts := strings.SplitN(c.Help(), "\n", 2) + fmt.Printf(" %-30s\t# %s \n", c.Name(), helpTexts[0]) + } +} + +func printHelp(cmds []string) { + args := cmds[1:] + if len(args) == 0 { + printGenericHelp() + } else if len(args) > 1 { + fmt.Println() + } else { + cmd := strings.ToLower(args[0]) + + sort.Slice(Commands, func(i, j int) bool { + return strings.Compare(Commands[i].Name(), Commands[j].Name()) < 0 + }) + + for _, c := range Commands { + if c.Name() == cmd { + fmt.Printf(" %s\t# %s\n", c.Name(), c.Help()) + } + } + } +} + +func setCompletionHandler() { + line.SetCompleter(func(line string) (c []string) { + for _, i := range Commands { + if strings.HasPrefix(i.Name(), strings.ToLower(line)) { + c = append(c, i.Name()) + } + } + return + }) +} + +func loadHistory() { + if f, err := os.Open(historyPath); err == nil { + line.ReadHistory(f) + f.Close() + } +} + +func saveHistory() { + if f, err := os.Create(historyPath); err != nil { + fmt.Printf("Error writing history file: %v\n", err) + } else { + line.WriteHistory(f) + f.Close() + } +} diff --git a/weed/stats/disk_supported.go b/weed/stats/disk_supported.go index 0537828b0..dff580b5b 100644 --- a/weed/stats/disk_supported.go +++ b/weed/stats/disk_supported.go @@ -17,5 +17,7 @@ func fillInDiskStatus(disk *volume_server_pb.DiskStatus) { disk.All = fs.Blocks * uint64(fs.Bsize) disk.Free = fs.Bfree * uint64(fs.Bsize) disk.Used = disk.All - disk.Free + disk.PercentFree = float32((float64(disk.Free) / float64(disk.All)) * 100) + disk.PercentUsed = float32((float64(disk.Used) / float64(disk.All)) * 100) return } diff --git a/weed/stats/metrics.go b/weed/stats/metrics.go new file mode 100644 index 000000000..ee8763e84 --- /dev/null +++ b/weed/stats/metrics.go @@ -0,0 +1,145 @@ +package stats + +import ( + "fmt" + "os" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/push" +) + +var ( + FilerGather = prometheus.NewRegistry() + VolumeServerGather = prometheus.NewRegistry() + + FilerRequestCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "SeaweedFS", + Subsystem: "filer", + Name: "request_total", + Help: "Counter of filer requests.", + }, []string{"type"}) + + FilerRequestHistogram = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "SeaweedFS", + Subsystem: "filer", + Name: "request_seconds", + Help: "Bucketed histogram of filer request processing time.", + Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24), + }, []string{"type"}) + + FilerStoreCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "SeaweedFS", + Subsystem: "filerStore", + Name: "request_total", + Help: "Counter of filer store requests.", + }, []string{"store", "type"}) + + FilerStoreHistogram = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + 
Namespace: "SeaweedFS", + Subsystem: "filerStore", + Name: "request_seconds", + Help: "Bucketed histogram of filer store request processing time.", + Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24), + }, []string{"store", "type"}) + + VolumeServerRequestCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "SeaweedFS", + Subsystem: "volumeServer", + Name: "request_total", + Help: "Counter of volume server requests.", + }, []string{"type"}) + + VolumeServerRequestHistogram = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "SeaweedFS", + Subsystem: "volumeServer", + Name: "request_seconds", + Help: "Bucketed histogram of volume server request processing time.", + Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24), + }, []string{"type"}) + + VolumeServerVolumeCounter = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "SeaweedFS", + Subsystem: "volumeServer", + Name: "volumes", + Help: "Number of volumes or shards.", + }, []string{"collection", "type"}) + + VolumeServerMaxVolumeCounter = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "SeaweedFS", + Subsystem: "volumeServer", + Name: "max_volumes", + Help: "Maximum number of volumes.", + }) + + VolumeServerDiskSizeGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "SeaweedFS", + Subsystem: "volumeServer", + Name: "total_disk_size", + Help: "Actual disk size used by volumes.", + }, []string{"collection", "type"}) +) + +func init() { + + FilerGather.MustRegister(FilerRequestCounter) + FilerGather.MustRegister(FilerRequestHistogram) + FilerGather.MustRegister(FilerStoreCounter) + FilerGather.MustRegister(FilerStoreHistogram) + FilerGather.MustRegister(prometheus.NewGoCollector()) + + VolumeServerGather.MustRegister(VolumeServerRequestCounter) + VolumeServerGather.MustRegister(VolumeServerRequestHistogram) + VolumeServerGather.MustRegister(VolumeServerVolumeCounter) + VolumeServerGather.MustRegister(VolumeServerMaxVolumeCounter) + VolumeServerGather.MustRegister(VolumeServerDiskSizeGauge) + +} + +func LoopPushingMetric(name, instance string, gatherer *prometheus.Registry, fnGetMetricsDest func() (addr string, intervalSeconds int)) { + + if fnGetMetricsDest == nil { + return + } + + addr, intervalSeconds := fnGetMetricsDest() + pusher := push.New(addr, name).Gatherer(gatherer).Grouping("instance", instance) + currentAddr := addr + + for { + if currentAddr != "" { + err := pusher.Push() + if err != nil { + glog.V(0).Infof("could not push metrics to prometheus push gateway %s: %v", addr, err) + } + } + if intervalSeconds <= 0 { + intervalSeconds = 15 + } + time.Sleep(time.Duration(intervalSeconds) * time.Second) + addr, intervalSeconds = fnGetMetricsDest() + if currentAddr != addr { + pusher = push.New(addr, name).Gatherer(gatherer).Grouping("instance", instance) + currentAddr = addr + } + + } +} + +func SourceName(port uint32) string { + hostname, err := os.Hostname() + if err != nil { + return "unknown" + } + return fmt.Sprintf("%s:%d", hostname, port) +} diff --git a/weed/storage/backend/backend.go b/weed/storage/backend/backend.go new file mode 100644 index 000000000..6941ca5a1 --- /dev/null +++ b/weed/storage/backend/backend.go @@ -0,0 +1,135 @@ +package backend + +import ( + "io" + "os" + "strings" + "time" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/spf13/viper" +) + +type BackendStorageFile interface { + io.ReaderAt + 
io.WriterAt + Truncate(off int64) error + io.Closer + GetStat() (datSize int64, modTime time.Time, err error) + Name() string +} + +type BackendStorage interface { + ToProperties() map[string]string + NewStorageFile(key string, tierInfo *volume_server_pb.VolumeInfo) BackendStorageFile + CopyFile(f *os.File, attributes map[string]string, fn func(progressed int64, percentage float32) error) (key string, size int64, err error) + DownloadFile(fileName string, key string, fn func(progressed int64, percentage float32) error) (size int64, err error) + DeleteFile(key string) (err error) +} + +type StringProperties interface { + GetString(key string) string +} +type StorageType string +type BackendStorageFactory interface { + StorageType() StorageType + BuildStorage(configuration StringProperties, configPrefix string, id string) (BackendStorage, error) +} + +var ( + BackendStorageFactories = make(map[StorageType]BackendStorageFactory) + BackendStorages = make(map[string]BackendStorage) +) + +// used by master to load remote storage configurations +func LoadConfiguration(config *viper.Viper) { + + StorageBackendPrefix := "storage.backend" + + for backendTypeName := range config.GetStringMap(StorageBackendPrefix) { + backendStorageFactory, found := BackendStorageFactories[StorageType(backendTypeName)] + if !found { + glog.Fatalf("backend storage type %s not found", backendTypeName) + } + for backendStorageId := range config.GetStringMap(StorageBackendPrefix + "." + backendTypeName) { + if !config.GetBool(StorageBackendPrefix + "." + backendTypeName + "." + backendStorageId + ".enabled") { + continue + } + backendStorage, buildErr := backendStorageFactory.BuildStorage(config, + StorageBackendPrefix+"."+backendTypeName+"."+backendStorageId+".", backendStorageId) + if buildErr != nil { + glog.Fatalf("fail to create backend storage %s.%s", backendTypeName, backendStorageId) + } + BackendStorages[backendTypeName+"."+backendStorageId] = backendStorage + if backendStorageId == "default" { + BackendStorages[backendTypeName] = backendStorage + } + } + } + +} + +// used by volume server to receive remote storage configurations from master +func LoadFromPbStorageBackends(storageBackends []*master_pb.StorageBackend) { + + for _, storageBackend := range storageBackends { + backendStorageFactory, found := BackendStorageFactories[StorageType(storageBackend.Type)] + if !found { + glog.Warningf("storage type %s not found", storageBackend.Type) + continue + } + backendStorage, buildErr := backendStorageFactory.BuildStorage(newProperties(storageBackend.Properties), "", storageBackend.Id) + if buildErr != nil { + glog.Fatalf("fail to create backend storage %s.%s", storageBackend.Type, storageBackend.Id) + } + BackendStorages[storageBackend.Type+"."+storageBackend.Id] = backendStorage + if storageBackend.Id == "default" { + BackendStorages[storageBackend.Type] = backendStorage + } + } +} + +type Properties struct { + m map[string]string +} + +func newProperties(m map[string]string) *Properties { + return &Properties{m: m} +} + +func (p *Properties) GetString(key string) string { + if v, found := p.m[key]; found { + return v + } + return "" +} + +func ToPbStorageBackends() (backends []*master_pb.StorageBackend) { + for sName, s := range BackendStorages { + sType, sId := BackendNameToTypeId(sName) + if sType == "" { + continue + } + backends = append(backends, &master_pb.StorageBackend{ + Type: sType, + Id: sId, + Properties: s.ToProperties(), + }) + } + return +} + +func BackendNameToTypeId(backendName string) 
(backendType, backendId string) {
+	parts := strings.Split(backendName, ".")
+	if len(parts) == 1 {
+		return backendName, "default"
+	}
+	if len(parts) != 2 {
+		return
+	}
+
+	backendType, backendId = parts[0], parts[1]
+	return
+}
diff --git a/weed/storage/backend/disk_file.go b/weed/storage/backend/disk_file.go
new file mode 100644
index 000000000..c4b3caffb
--- /dev/null
+++ b/weed/storage/backend/disk_file.go
@@ -0,0 +1,50 @@
+package backend
+
+import (
+	"os"
+	"time"
+)
+
+var (
+	_ BackendStorageFile = &DiskFile{}
+)
+
+type DiskFile struct {
+	File         *os.File
+	fullFilePath string
+}
+
+func NewDiskFile(f *os.File) *DiskFile {
+	return &DiskFile{
+		fullFilePath: f.Name(),
+		File:         f,
+	}
+}
+
+func (df *DiskFile) ReadAt(p []byte, off int64) (n int, err error) {
+	return df.File.ReadAt(p, off)
+}
+
+func (df *DiskFile) WriteAt(p []byte, off int64) (n int, err error) {
+	return df.File.WriteAt(p, off)
+}
+
+func (df *DiskFile) Truncate(off int64) error {
+	return df.File.Truncate(off)
+}
+
+func (df *DiskFile) Close() error {
+	return df.File.Close()
+}
+
+func (df *DiskFile) GetStat() (datSize int64, modTime time.Time, err error) {
+	stat, e := df.File.Stat()
+	if e == nil {
+		return stat.Size(), stat.ModTime(), nil
+	}
+	// return the actual Stat error, not the zero-valued named return
+	return 0, time.Time{}, e
+}
+
+func (df *DiskFile) Name() string {
+	return df.fullFilePath
+}
diff --git a/weed/storage/backend/memory_map/memory_map.go b/weed/storage/backend/memory_map/memory_map.go
new file mode 100644
index 000000000..5dc7ba33d
--- /dev/null
+++ b/weed/storage/backend/memory_map/memory_map.go
@@ -0,0 +1,30 @@
+package memory_map
+
+import (
+	"os"
+	"strconv"
+)
+
+type MemoryBuffer struct {
+	aligned_length uint64
+	length         uint64
+	aligned_ptr    uintptr
+	ptr            uintptr
+	Buffer         []byte
+}
+
+type MemoryMap struct {
+	File                   *os.File
+	file_memory_map_handle uintptr
+	write_map_views        []MemoryBuffer
+	max_length             uint64
+	End_of_file            int64
+}
+
+func ReadMemoryMapMaxSizeMb(memoryMapMaxSizeMbString string) (uint32, error) {
+	if memoryMapMaxSizeMbString == "" {
+		return 0, nil
+	}
+	memoryMapMaxSize64, err := strconv.ParseUint(memoryMapMaxSizeMbString, 10, 32)
+	return uint32(memoryMapMaxSize64), err
+}
diff --git a/weed/storage/backend/memory_map/memory_map_backend.go b/weed/storage/backend/memory_map/memory_map_backend.go
new file mode 100644
index 000000000..03e7308d0
--- /dev/null
+++ b/weed/storage/backend/memory_map/memory_map_backend.go
@@ -0,0 +1,60 @@
+package memory_map
+
+import (
+	"os"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/storage/backend"
+)
+
+var (
+	_ backend.BackendStorageFile = &MemoryMappedFile{}
+)
+
+type MemoryMappedFile struct {
+	mm *MemoryMap
+}
+
+func NewMemoryMappedFile(f *os.File, memoryMapSizeMB uint32) *MemoryMappedFile {
+	mmf := &MemoryMappedFile{
+		mm: new(MemoryMap),
+	}
+	mmf.mm.CreateMemoryMap(f, 1024*1024*uint64(memoryMapSizeMB))
+	return mmf
+}
+
+func (mmf *MemoryMappedFile) ReadAt(p []byte, off int64) (n int, err error) {
+	readBytes, e := mmf.mm.ReadMemory(uint64(off), uint64(len(p)))
+	if e != nil {
+		return 0, e
+	}
+	// TODO avoid the extra copy
+	copy(p, readBytes)
+	return len(readBytes), nil
+}
+
+func (mmf *MemoryMappedFile) WriteAt(p []byte, off int64) (n int, err error) {
+	mmf.mm.WriteMemory(uint64(off), uint64(len(p)), p)
+	return len(p), nil
+}
+
+func (mmf *MemoryMappedFile) Truncate(off int64) error {
+	return nil
+}
+
+func (mmf *MemoryMappedFile) Close() error {
+	mmf.mm.DeleteFileAndMemoryMap()
+	return nil
+}
+
+func (mmf *MemoryMappedFile) GetStat() (datSize int64, modTime time.Time, err error) {
+	stat, e := mmf.mm.File.Stat()
+	if e == nil {
+		return mmf.mm.End_of_file + 1, stat.ModTime(), nil
+	}
+	// return the actual Stat error, not the zero-valued named return
+	return 0, time.Time{}, e
+}
+
+func (mmf *MemoryMappedFile) Name() string {
+	return mmf.mm.File.Name()
+}
diff --git a/weed/storage/backend/memory_map/memory_map_other.go b/weed/storage/backend/memory_map/memory_map_other.go
new file mode 100644
index 000000000..e06a0b59a
--- /dev/null
+++ b/weed/storage/backend/memory_map/memory_map_other.go
@@ -0,0 +1,24 @@
+// +build !windows
+
+package memory_map
+
+import (
+	"fmt"
+	"os"
+)
+
+func (mMap *MemoryMap) CreateMemoryMap(file *os.File, maxLength uint64) {
+}
+
+func (mMap *MemoryMap) WriteMemory(offset uint64, length uint64, data []byte) {
+
+}
+
+func (mMap *MemoryMap) ReadMemory(offset uint64, length uint64) ([]byte, error) {
+	dataSlice := []byte{}
+	return dataSlice, fmt.Errorf("memory map is not implemented on this platform")
+}
+
+func (mBuffer *MemoryMap) DeleteFileAndMemoryMap() {
+
+}
diff --git a/weed/storage/backend/memory_map/memory_map_test.go b/weed/storage/backend/memory_map/memory_map_test.go
new file mode 100644
index 000000000..33e1a828c
--- /dev/null
+++ b/weed/storage/backend/memory_map/memory_map_test.go
@@ -0,0 +1,10 @@
+package memory_map
+
+import "testing"
+
+func TestMemoryMapMaxSizeReadWrite(t *testing.T) {
+	memoryMapSize, _ := ReadMemoryMapMaxSizeMb("5000")
+	if memoryMapSize != 5000 {
+		t.Errorf("unexpected memoryMapSize: %v", memoryMapSize)
+	}
+}
diff --git a/weed/storage/backend/memory_map/memory_map_windows.go b/weed/storage/backend/memory_map/memory_map_windows.go
new file mode 100644
index 000000000..7eb713442
--- /dev/null
+++ b/weed/storage/backend/memory_map/memory_map_windows.go
@@ -0,0 +1,325 @@
+// +build windows
+
+package memory_map
+
+import (
+	"os"
+	"reflect"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/sys/windows"
+)
+
+type DWORDLONG = uint64
+type DWORD = uint32
+type WORD = uint16
+
+var (
+	modkernel32 = syscall.NewLazyDLL("kernel32.dll")
+
+	procGetSystemInfo            = modkernel32.NewProc("GetSystemInfo")
+	procGlobalMemoryStatusEx     = modkernel32.NewProc("GlobalMemoryStatusEx")
+	procGetProcessWorkingSetSize = modkernel32.NewProc("GetProcessWorkingSetSize")
+	procSetProcessWorkingSetSize = modkernel32.NewProc("SetProcessWorkingSetSize")
+)
+
+var currentProcess, _ = windows.GetCurrentProcess()
+var currentMinWorkingSet uint64 = 0
+var currentMaxWorkingSet uint64 = 0
+var _ = getProcessWorkingSetSize(uintptr(currentProcess), &currentMinWorkingSet, &currentMaxWorkingSet)
+
+var systemInfo, _ = getSystemInfo()
+var chunkSize = uint64(systemInfo.dwAllocationGranularity) * 128
+
+var memoryStatusEx, _ = globalMemoryStatusEx()
+var maxMemoryLimitBytes = uint64(float64(memoryStatusEx.ullTotalPhys) * 0.8)
+
+func (mMap *MemoryMap) CreateMemoryMap(file *os.File, maxLength uint64) {
+
+	chunks := (maxLength / chunkSize)
+	if chunks*chunkSize < maxLength {
+		chunks = chunks + 1
+	}
+
+	alignedMaxLength := chunks * chunkSize
+
+	maxLength_high := uint32(alignedMaxLength >> 32)
+	maxLength_low := uint32(alignedMaxLength & 0xFFFFFFFF)
+	file_memory_map_handle, err := windows.CreateFileMapping(windows.Handle(file.Fd()), nil, windows.PAGE_READWRITE, maxLength_high, maxLength_low, nil)
+
+	if err == nil {
+		mMap.File = file
+		mMap.file_memory_map_handle = uintptr(file_memory_map_handle)
+		mMap.write_map_views = make([]MemoryBuffer, 0, alignedMaxLength/chunkSize)
+		mMap.max_length = alignedMaxLength
+		mMap.End_of_file = -1
+	}
+}
+
+func (mMap *MemoryMap) DeleteFileAndMemoryMap() {
+	// Close the file handles first so the file can be deleted,
+	// then unmap the memory so the unmapping process doesn't write the data back to disk.
+	windows.CloseHandle(windows.Handle(mMap.file_memory_map_handle))
+	windows.CloseHandle(windows.Handle(mMap.File.Fd()))
+
+	for _, view := range mMap.write_map_views {
+		view.releaseMemory()
+	}
+
+	mMap.write_map_views = nil
+	mMap.max_length = 0
+}
+
+func min(x, y uint64) uint64 {
+	if x < y {
+		return x
+	}
+	return y
+}
+
+func (mMap *MemoryMap) WriteMemory(offset uint64, length uint64, data []byte) {
+
+	for {
+		if ((offset+length)/chunkSize)+1 > uint64(len(mMap.write_map_views)) {
+			allocateChunk(mMap)
+		} else {
+			break
+		}
+	}
+
+	remaining_length := length
+	sliceIndex := offset / chunkSize
+	sliceOffset := offset - (sliceIndex * chunkSize)
+	dataOffset := uint64(0)
+
+	for {
+		writeEnd := min((remaining_length + sliceOffset), chunkSize)
+		copy(mMap.write_map_views[sliceIndex].Buffer[sliceOffset:writeEnd], data[dataOffset:])
+		remaining_length -= (writeEnd - sliceOffset)
+		dataOffset += (writeEnd - sliceOffset)
+
+		if remaining_length > 0 {
+			sliceIndex += 1
+			sliceOffset = 0
+		} else {
+			break
+		}
+	}
+
+	if mMap.End_of_file < int64(offset+length-1) {
+		mMap.End_of_file = int64(offset + length - 1)
+	}
+}
+
+func (mMap *MemoryMap) ReadMemory(offset uint64, length uint64) (dataSlice []byte, err error) {
+	dataSlice = make([]byte, length)
+	mBuffer, err := allocate(windows.Handle(mMap.file_memory_map_handle), offset, length, false)
+	if err != nil {
+		// do not copy from or release a view that failed to map
+		return nil, err
+	}
+	copy(dataSlice, mBuffer.Buffer)
+	mBuffer.releaseMemory()
+	return dataSlice, nil
+}
+
+func (mBuffer *MemoryBuffer) releaseMemory() {
+
+	windows.VirtualUnlock(mBuffer.aligned_ptr, uintptr(mBuffer.aligned_length))
+	windows.UnmapViewOfFile(mBuffer.aligned_ptr)
+
+	currentMinWorkingSet -= mBuffer.aligned_length
+	currentMaxWorkingSet -= mBuffer.aligned_length
+
+	if currentMinWorkingSet < maxMemoryLimitBytes {
+		var _ = setProcessWorkingSetSize(uintptr(currentProcess), currentMinWorkingSet, currentMaxWorkingSet)
+	}
+
+	mBuffer.ptr = 0
+	mBuffer.aligned_ptr = 0
+	mBuffer.length = 0
+	mBuffer.aligned_length = 0
+	mBuffer.Buffer = nil
+}
+
+func allocateChunk(mMap *MemoryMap) {
+	start := uint64(len(mMap.write_map_views)) * chunkSize
+	mBuffer, err := allocate(windows.Handle(mMap.file_memory_map_handle), start, chunkSize, true)
+
+	if err == nil {
+		mMap.write_map_views = append(mMap.write_map_views, mBuffer)
+	}
+}
+
+func allocate(hMapFile windows.Handle, offset uint64, length uint64, write bool) (MemoryBuffer, error) {
+
+	mBuffer := MemoryBuffer{}
+
+	// align memory allocations to the minimum virtual memory allocation granularity
+	dwSysGran := systemInfo.dwAllocationGranularity
+
+	start := (offset / uint64(dwSysGran)) * uint64(dwSysGran)
+	diff := offset - start
+	aligned_length := diff + length
+
+	offset_high := uint32(start >> 32)
+	offset_low := uint32(start & 0xFFFFFFFF)
+
+	access := windows.FILE_MAP_READ
+
+	if write {
+		access = windows.FILE_MAP_WRITE
+	}
+
+	currentMinWorkingSet += aligned_length
+	currentMaxWorkingSet += aligned_length
+
+	if currentMinWorkingSet < maxMemoryLimitBytes {
+		// increase the process working set size to hint to the windows memory manager to
+		// prioritise keeping this memory mapped in physical memory over other standby memory
+		var _ = setProcessWorkingSetSize(uintptr(currentProcess), currentMinWorkingSet, currentMaxWorkingSet)
+	}
+
+	addr_ptr, errno := windows.MapViewOfFile(hMapFile,
+		uint32(access), // read/write permission
+		offset_high,
+		offset_low,
+		uintptr(aligned_length))
+
+	if addr_ptr == 0 {
+		return mBuffer, errno
+	}
+
+	mBuffer.aligned_ptr = addr_ptr
+	mBuffer.aligned_length = aligned_length
+	mBuffer.ptr = addr_ptr + uintptr(diff)
+	mBuffer.length = length
+
+	if currentMinWorkingSet < maxMemoryLimitBytes {
+		// lock the view only after its address and length are recorded,
+		// so the lock covers the mapping that was just created
+		windows.VirtualLock(mBuffer.aligned_ptr, uintptr(mBuffer.aligned_length))
+	}
+
+	slice_header := (*reflect.SliceHeader)(unsafe.Pointer(&mBuffer.Buffer))
+	slice_header.Data = addr_ptr + uintptr(diff)
+	slice_header.Len = int(length)
+	slice_header.Cap = int(length)
+
+	return mBuffer, nil
+}
+
+//typedef struct _MEMORYSTATUSEX {
+//	DWORD     dwLength;
+//	DWORD     dwMemoryLoad;
+//	DWORDLONG ullTotalPhys;
+//	DWORDLONG ullAvailPhys;
+//	DWORDLONG ullTotalPageFile;
+//	DWORDLONG ullAvailPageFile;
+//	DWORDLONG ullTotalVirtual;
+//	DWORDLONG ullAvailVirtual;
+//	DWORDLONG ullAvailExtendedVirtual;
+//} MEMORYSTATUSEX, *LPMEMORYSTATUSEX;
+//https://docs.microsoft.com/en-gb/windows/win32/api/sysinfoapi/ns-sysinfoapi-memorystatusex
+
+type _MEMORYSTATUSEX struct {
+	dwLength                DWORD
+	dwMemoryLoad            DWORD
+	ullTotalPhys            DWORDLONG
+	ullAvailPhys            DWORDLONG
+	ullTotalPageFile        DWORDLONG
+	ullAvailPageFile        DWORDLONG
+	ullTotalVirtual         DWORDLONG
+	ullAvailVirtual         DWORDLONG
+	ullAvailExtendedVirtual DWORDLONG
+}
+
+// BOOL GlobalMemoryStatusEx(
+//	LPMEMORYSTATUSEX lpBuffer
+// );
+// https://docs.microsoft.com/en-gb/windows/win32/api/sysinfoapi/nf-sysinfoapi-globalmemorystatusex
+func globalMemoryStatusEx() (_MEMORYSTATUSEX, error) {
+	var mem_status _MEMORYSTATUSEX
+
+	mem_status.dwLength = uint32(unsafe.Sizeof(mem_status))
+	_, _, err := procGlobalMemoryStatusEx.Call(uintptr(unsafe.Pointer(&mem_status)))
+
+	if err != syscall.Errno(0) {
+		return mem_status, err
+	}
+	return mem_status, nil
+}
+
+// typedef struct _SYSTEM_INFO {
+//	union {
+//		DWORD dwOemId;
+//		struct {
+//			WORD wProcessorArchitecture;
+//			WORD wReserved;
+//		};
+//	};
+//	DWORD     dwPageSize;
+//	LPVOID    lpMinimumApplicationAddress;
+//	LPVOID    lpMaximumApplicationAddress;
+//	DWORD_PTR dwActiveProcessorMask;
+//	DWORD     dwNumberOfProcessors;
+//	DWORD     dwProcessorType;
+//	DWORD     dwAllocationGranularity;
+//	WORD      wProcessorLevel;
+//	WORD      wProcessorRevision;
+// } SYSTEM_INFO;
+// https://docs.microsoft.com/en-gb/windows/win32/api/sysinfoapi/ns-sysinfoapi-system_info
+type _SYSTEM_INFO struct {
+	dwOemId                     DWORD
+	dwPageSize                  DWORD
+	lpMinimumApplicationAddress uintptr
+	lpMaximumApplicationAddress uintptr
+	dwActiveProcessorMask       uintptr
+	dwNumberOfProcessors        DWORD
+	dwProcessorType             DWORD
+	dwAllocationGranularity     DWORD
+	wProcessorLevel             WORD
+	wProcessorRevision          WORD
+}
+
+// void WINAPI GetSystemInfo(
+//	_Out_ LPSYSTEM_INFO lpSystemInfo
+// );
+// https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getsysteminfo
+func getSystemInfo() (_SYSTEM_INFO, error) {
+	var si _SYSTEM_INFO
+	_, _, err := procGetSystemInfo.Call(uintptr(unsafe.Pointer(&si)))
+	if err != syscall.Errno(0) {
+		return si, err
+	}
+	return si, nil
+}
+
+// BOOL GetProcessWorkingSetSize(
+//	HANDLE  hProcess,
+//	PSIZE_T lpMinimumWorkingSetSize,
+//	PSIZE_T lpMaximumWorkingSetSize
+// );
+// https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-getprocessworkingsetsize
+
+func getProcessWorkingSetSize(process uintptr, dwMinWorkingSet *uint64, dwMaxWorkingSet *uint64) error {
+	r1, _, err := syscall.Syscall(procGetProcessWorkingSetSize.Addr(), 3, process, uintptr(unsafe.Pointer(dwMinWorkingSet)), uintptr(unsafe.Pointer(dwMaxWorkingSet)))
+	if r1 == 0 {
+		if err != syscall.Errno(0) {
+			return err
+		}
+	}
+
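// a zero return with errno 0 is deliberately ignored here: the working
+	// set sizes are only best-effort hints to the Windows memory manager
+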
return nil +} + +// BOOL SetProcessWorkingSetSize( +// HANDLE hProcess, +// SIZE_T dwMinimumWorkingSetSize, +// SIZE_T dwMaximumWorkingSetSize +// ); +// https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-setprocessworkingsetsize + +func setProcessWorkingSetSize(process uintptr, dwMinWorkingSet uint64, dwMaxWorkingSet uint64) error { + r1, _, err := syscall.Syscall(procSetProcessWorkingSetSize.Addr(), 3, process, uintptr(dwMinWorkingSet), uintptr(dwMaxWorkingSet)) + if r1 == 0 { + if err != syscall.Errno(0) { + return err + } + } + return nil +} diff --git a/weed/storage/backend/memory_map/os_overloads/file_windows.go b/weed/storage/backend/memory_map/os_overloads/file_windows.go new file mode 100644 index 000000000..05aa384e2 --- /dev/null +++ b/weed/storage/backend/memory_map/os_overloads/file_windows.go @@ -0,0 +1,168 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package os_overloads + +import ( + "os" + "syscall" + + "golang.org/x/sys/windows" +) + +func isAbs(path string) (b bool) { + v := volumeName(path) + if v == "" { + return false + } + path = path[len(v):] + if path == "" { + return false + } + return os.IsPathSeparator(path[0]) +} + +func volumeName(path string) (v string) { + if len(path) < 2 { + return "" + } + // with drive letter + c := path[0] + if path[1] == ':' && + ('0' <= c && c <= '9' || 'a' <= c && c <= 'z' || + 'A' <= c && c <= 'Z') { + return path[:2] + } + // is it UNC + if l := len(path); l >= 5 && os.IsPathSeparator(path[0]) && os.IsPathSeparator(path[1]) && + !os.IsPathSeparator(path[2]) && path[2] != '.' { + // first, leading `\\` and next shouldn't be `\`. its server name. + for n := 3; n < l-1; n++ { + // second, next '\' shouldn't be repeated. + if os.IsPathSeparator(path[n]) { + n++ + // third, following something characters. its share name. + if !os.IsPathSeparator(path[n]) { + if path[n] == '.' { + break + } + for ; n < l; n++ { + if os.IsPathSeparator(path[n]) { + break + } + } + return path[:n] + } + break + } + } + } + return "" +} + +// fixLongPath returns the extended-length (\\?\-prefixed) form of +// path when needed, in order to avoid the default 260 character file +// path limit imposed by Windows. If path is not easily converted to +// the extended-length form (for example, if path is a relative path +// or contains .. elements), or is short enough, fixLongPath returns +// path unmodified. +// +// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath +func fixLongPath(path string) string { + // Do nothing (and don't allocate) if the path is "short". + // Empirically (at least on the Windows Server 2013 builder), + // the kernel is arbitrarily okay with < 248 bytes. That + // matches what the docs above say: + // "When using an API to create a directory, the specified + // path cannot be so long that you cannot append an 8.3 file + // name (that is, the directory name cannot exceed MAX_PATH + // minus 12)." Since MAX_PATH is 260, 260 - 12 = 248. + // + // The MSDN docs appear to say that a normal path that is 248 bytes long + // will work; empirically the path must be less then 248 bytes long. + if len(path) < 248 { + // Don't fix. (This is how Go 1.7 and earlier worked, + // not automatically generating the \\?\ form) + return path + } + + // The extended form begins with \\?\, as in + // \\?\c:\windows\foo.txt or \\?\UNC\server\share\foo.txt. 
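+	// For example, a 300-byte absolute path under c:\data is returned with a
+	// \\?\ prefix, while relative paths and paths already shorter than 248
+	// bytes come back unchanged.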
+ // The extended form disables evaluation of . and .. path + // elements and disables the interpretation of / as equivalent + // to \. The conversion here rewrites / to \ and elides + // . elements as well as trailing or duplicate separators. For + // simplicity it avoids the conversion entirely for relative + // paths or paths containing .. elements. For now, + // \\server\share paths are not converted to + // \\?\UNC\server\share paths because the rules for doing so + // are less well-specified. + if len(path) >= 2 && path[:2] == `\\` { + // Don't canonicalize UNC paths. + return path + } + if !isAbs(path) { + // Relative path + return path + } + + const prefix = `\\?` + + pathbuf := make([]byte, len(prefix)+len(path)+len(`\`)) + copy(pathbuf, prefix) + n := len(path) + r, w := 0, len(prefix) + for r < n { + switch { + case os.IsPathSeparator(path[r]): + // empty block + r++ + case path[r] == '.' && (r+1 == n || os.IsPathSeparator(path[r+1])): + // /./ + r++ + case r+1 < n && path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])): + // /../ is currently unhandled + return path + default: + pathbuf[w] = '\\' + w++ + for ; r < n && !os.IsPathSeparator(path[r]); r++ { + pathbuf[w] = path[r] + w++ + } + } + } + // A drive's root directory needs a trailing \ + if w == len(`\\?\c:`) { + pathbuf[w] = '\\' + w++ + } + return string(pathbuf[:w]) +} + +// syscallMode returns the syscall-specific mode bits from Go's portable mode bits. +func syscallMode(i os.FileMode) (o uint32) { + o |= uint32(i.Perm()) + if i&os.ModeSetuid != 0 { + o |= syscall.S_ISUID + } + if i&os.ModeSetgid != 0 { + o |= syscall.S_ISGID + } + if i&os.ModeSticky != 0 { + o |= syscall.S_ISVTX + } + // No mapping for Go's ModeTemporary (plan9 only). + return +} + +//If the bool is set to true then the file is opened with the parameters FILE_ATTRIBUTE_TEMPORARY and +// FILE_FLAG_DELETE_ON_CLOSE +func OpenFile(name string, flag int, perm os.FileMode, setToTempAndDelete bool) (file *os.File, err error) { + r, e := Open(fixLongPath(name), flag|windows.O_CLOEXEC, syscallMode(perm), setToTempAndDelete) + if e != nil { + return nil, e + } + return os.NewFile(uintptr(r), name), nil +} diff --git a/weed/storage/backend/memory_map/os_overloads/syscall_windows.go b/weed/storage/backend/memory_map/os_overloads/syscall_windows.go new file mode 100644 index 000000000..081cba431 --- /dev/null +++ b/weed/storage/backend/memory_map/os_overloads/syscall_windows.go @@ -0,0 +1,80 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Windows system calls. 
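+// This file adapts the standard library's Windows Open implementation so a
+// caller can opt in to FILE_ATTRIBUTE_TEMPORARY and FILE_FLAG_DELETE_ON_CLOSE
+// semantics through the extra setToTempAndDelete parameter.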
+ +package os_overloads + +import ( + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +// windows api calls + +//sys CreateFile(name *uint16, access uint32, mode uint32, sa *SecurityAttributes, createmode uint32, attrs uint32, templatefile int32) (handle Handle, err error) [failretval==InvalidHandle] = CreateFileW + +func makeInheritSa() *syscall.SecurityAttributes { + var sa syscall.SecurityAttributes + sa.Length = uint32(unsafe.Sizeof(sa)) + sa.InheritHandle = 1 + return &sa +} + +// opens the +func Open(path string, mode int, perm uint32, setToTempAndDelete bool) (fd syscall.Handle, err error) { + if len(path) == 0 { + return syscall.InvalidHandle, windows.ERROR_FILE_NOT_FOUND + } + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return syscall.InvalidHandle, err + } + var access uint32 + switch mode & (windows.O_RDONLY | windows.O_WRONLY | windows.O_RDWR) { + case windows.O_RDONLY: + access = windows.GENERIC_READ + case windows.O_WRONLY: + access = windows.GENERIC_WRITE + case windows.O_RDWR: + access = windows.GENERIC_READ | windows.GENERIC_WRITE + } + if mode&windows.O_CREAT != 0 { + access |= windows.GENERIC_WRITE + } + if mode&windows.O_APPEND != 0 { + access &^= windows.GENERIC_WRITE + access |= windows.FILE_APPEND_DATA + } + sharemode := uint32(windows.FILE_SHARE_READ | windows.FILE_SHARE_WRITE) + var sa *syscall.SecurityAttributes + if mode&windows.O_CLOEXEC == 0 { + sa = makeInheritSa() + } + var createmode uint32 + switch { + case mode&(windows.O_CREAT|windows.O_EXCL) == (windows.O_CREAT | windows.O_EXCL): + createmode = windows.CREATE_NEW + case mode&(windows.O_CREAT|windows.O_TRUNC) == (windows.O_CREAT | windows.O_TRUNC): + createmode = windows.CREATE_ALWAYS + case mode&windows.O_CREAT == windows.O_CREAT: + createmode = windows.OPEN_ALWAYS + case mode&windows.O_TRUNC == windows.O_TRUNC: + createmode = windows.TRUNCATE_EXISTING + default: + createmode = windows.OPEN_EXISTING + } + + var h syscall.Handle + var e error + + if setToTempAndDelete { + h, e = syscall.CreateFile(pathp, access, sharemode, sa, createmode, (windows.FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_DELETE_ON_CLOSE), 0) + } else { + h, e = syscall.CreateFile(pathp, access, sharemode, sa, createmode, windows.FILE_ATTRIBUTE_NORMAL, 0) + } + return h, e +} diff --git a/weed/storage/backend/memory_map/os_overloads/types_windows.go b/weed/storage/backend/memory_map/os_overloads/types_windows.go new file mode 100644 index 000000000..254ba3002 --- /dev/null +++ b/weed/storage/backend/memory_map/os_overloads/types_windows.go @@ -0,0 +1,9 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
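+// Package os_overloads exposes an os.OpenFile variant whose Windows
+// implementation can mark a file as temporary and delete-on-close.
+// A typical (hypothetical) call from volume-loading code:
+//
+//	f, err := os_overloads.OpenFile(name, os.O_RDWR|os.O_CREATE, 0644, true)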
+ +package os_overloads + +const ( + FILE_FLAG_DELETE_ON_CLOSE = 0x04000000 +) diff --git a/weed/storage/backend/s3_backend/s3_backend.go b/weed/storage/backend/s3_backend/s3_backend.go new file mode 100644 index 000000000..8d71861c2 --- /dev/null +++ b/weed/storage/backend/s3_backend/s3_backend.go @@ -0,0 +1,177 @@ +package s3_backend + +import ( + "fmt" + "io" + "os" + "strings" + "time" + + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/google/uuid" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/backend" +) + +func init() { + backend.BackendStorageFactories["s3"] = &S3BackendFactory{} +} + +type S3BackendFactory struct { +} + +func (factory *S3BackendFactory) StorageType() backend.StorageType { + return backend.StorageType("s3") +} +func (factory *S3BackendFactory) BuildStorage(configuration backend.StringProperties, configPrefix string, id string) (backend.BackendStorage, error) { + return newS3BackendStorage(configuration, configPrefix, id) +} + +type S3BackendStorage struct { + id string + aws_access_key_id string + aws_secret_access_key string + region string + bucket string + conn s3iface.S3API +} + +func newS3BackendStorage(configuration backend.StringProperties, configPrefix string, id string) (s *S3BackendStorage, err error) { + s = &S3BackendStorage{} + s.id = id + s.aws_access_key_id = configuration.GetString(configPrefix + "aws_access_key_id") + s.aws_secret_access_key = configuration.GetString(configPrefix + "aws_secret_access_key") + s.region = configuration.GetString(configPrefix + "region") + s.bucket = configuration.GetString(configPrefix + "bucket") + s.conn, err = createSession(s.aws_access_key_id, s.aws_secret_access_key, s.region) + + glog.V(0).Infof("created backend storage s3.%s for region %s bucket %s", s.id, s.region, s.bucket) + return +} + +func (s *S3BackendStorage) ToProperties() map[string]string { + m := make(map[string]string) + m["aws_access_key_id"] = s.aws_access_key_id + m["aws_secret_access_key"] = s.aws_secret_access_key + m["region"] = s.region + m["bucket"] = s.bucket + return m +} + +func (s *S3BackendStorage) NewStorageFile(key string, tierInfo *volume_server_pb.VolumeInfo) backend.BackendStorageFile { + if strings.HasPrefix(key, "/") { + key = key[1:] + } + + f := &S3BackendStorageFile{ + backendStorage: s, + key: key, + tierInfo: tierInfo, + } + + return f +} + +func (s *S3BackendStorage) CopyFile(f *os.File, attributes map[string]string, fn func(progressed int64, percentage float32) error) (key string, size int64, err error) { + randomUuid, _ := uuid.NewRandom() + key = randomUuid.String() + + glog.V(1).Infof("copying dat file of %s to remote s3.%s as %s", f.Name(), s.id, key) + + size, err = uploadToS3(s.conn, f.Name(), s.bucket, key, attributes, fn) + + return +} + +func (s *S3BackendStorage) DownloadFile(fileName string, key string, fn func(progressed int64, percentage float32) error) (size int64, err error) { + + glog.V(1).Infof("download dat file of %s from remote s3.%s as %s", fileName, s.id, key) + + size, err = downloadFromS3(s.conn, fileName, s.bucket, key, fn) + + return +} + +func (s *S3BackendStorage) DeleteFile(key string) (err error) { + + glog.V(1).Infof("delete dat file %s from remote", key) + + err = deleteFromS3(s.conn, s.bucket, key) + + return +} + +type S3BackendStorageFile struct { + backendStorage *S3BackendStorage + key string + tierInfo 
*volume_server_pb.VolumeInfo +} + +func (s3backendStorageFile S3BackendStorageFile) ReadAt(p []byte, off int64) (n int, err error) { + + bytesRange := fmt.Sprintf("bytes=%d-%d", off, off+int64(len(p))-1) + + // glog.V(0).Infof("read %s %s", s3backendStorageFile.key, bytesRange) + + getObjectOutput, getObjectErr := s3backendStorageFile.backendStorage.conn.GetObject(&s3.GetObjectInput{ + Bucket: &s3backendStorageFile.backendStorage.bucket, + Key: &s3backendStorageFile.key, + Range: &bytesRange, + }) + + if getObjectErr != nil { + return 0, fmt.Errorf("bucket %s GetObject %s: %v", s3backendStorageFile.backendStorage.bucket, s3backendStorageFile.key, getObjectErr) + } + defer getObjectOutput.Body.Close() + + glog.V(4).Infof("read %s %s", s3backendStorageFile.key, bytesRange) + glog.V(4).Infof("content range: %s, contentLength: %d", *getObjectOutput.ContentRange, *getObjectOutput.ContentLength) + + for { + if n, err = getObjectOutput.Body.Read(p); err == nil && n < len(p) { + p = p[n:] + } else { + break + } + } + + if err == io.EOF { + err = nil + } + + return +} + +func (s3backendStorageFile S3BackendStorageFile) WriteAt(p []byte, off int64) (n int, err error) { + panic("not implemented") +} + +func (s3backendStorageFile S3BackendStorageFile) Truncate(off int64) error { + panic("not implemented") +} + +func (s3backendStorageFile S3BackendStorageFile) Close() error { + return nil +} + +func (s3backendStorageFile S3BackendStorageFile) GetStat() (datSize int64, modTime time.Time, err error) { + + files := s3backendStorageFile.tierInfo.GetFiles() + + if len(files) == 0 { + err = fmt.Errorf("remote file info not found") + return + } + + datSize = int64(files[0].FileSize) + modTime = time.Unix(int64(files[0].ModifiedTime), 0) + + return +} + +func (s3backendStorageFile S3BackendStorageFile) Name() string { + return s3backendStorageFile.key +} diff --git a/weed/storage/backend/s3_backend/s3_download.go b/weed/storage/backend/s3_backend/s3_download.go new file mode 100644 index 000000000..dbc28446a --- /dev/null +++ b/weed/storage/backend/s3_backend/s3_download.go @@ -0,0 +1,98 @@ +package s3_backend + +import ( + "fmt" + "os" + "sync/atomic" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/aws/aws-sdk-go/service/s3/s3manager" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +func downloadFromS3(sess s3iface.S3API, destFileName string, sourceBucket string, sourceKey string, + fn func(progressed int64, percentage float32) error) (fileSize int64, err error) { + + fileSize, err = getFileSize(sess, sourceBucket, sourceKey) + if err != nil { + return + } + + //open the file + f, err := os.OpenFile(destFileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return 0, fmt.Errorf("failed to open file %q, %v", destFileName, err) + } + defer f.Close() + + // Create a downloader with the session and custom options + downloader := s3manager.NewDownloaderWithClient(sess, func(u *s3manager.Downloader) { + u.PartSize = int64(64 * 1024 * 1024) + u.Concurrency = 5 + }) + + fileWriter := &s3DownloadProgressedWriter{ + fp: f, + size: fileSize, + written: 0, + fn: fn, + } + + // Download the file from S3. 
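+	// the s3manager downloader fetches parts concurrently and delivers them
+	// through the writer's WriteAt, so progress is tallied with an atomic
+	// counter in s3DownloadProgressedWriter rather than assumed sequential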
+ fileSize, err = downloader.Download(fileWriter, &s3.GetObjectInput{ + Bucket: aws.String(sourceBucket), + Key: aws.String(sourceKey), + }) + if err != nil { + return fileSize, fmt.Errorf("failed to download file %s: %v", destFileName, err) + } + + glog.V(1).Infof("downloaded file %s\n", destFileName) + + return +} + +// adapted from https://github.com/aws/aws-sdk-go/pull/1868 +// and https://petersouter.xyz/s3-download-progress-bar-in-golang/ +type s3DownloadProgressedWriter struct { + fp *os.File + size int64 + written int64 + fn func(progressed int64, percentage float32) error +} + +func (w *s3DownloadProgressedWriter) WriteAt(p []byte, off int64) (int, error) { + n, err := w.fp.WriteAt(p, off) + if err != nil { + return n, err + } + + // Got the length have read( or means has uploaded), and you can construct your message + atomic.AddInt64(&w.written, int64(n)) + + if w.fn != nil { + written := w.written + if err := w.fn(written, float32(written*100)/float32(w.size)); err != nil { + return n, err + } + } + + return n, err +} + +func getFileSize(svc s3iface.S3API, bucket string, key string) (filesize int64, error error) { + params := &s3.HeadObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + } + + resp, err := svc.HeadObject(params) + if err != nil { + return 0, err + } + + return *resp.ContentLength, nil +} diff --git a/weed/storage/backend/s3_backend/s3_sessions.go b/weed/storage/backend/s3_backend/s3_sessions.go new file mode 100644 index 000000000..5fdbcb66b --- /dev/null +++ b/weed/storage/backend/s3_backend/s3_sessions.go @@ -0,0 +1,62 @@ +package s3_backend + +import ( + "fmt" + "sync" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3iface" +) + +var ( + s3Sessions = make(map[string]s3iface.S3API) + sessionsLock sync.RWMutex +) + +func getSession(region string) (s3iface.S3API, bool) { + sessionsLock.RLock() + defer sessionsLock.RUnlock() + + sess, found := s3Sessions[region] + return sess, found +} + +func createSession(awsAccessKeyId, awsSecretAccessKey, region string) (s3iface.S3API, error) { + + sessionsLock.Lock() + defer sessionsLock.Unlock() + + if t, found := s3Sessions[region]; found { + return t, nil + } + + config := &aws.Config{ + Region: aws.String(region), + } + if awsAccessKeyId != "" && awsSecretAccessKey != "" { + config.Credentials = credentials.NewStaticCredentials(awsAccessKeyId, awsSecretAccessKey, "") + } + + sess, err := session.NewSession(config) + if err != nil { + return nil, fmt.Errorf("create aws session in region %s: %v", region, err) + } + + t := s3.New(sess) + + s3Sessions[region] = t + + return t, nil + +} + +func deleteFromS3(sess s3iface.S3API, sourceBucket string, sourceKey string) (err error) { + _, err = sess.DeleteObject(&s3.DeleteObjectInput{ + Bucket: aws.String(sourceBucket), + Key: aws.String(sourceKey), + }) + return err +} diff --git a/weed/storage/backend/s3_backend/s3_upload.go b/weed/storage/backend/s3_backend/s3_upload.go new file mode 100644 index 000000000..500a85590 --- /dev/null +++ b/weed/storage/backend/s3_backend/s3_upload.go @@ -0,0 +1,114 @@ +package s3_backend + +import ( + "fmt" + "os" + "sync/atomic" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/aws/aws-sdk-go/service/s3/s3manager" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +func uploadToS3(sess s3iface.S3API, filename string, destBucket 
string, destKey string, + attributes map[string]string, + fn func(progressed int64, percentage float32) error) (fileSize int64, err error) { + + //open the file + f, err := os.Open(filename) + if err != nil { + return 0, fmt.Errorf("failed to open file %q, %v", filename, err) + } + defer f.Close() + + info, err := f.Stat() + if err != nil { + return 0, fmt.Errorf("failed to stat file %q, %v", filename, err) + } + + fileSize = info.Size() + + partSize := int64(64 * 1024 * 1024) // The minimum/default allowed part size is 5MB + for partSize*1000 < fileSize { + partSize *= 4 + } + + // Create an uploader with the session and custom options + uploader := s3manager.NewUploaderWithClient(sess, func(u *s3manager.Uploader) { + u.PartSize = partSize + u.Concurrency = 5 + }) + + fileReader := &s3UploadProgressedReader{ + fp: f, + size: fileSize, + read: -fileSize, + fn: fn, + } + + // process tagging + tags := "" + for k, v := range attributes { + if len(tags) > 0 { + tags = tags + "&" + } + tags = tags + k + "=" + v + } + + // Upload the file to S3. + var result *s3manager.UploadOutput + result, err = uploader.Upload(&s3manager.UploadInput{ + Bucket: aws.String(destBucket), + Key: aws.String(destKey), + Body: fileReader, + ACL: aws.String("private"), + ServerSideEncryption: aws.String("AES256"), + StorageClass: aws.String("STANDARD_IA"), + Tagging: aws.String(tags), + }) + + //in case it fails to upload + if err != nil { + return 0, fmt.Errorf("failed to upload file %s: %v", filename, err) + } + glog.V(1).Infof("file %s uploaded to %s\n", filename, result.Location) + + return +} + +// adapted from https://github.com/aws/aws-sdk-go/pull/1868 +type s3UploadProgressedReader struct { + fp *os.File + size int64 + read int64 + fn func(progressed int64, percentage float32) error +} + +func (r *s3UploadProgressedReader) Read(p []byte) (int, error) { + return r.fp.Read(p) +} + +func (r *s3UploadProgressedReader) ReadAt(p []byte, off int64) (int, error) { + n, err := r.fp.ReadAt(p, off) + if err != nil { + return n, err + } + + // Got the length have read( or means has uploaded), and you can construct your message + atomic.AddInt64(&r.read, int64(n)) + + if r.fn != nil { + read := r.read + if err := r.fn(read, float32(read*100)/float32(r.size)); err != nil { + return n, err + } + } + + return n, err +} + +func (r *s3UploadProgressedReader) Seek(offset int64, whence int) (int64, error) { + return r.fp.Seek(offset, whence) +} diff --git a/weed/storage/compact_map.go b/weed/storage/compact_map.go deleted file mode 100644 index 0f56922b8..000000000 --- a/weed/storage/compact_map.go +++ /dev/null @@ -1,210 +0,0 @@ -package storage - -import ( - "strconv" - "sync" -) - -type NeedleValue struct { - Key Key - Offset uint32 `comment:"Volume offset"` //since aligned to 8 bytes, range is 4G*8=32G - Size uint32 `comment:"Size of the data portion"` -} - -const ( - batch = 100000 -) - -type Key uint64 - -func (k Key) String() string { - return strconv.FormatUint(uint64(k), 10) -} - -type CompactSection struct { - sync.RWMutex - values []NeedleValue - overflow map[Key]NeedleValue - start Key - end Key - counter int -} - -func NewCompactSection(start Key) *CompactSection { - return &CompactSection{ - values: make([]NeedleValue, batch), - overflow: make(map[Key]NeedleValue), - start: start, - } -} - -//return old entry size -func (cs *CompactSection) Set(key Key, offset, size uint32) (oldOffset, oldSize uint32) { - cs.Lock() - if key > cs.end { - cs.end = key - } - if i := cs.binarySearchValues(key); i >= 0 { - oldOffset, 
oldSize = cs.values[i].Offset, cs.values[i].Size - //println("key", key, "old size", ret) - cs.values[i].Offset, cs.values[i].Size = offset, size - } else { - needOverflow := cs.counter >= batch - needOverflow = needOverflow || cs.counter > 0 && cs.values[cs.counter-1].Key > key - if needOverflow { - //println("start", cs.start, "counter", cs.counter, "key", key) - if oldValue, found := cs.overflow[key]; found { - oldOffset, oldSize = oldValue.Offset, oldValue.Size - } - cs.overflow[key] = NeedleValue{Key: key, Offset: offset, Size: size} - } else { - p := &cs.values[cs.counter] - p.Key, p.Offset, p.Size = key, offset, size - //println("added index", cs.counter, "key", key, cs.values[cs.counter].Key) - cs.counter++ - } - } - cs.Unlock() - return -} - -//return old entry size -func (cs *CompactSection) Delete(key Key) uint32 { - cs.Lock() - ret := uint32(0) - if i := cs.binarySearchValues(key); i >= 0 { - if cs.values[i].Size > 0 { - ret = cs.values[i].Size - cs.values[i].Size = 0 - } - } - if v, found := cs.overflow[key]; found { - delete(cs.overflow, key) - ret = v.Size - } - cs.Unlock() - return ret -} -func (cs *CompactSection) Get(key Key) (*NeedleValue, bool) { - cs.RLock() - if v, ok := cs.overflow[key]; ok { - cs.RUnlock() - return &v, true - } - if i := cs.binarySearchValues(key); i >= 0 { - cs.RUnlock() - return &cs.values[i], true - } - cs.RUnlock() - return nil, false -} -func (cs *CompactSection) binarySearchValues(key Key) int { - l, h := 0, cs.counter-1 - if h >= 0 && cs.values[h].Key < key { - return -2 - } - //println("looking for key", key) - for l <= h { - m := (l + h) / 2 - //println("mid", m, "key", cs.values[m].Key, cs.values[m].Offset, cs.values[m].Size) - if cs.values[m].Key < key { - l = m + 1 - } else if key < cs.values[m].Key { - h = m - 1 - } else { - //println("found", m) - return m - } - } - return -1 -} - -//This map assumes mostly inserting increasing keys -type CompactMap struct { - list []*CompactSection -} - -func NewCompactMap() CompactMap { - return CompactMap{} -} - -func (cm *CompactMap) Set(key Key, offset, size uint32) (oldOffset, oldSize uint32) { - x := cm.binarySearchCompactSection(key) - if x < 0 { - //println(x, "creating", len(cm.list), "section, starting", key) - cm.list = append(cm.list, NewCompactSection(key)) - x = len(cm.list) - 1 - //keep compact section sorted by start - for x > 0 { - if cm.list[x-1].start > cm.list[x].start { - cm.list[x-1], cm.list[x] = cm.list[x], cm.list[x-1] - x = x - 1 - } else { - break - } - } - } - return cm.list[x].Set(key, offset, size) -} -func (cm *CompactMap) Delete(key Key) uint32 { - x := cm.binarySearchCompactSection(key) - if x < 0 { - return uint32(0) - } - return cm.list[x].Delete(key) -} -func (cm *CompactMap) Get(key Key) (*NeedleValue, bool) { - x := cm.binarySearchCompactSection(key) - if x < 0 { - return nil, false - } - return cm.list[x].Get(key) -} -func (cm *CompactMap) binarySearchCompactSection(key Key) int { - l, h := 0, len(cm.list)-1 - if h < 0 { - return -5 - } - if cm.list[h].start <= key { - if cm.list[h].counter < batch || key <= cm.list[h].end { - return h - } - return -4 - } - for l <= h { - m := (l + h) / 2 - if key < cm.list[m].start { - h = m - 1 - } else { // cm.list[m].start <= key - if cm.list[m+1].start <= key { - l = m + 1 - } else { - return m - } - } - } - return -3 -} - -// Visit visits all entries or stop if any error when visiting -func (cm *CompactMap) Visit(visit func(NeedleValue) error) error { - for _, cs := range cm.list { - cs.RLock() - for _, v := range 
cs.overflow { - if err := visit(v); err != nil { - cs.RUnlock() - return err - } - } - for _, v := range cs.values { - if _, found := cs.overflow[v.Key]; !found { - if err := visit(v); err != nil { - cs.RUnlock() - return err - } - } - } - cs.RUnlock() - } - return nil -} diff --git a/weed/storage/disk_location.go b/weed/storage/disk_location.go index 9589d9281..f15303282 100644 --- a/weed/storage/disk_location.go +++ b/weed/storage/disk_location.go @@ -1,76 +1,91 @@ package storage import ( + "fmt" "io/ioutil" "os" "strings" "sync" - "fmt" - "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) type DiskLocation struct { Directory string MaxVolumeCount int - volumes map[VolumeId]*Volume - sync.RWMutex + volumes map[needle.VolumeId]*Volume + volumesLock sync.RWMutex + + // erasure coding + ecVolumes map[needle.VolumeId]*erasure_coding.EcVolume + ecVolumesLock sync.RWMutex } func NewDiskLocation(dir string, maxVolumeCount int) *DiskLocation { location := &DiskLocation{Directory: dir, MaxVolumeCount: maxVolumeCount} - location.volumes = make(map[VolumeId]*Volume) + location.volumes = make(map[needle.VolumeId]*Volume) + location.ecVolumes = make(map[needle.VolumeId]*erasure_coding.EcVolume) return location } -func (l *DiskLocation) volumeIdFromPath(dir os.FileInfo) (VolumeId, string, error) { +func (l *DiskLocation) volumeIdFromPath(dir os.FileInfo) (needle.VolumeId, string, error) { name := dir.Name() - if !dir.IsDir() && strings.HasSuffix(name, ".dat") { - collection := "" - base := name[:len(name)-len(".dat")] - i := strings.LastIndex(base, "_") - if i > 0 { - collection, base = base[0:i], base[i+1:] - } - vol, err := NewVolumeId(base) - return vol, collection, err + if !dir.IsDir() && strings.HasSuffix(name, ".idx") { + base := name[:len(name)-len(".idx")] + collection, volumeId, err := parseCollectionVolumeId(base) + return volumeId, collection, err } return 0, "", fmt.Errorf("Path is not a volume: %s", name) } -func (l *DiskLocation) loadExistingVolume(dir os.FileInfo, needleMapKind NeedleMapType, mutex *sync.RWMutex) { - name := dir.Name() - if !dir.IsDir() && strings.HasSuffix(name, ".dat") { - vid, collection, err := l.volumeIdFromPath(dir) - if err == nil { - mutex.RLock() - _, found := l.volumes[vid] - mutex.RUnlock() - if !found { - if v, e := NewVolume(l.Directory, collection, vid, needleMapKind, nil, nil, 0); e == nil { - mutex.Lock() - l.volumes[vid] = v - mutex.Unlock() - glog.V(0).Infof("data file %s, replicaPlacement=%s v=%d size=%d ttl=%s", - l.Directory+"/"+name, v.ReplicaPlacement, v.Version(), v.Size(), v.Ttl.String()) - } else { - glog.V(0).Infof("new volume %s error %s", name, e) - } - } - } +func parseCollectionVolumeId(base string) (collection string, vid needle.VolumeId, err error) { + i := strings.LastIndex(base, "_") + if i > 0 { + collection, base = base[0:i], base[i+1:] } + vol, err := needle.NewVolumeId(base) + return collection, vol, err } -func (l *DiskLocation) concurrentLoadingVolumes(needleMapKind NeedleMapType, concurrentFlag bool) { - var concurrency int - if concurrentFlag { - //You could choose a better optimized concurency value after testing at your environment - concurrency = 10 - } else { - concurrency = 1 +func (l *DiskLocation) loadExistingVolume(fileInfo os.FileInfo, needleMapKind NeedleMapType) bool { + name := fileInfo.Name() + if !fileInfo.IsDir() && strings.HasSuffix(name, ".idx") { + vid, collection, err := 
l.volumeIdFromPath(fileInfo) + if err != nil { + glog.Warningf("get volume id failed, %s, err : %s", name, err) + return false + } + + // void loading one volume more than once + l.volumesLock.RLock() + _, found := l.volumes[vid] + l.volumesLock.RUnlock() + if found { + glog.V(1).Infof("loaded volume, %v", vid) + return true + } + + v, e := NewVolume(l.Directory, collection, vid, needleMapKind, nil, nil, 0, 0) + if e != nil { + glog.V(0).Infof("new volume %s error %s", name, e) + return false + } + + l.volumesLock.Lock() + l.volumes[vid] = v + l.volumesLock.Unlock() + size, _, _ := v.FileStat() + glog.V(0).Infof("data file %s, replicaPlacement=%s v=%d size=%d ttl=%s", + l.Directory+"/"+name, v.ReplicaPlacement, v.Version(), size, v.Ttl.String()) + return true } + return false +} + +func (l *DiskLocation) concurrentLoadingVolumes(needleMapKind NeedleMapType, concurrency int) { task_queue := make(chan os.FileInfo, 10*concurrency) go func() { @@ -83,13 +98,12 @@ func (l *DiskLocation) concurrentLoadingVolumes(needleMapKind NeedleMapType, con }() var wg sync.WaitGroup - var mutex sync.RWMutex for workerNum := 0; workerNum < concurrency; workerNum++ { wg.Add(1) go func() { defer wg.Done() for dir := range task_queue { - l.loadExistingVolume(dir, needleMapKind, &mutex) + _ = l.loadExistingVolume(dir, needleMapKind) } }() } @@ -98,30 +112,62 @@ func (l *DiskLocation) concurrentLoadingVolumes(needleMapKind NeedleMapType, con } func (l *DiskLocation) loadExistingVolumes(needleMapKind NeedleMapType) { - l.Lock() - defer l.Unlock() - l.concurrentLoadingVolumes(needleMapKind, true) + l.concurrentLoadingVolumes(needleMapKind, 10) + glog.V(0).Infof("Store started on dir: %s with %d volumes max %d", l.Directory, len(l.volumes), l.MaxVolumeCount) + + l.loadAllEcShards() + glog.V(0).Infof("Store started on dir: %s with %d ec shards", l.Directory, len(l.ecVolumes)) - glog.V(0).Infoln("Store started on dir:", l.Directory, "with", len(l.volumes), "volumes", "max", l.MaxVolumeCount) } func (l *DiskLocation) DeleteCollectionFromDiskLocation(collection string) (e error) { - l.Lock() - defer l.Unlock() - for k, v := range l.volumes { - if v.Collection == collection { - e = l.deleteVolumeById(k) - if e != nil { - return + l.volumesLock.Lock() + delVolsMap := l.unmountVolumeByCollection(collection) + l.volumesLock.Unlock() + + l.ecVolumesLock.Lock() + delEcVolsMap := l.unmountEcVolumeByCollection(collection) + l.ecVolumesLock.Unlock() + + errChain := make(chan error, 2) + var wg sync.WaitGroup + wg.Add(2) + go func() { + for _, v := range delVolsMap { + if err := v.Destroy(); err != nil { + errChain <- err } } + wg.Done() + }() + + go func() { + for _, v := range delEcVolsMap { + v.Destroy() + } + wg.Done() + }() + + go func() { + wg.Wait() + close(errChain) + }() + + errBuilder := strings.Builder{} + for err := range errChain { + errBuilder.WriteString(err.Error()) + errBuilder.WriteString("; ") } + if errBuilder.Len() > 0 { + e = fmt.Errorf(errBuilder.String()) + } + return } -func (l *DiskLocation) deleteVolumeById(vid VolumeId) (e error) { +func (l *DiskLocation) deleteVolumeById(vid needle.VolumeId) (e error) { v, ok := l.volumes[vid] if !ok { return @@ -134,24 +180,16 @@ func (l *DiskLocation) deleteVolumeById(vid VolumeId) (e error) { return } -func (l *DiskLocation) LoadVolume(vid VolumeId, needleMapKind NeedleMapType) bool { - if dirs, err := ioutil.ReadDir(l.Directory); err == nil { - for _, dir := range dirs { - volId, _, err := l.volumeIdFromPath(dir) - if vid == volId && err == nil { - var mutex 
sync.RWMutex - l.loadExistingVolume(dir, needleMapKind, &mutex) - return true - } - } +func (l *DiskLocation) LoadVolume(vid needle.VolumeId, needleMapKind NeedleMapType) bool { + if fileInfo, found := l.LocateVolume(vid); found { + return l.loadExistingVolume(fileInfo, needleMapKind) } - return false } -func (l *DiskLocation) DeleteVolume(vid VolumeId) error { - l.Lock() - defer l.Unlock() +func (l *DiskLocation) DeleteVolume(vid needle.VolumeId) error { + l.volumesLock.Lock() + defer l.volumesLock.Unlock() _, ok := l.volumes[vid] if !ok { @@ -160,9 +198,9 @@ func (l *DiskLocation) DeleteVolume(vid VolumeId) error { return l.deleteVolumeById(vid) } -func (l *DiskLocation) UnloadVolume(vid VolumeId) error { - l.Lock() - defer l.Unlock() +func (l *DiskLocation) UnloadVolume(vid needle.VolumeId) error { + l.volumesLock.Lock() + defer l.volumesLock.Unlock() v, ok := l.volumes[vid] if !ok { @@ -173,34 +211,67 @@ func (l *DiskLocation) UnloadVolume(vid VolumeId) error { return nil } -func (l *DiskLocation) SetVolume(vid VolumeId, volume *Volume) { - l.Lock() - defer l.Unlock() +func (l *DiskLocation) unmountVolumeByCollection(collectionName string) map[needle.VolumeId]*Volume { + deltaVols := make(map[needle.VolumeId]*Volume, 0) + for k, v := range l.volumes { + if v.Collection == collectionName && !v.isCompacting { + deltaVols[k] = v + } + } + + for k := range deltaVols { + delete(l.volumes, k) + } + return deltaVols +} + +func (l *DiskLocation) SetVolume(vid needle.VolumeId, volume *Volume) { + l.volumesLock.Lock() + defer l.volumesLock.Unlock() l.volumes[vid] = volume } -func (l *DiskLocation) FindVolume(vid VolumeId) (*Volume, bool) { - l.RLock() - defer l.RUnlock() +func (l *DiskLocation) FindVolume(vid needle.VolumeId) (*Volume, bool) { + l.volumesLock.RLock() + defer l.volumesLock.RUnlock() v, ok := l.volumes[vid] return v, ok } func (l *DiskLocation) VolumesLen() int { - l.RLock() - defer l.RUnlock() + l.volumesLock.RLock() + defer l.volumesLock.RUnlock() return len(l.volumes) } func (l *DiskLocation) Close() { - l.Lock() - defer l.Unlock() - + l.volumesLock.Lock() for _, v := range l.volumes { v.Close() } + l.volumesLock.Unlock() + + l.ecVolumesLock.Lock() + for _, ecVolume := range l.ecVolumes { + ecVolume.Close() + } + l.ecVolumesLock.Unlock() + return } + +func (l *DiskLocation) LocateVolume(vid needle.VolumeId) (os.FileInfo, bool) { + if fileInfos, err := ioutil.ReadDir(l.Directory); err == nil { + for _, fileInfo := range fileInfos { + volId, _, err := l.volumeIdFromPath(fileInfo) + if vid == volId && err == nil { + return fileInfo, true + } + } + } + + return nil, false +} diff --git a/weed/storage/disk_location_ec.go b/weed/storage/disk_location_ec.go new file mode 100644 index 000000000..f6c44e966 --- /dev/null +++ b/weed/storage/disk_location_ec.go @@ -0,0 +1,185 @@ +package storage + +import ( + "fmt" + "io/ioutil" + "path" + "regexp" + "sort" + "strconv" + + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +var ( + re = regexp.MustCompile("\\.ec[0-9][0-9]") +) + +func (l *DiskLocation) FindEcVolume(vid needle.VolumeId) (*erasure_coding.EcVolume, bool) { + l.ecVolumesLock.RLock() + defer l.ecVolumesLock.RUnlock() + + ecVolume, ok := l.ecVolumes[vid] + if ok { + return ecVolume, true + } + return nil, false +} + +func (l *DiskLocation) DestroyEcVolume(vid needle.VolumeId) { + l.ecVolumesLock.Lock() + defer l.ecVolumesLock.Unlock() + + ecVolume, found := l.ecVolumes[vid] + if found { + ecVolume.Destroy() + 
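// forget the destroyed volume so later lookups no longer return it
+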
delete(l.ecVolumes, vid) + } +} + +func (l *DiskLocation) FindEcShard(vid needle.VolumeId, shardId erasure_coding.ShardId) (*erasure_coding.EcVolumeShard, bool) { + l.ecVolumesLock.RLock() + defer l.ecVolumesLock.RUnlock() + + ecVolume, ok := l.ecVolumes[vid] + if !ok { + return nil, false + } + for _, ecShard := range ecVolume.Shards { + if ecShard.ShardId == shardId { + return ecShard, true + } + } + return nil, false +} + +func (l *DiskLocation) LoadEcShard(collection string, vid needle.VolumeId, shardId erasure_coding.ShardId) (err error) { + + ecVolumeShard, err := erasure_coding.NewEcVolumeShard(l.Directory, collection, vid, shardId) + if err != nil { + return fmt.Errorf("failed to create ec shard %d.%d: %v", vid, shardId, err) + } + l.ecVolumesLock.Lock() + defer l.ecVolumesLock.Unlock() + ecVolume, found := l.ecVolumes[vid] + if !found { + ecVolume, err = erasure_coding.NewEcVolume(l.Directory, collection, vid) + if err != nil { + return fmt.Errorf("failed to create ec volume %d: %v", vid, err) + } + l.ecVolumes[vid] = ecVolume + } + ecVolume.AddEcVolumeShard(ecVolumeShard) + + return nil +} + +func (l *DiskLocation) UnloadEcShard(vid needle.VolumeId, shardId erasure_coding.ShardId) bool { + + l.ecVolumesLock.Lock() + defer l.ecVolumesLock.Unlock() + + ecVolume, found := l.ecVolumes[vid] + if !found { + return false + } + if _, deleted := ecVolume.DeleteEcVolumeShard(shardId); deleted { + if len(ecVolume.Shards) == 0 { + delete(l.ecVolumes, vid) + ecVolume.Close() + } + return true + } + + return true +} + +func (l *DiskLocation) loadEcShards(shards []string, collection string, vid needle.VolumeId) (err error) { + + for _, shard := range shards { + shardId, err := strconv.ParseInt(path.Ext(shard)[3:], 10, 64) + if err != nil { + return fmt.Errorf("failed to parse ec shard name %v: %v", shard, err) + } + + err = l.LoadEcShard(collection, vid, erasure_coding.ShardId(shardId)) + if err != nil { + return fmt.Errorf("failed to load ec shard %v: %v", shard, err) + } + } + + return nil +} + +func (l *DiskLocation) loadAllEcShards() (err error) { + + fileInfos, err := ioutil.ReadDir(l.Directory) + if err != nil { + return fmt.Errorf("load all ec shards in dir %s: %v", l.Directory, err) + } + + sort.Slice(fileInfos, func(i, j int) bool { + return fileInfos[i].Name() < fileInfos[j].Name() + }) + + var sameVolumeShards []string + var prevVolumeId needle.VolumeId + for _, fileInfo := range fileInfos { + if fileInfo.IsDir() { + continue + } + ext := path.Ext(fileInfo.Name()) + name := fileInfo.Name() + baseName := name[:len(name)-len(ext)] + + collection, volumeId, err := parseCollectionVolumeId(baseName) + if err != nil { + continue + } + + if re.MatchString(ext) { + if prevVolumeId == 0 || volumeId == prevVolumeId { + sameVolumeShards = append(sameVolumeShards, fileInfo.Name()) + } else { + sameVolumeShards = []string{fileInfo.Name()} + } + prevVolumeId = volumeId + continue + } + + if ext == ".ecx" && volumeId == prevVolumeId { + if err = l.loadEcShards(sameVolumeShards, collection, volumeId); err != nil { + return fmt.Errorf("loadEcShards collection:%v volumeId:%d : %v", collection, volumeId, err) + } + prevVolumeId = volumeId + continue + } + + } + return nil +} + +func (l *DiskLocation) deleteEcVolumeById(vid needle.VolumeId) (e error) { + ecVolume, ok := l.ecVolumes[vid] + if !ok { + return + } + ecVolume.Destroy() + delete(l.ecVolumes, vid) + return +} + +func (l *DiskLocation) unmountEcVolumeByCollection(collectionName string) map[needle.VolumeId]*erasure_coding.EcVolume { + deltaVols 
:= make(map[needle.VolumeId]*erasure_coding.EcVolume, 0) + for k, v := range l.ecVolumes { + if v.Collection == collectionName { + deltaVols[k] = v + } + } + + for k, _ := range deltaVols { + delete(l.ecVolumes, k) + } + return deltaVols +} diff --git a/weed/storage/erasure_coding/1.dat b/weed/storage/erasure_coding/1.dat Binary files differnew file mode 100644 index 000000000..869427926 --- /dev/null +++ b/weed/storage/erasure_coding/1.dat diff --git a/weed/storage/erasure_coding/1.idx b/weed/storage/erasure_coding/1.idx Binary files differnew file mode 100644 index 000000000..65a950e64 --- /dev/null +++ b/weed/storage/erasure_coding/1.idx diff --git a/weed/storage/erasure_coding/ec_decoder.go b/weed/storage/erasure_coding/ec_decoder.go new file mode 100644 index 000000000..ae77cee3f --- /dev/null +++ b/weed/storage/erasure_coding/ec_decoder.go @@ -0,0 +1,198 @@ +package erasure_coding + +import ( + "fmt" + "io" + "os" + + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/idx" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + "github.com/chrislusf/seaweedfs/weed/storage/types" +) + +// write .idx file from .ecx and .ecj files +func WriteIdxFileFromEcIndex(baseFileName string) (err error) { + + ecxFile, openErr := os.OpenFile(baseFileName+".ecx", os.O_RDONLY, 0644) + if openErr != nil { + return fmt.Errorf("cannot open ec index %s.ecx: %v", baseFileName, openErr) + } + defer ecxFile.Close() + + idxFile, openErr := os.OpenFile(baseFileName+".idx", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if openErr != nil { + return fmt.Errorf("cannot open %s.idx: %v", baseFileName, openErr) + } + defer idxFile.Close() + + io.Copy(idxFile, ecxFile) + + err = iterateEcjFile(baseFileName, func(key types.NeedleId) error { + + bytes := needle_map.ToBytes(key, types.Offset{}, types.TombstoneFileSize) + idxFile.Write(bytes) + + return nil + }) + + return err +} + +// FindDatFileSize calculate .dat file size from max offset entry +// there may be extra deletions after that entry +// but they are deletions anyway +func FindDatFileSize(baseFileName string) (datSize int64, err error) { + + version, err := readEcVolumeVersion(baseFileName) + if err != nil { + return 0, fmt.Errorf("read ec volume %s version: %v", baseFileName, err) + } + + err = iterateEcxFile(baseFileName, func(key types.NeedleId, offset types.Offset, size uint32) error { + + if size == types.TombstoneFileSize { + return nil + } + + entryStopOffset := offset.ToAcutalOffset() + needle.GetActualSize(size, version) + if datSize < entryStopOffset { + datSize = entryStopOffset + } + + return nil + }) + + return +} + +func readEcVolumeVersion(baseFileName string) (version needle.Version, err error) { + + // find volume version + datFile, err := os.OpenFile(baseFileName+".ec00", os.O_RDONLY, 0644) + if err != nil { + return 0, fmt.Errorf("open ec volume %s superblock: %v", baseFileName, err) + } + datBackend := backend.NewDiskFile(datFile) + + superBlock, err := super_block.ReadSuperBlock(datBackend) + datBackend.Close() + if err != nil { + return 0, fmt.Errorf("read ec volume %s superblock: %v", baseFileName, err) + } + + return superBlock.Version, nil + +} + +func iterateEcxFile(baseFileName string, processNeedleFn func(key types.NeedleId, offset types.Offset, size uint32) error) error { + ecxFile, openErr := os.OpenFile(baseFileName+".ecx", os.O_RDONLY, 0644) + if openErr != nil 
+
+func iterateEcxFile(baseFileName string, processNeedleFn func(key types.NeedleId, offset types.Offset, size uint32) error) error {
+	ecxFile, openErr := os.OpenFile(baseFileName+".ecx", os.O_RDONLY, 0644)
+	if openErr != nil {
+		return fmt.Errorf("cannot open ec index %s.ecx: %v", baseFileName, openErr)
+	}
+	defer ecxFile.Close()
+
+	buf := make([]byte, types.NeedleMapEntrySize)
+	for {
+		n, err := ecxFile.Read(buf)
+		if n != types.NeedleMapEntrySize {
+			if err == io.EOF {
+				return nil
+			}
+			return err
+		}
+		key, offset, size := idx.IdxFileEntry(buf)
+		if processNeedleFn != nil {
+			err = processNeedleFn(key, offset, size)
+		}
+		if err != nil {
+			if err != io.EOF {
+				return err
+			}
+			return nil
+		}
+	}
+
+}
+
+func iterateEcjFile(baseFileName string, processNeedleFn func(key types.NeedleId) error) error {
+	ecjFile, openErr := os.OpenFile(baseFileName+".ecj", os.O_RDONLY, 0644)
+	if openErr != nil {
+		return fmt.Errorf("cannot open ec journal %s.ecj: %v", baseFileName, openErr)
+	}
+	defer ecjFile.Close()
+
+	buf := make([]byte, types.NeedleIdSize)
+	for {
+		n, err := ecjFile.Read(buf)
+		if n != types.NeedleIdSize {
+			if err == io.EOF {
+				return nil
+			}
+			return err
+		}
+		if processNeedleFn != nil {
+			err = processNeedleFn(types.BytesToNeedleId(buf))
+		}
+		if err != nil {
+			if err == io.EOF {
+				return nil
+			}
+			return err
+		}
+	}
+
+}
+
+// WriteDatFile generates the .dat file from the .ec00 ~ .ec09 data shard files
+func WriteDatFile(baseFileName string, datFileSize int64) error {
+
+	datFile, openErr := os.OpenFile(baseFileName+".dat", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+	if openErr != nil {
+		return fmt.Errorf("cannot write volume %s.dat: %v", baseFileName, openErr)
+	}
+	defer datFile.Close()
+
+	inputFiles := make([]*os.File, DataShardsCount)
+
+	for shardId := 0; shardId < DataShardsCount; shardId++ {
+		shardFileName := baseFileName + ToExt(shardId)
+		inputFiles[shardId], openErr = os.OpenFile(shardFileName, os.O_RDONLY, 0)
+		if openErr != nil {
+			return openErr
+		}
+		defer inputFiles[shardId].Close()
+	}
+
+	for datFileSize >= DataShardsCount*ErasureCodingLargeBlockSize {
+		for shardId := 0; shardId < DataShardsCount; shardId++ {
+			w, err := io.CopyN(datFile, inputFiles[shardId], ErasureCodingLargeBlockSize)
+			if w != ErasureCodingLargeBlockSize {
+				return fmt.Errorf("copy %s large block %d: %v", baseFileName, shardId, err)
+			}
+			datFileSize -= ErasureCodingLargeBlockSize
+		}
+	}
+
+	for datFileSize > 0 {
+		for shardId := 0; shardId < DataShardsCount; shardId++ {
+			toRead := min(datFileSize, ErasureCodingSmallBlockSize)
+			w, err := io.CopyN(datFile, inputFiles[shardId], toRead)
+			if w != toRead {
+				return fmt.Errorf("copy %s small block %d: %v", baseFileName, shardId, err)
+			}
+			datFileSize -= toRead
+		}
+	}
+
+	return nil
+}
+
+func min(x, y int64) int64 {
+	if x > y {
+		return y
+	}
+	return x
+}
diff --git a/weed/storage/erasure_coding/ec_encoder.go b/weed/storage/erasure_coding/ec_encoder.go
new file mode 100644
index 000000000..97c3ccbd9
--- /dev/null
+++ b/weed/storage/erasure_coding/ec_encoder.go
@@ -0,0 +1,304 @@
+package erasure_coding
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/klauspost/reedsolomon"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/storage/idx"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle_map"
+	"github.com/chrislusf/seaweedfs/weed/storage/types"
+	"github.com/chrislusf/seaweedfs/weed/util"
+)
+
+const (
+	DataShardsCount             = 10
+	ParityShardsCount           = 4
+	TotalShardsCount            = DataShardsCount + ParityShardsCount
+	ErasureCodingLargeBlockSize = 1024 * 1024 * 1024 // 1GB
+	ErasureCodingSmallBlockSize = 1024 * 1024        // 1MB
+)
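+
+// With these parameters each volume is cut into 10 data shards plus 4 parity
+// shards, so any 10 of the 14 shard files suffice to reconstruct the volume
+// (the defining property of Reed-Solomon coding).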
+
+// WriteSortedFileFromIdx generates a sorted needle index file (named by the
+// ext parameter, e.g. ".ecx") from an existing .idx file; all keys are written
+// in ascending order
+func WriteSortedFileFromIdx(baseFileName string, ext string) error {
+
+	nm, err := readNeedleMap(baseFileName)
+	if nm != nil {
+		defer nm.Close()
+	}
+	if err != nil {
+		return fmt.Errorf("readNeedleMap: %v", err)
+	}
+
+	ecxFile, err := os.OpenFile(baseFileName+ext, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		return fmt.Errorf("failed to open ecx file: %v", err)
+	}
+	defer ecxFile.Close()
+
+	err = nm.AscendingVisit(func(value needle_map.NeedleValue) error {
+		bytes := value.ToBytes()
+		_, writeErr := ecxFile.Write(bytes)
+		return writeErr
+	})
+
+	if err != nil {
+		return fmt.Errorf("failed to visit idx file: %v", err)
+	}
+
+	return nil
+}
+
+// WriteEcFiles generates the .ec00 ~ .ec13 shard files
+func WriteEcFiles(baseFileName string) error {
+	return generateEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
+}
+
+func RebuildEcFiles(baseFileName string) ([]uint32, error) {
+	return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize)
+}
+
+func ToExt(ecIndex int) string {
+	return fmt.Sprintf(".ec%02d", ecIndex)
+}
+
+func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
+	file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
+	if err != nil {
+		return fmt.Errorf("failed to open dat file: %v", err)
+	}
+	defer file.Close()
+
+	fi, err := file.Stat()
+	if err != nil {
+		return fmt.Errorf("failed to stat dat file: %v", err)
+	}
+	err = encodeDatFile(fi.Size(), baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
+	if err != nil {
+		return fmt.Errorf("encodeDatFile: %v", err)
+	}
+	return nil
+}
+
+func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) (generatedShardIds []uint32, err error) {
+
+	shardHasData := make([]bool, TotalShardsCount)
+	inputFiles := make([]*os.File, TotalShardsCount)
+	outputFiles := make([]*os.File, TotalShardsCount)
+	for shardId := 0; shardId < TotalShardsCount; shardId++ {
+		shardFileName := baseFileName + ToExt(shardId)
+		if util.FileExists(shardFileName) {
+			shardHasData[shardId] = true
+			inputFiles[shardId], err = os.OpenFile(shardFileName, os.O_RDONLY, 0)
+			if err != nil {
+				return nil, err
+			}
+			defer inputFiles[shardId].Close()
+		} else {
+			outputFiles[shardId], err = os.OpenFile(shardFileName, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644)
+			if err != nil {
+				return nil, err
+			}
+			defer outputFiles[shardId].Close()
+			generatedShardIds = append(generatedShardIds, uint32(shardId))
+		}
+	}
+
+	err = rebuildEcFiles(shardHasData, inputFiles, outputFiles)
+	if err != nil {
+		return nil, fmt.Errorf("rebuildEcFiles: %v", err)
+	}
+	return
+}
+
+func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
+
+	bufferSize := int64(len(buffers[0]))
+	batchCount := blockSize / bufferSize
+	if blockSize%bufferSize != 0 {
+		glog.Fatalf("unexpected block size %d buffer size %d", blockSize, bufferSize)
+	}
+
+	for b := int64(0); b < batchCount; b++ {
+		err := encodeDataOneBatch(file, enc, startOffset+b*bufferSize, blockSize, buffers, outputs)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
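+
+// Example (a sketch for a hypothetical volume "1" in the working directory):
+// a typical encode pass would be
+//
+//	if err := WriteEcFiles("1"); err == nil {         // produce 1.ec00 ~ 1.ec13
+//		err = WriteSortedFileFromIdx("1", ".ecx") // produce the sorted 1.ecx
+//	}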
+
+func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error) {
+	for i := 0; i < TotalShardsCount; i++ {
+		fname := baseFileName + ToExt(i)
+		openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY
+		if forRead {
+			openOption = os.O_RDONLY
+		}
+		f, err := os.OpenFile(fname, openOption, 0644)
+		if err != nil {
+			return files, fmt.Errorf("failed to open file %s: %v", fname, err)
+		}
+		files = append(files, f)
+	}
+	return
+}
+
+func closeEcFiles(files []*os.File) {
+	for _, f := range files {
+		if f != nil {
+			f.Close()
+		}
+	}
+}
+
+func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
+
+	// read data into buffers
+	for i := 0; i < DataShardsCount; i++ {
+		n, err := file.ReadAt(buffers[i], startOffset+blockSize*int64(i))
+		if err != nil {
+			if err != io.EOF {
+				return err
+			}
+		}
+		if n < len(buffers[i]) {
+			for t := len(buffers[i]) - 1; t >= n; t-- {
+				buffers[i][t] = 0
+			}
+		}
+	}
+
+	err := enc.Encode(buffers)
+	if err != nil {
+		return err
+	}
+
+	for i := 0; i < TotalShardsCount; i++ {
+		_, err := outputs[i].Write(buffers[i])
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func encodeDatFile(remainingSize int64, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
+
+	var processedSize int64
+
+	enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
+	if err != nil {
+		return fmt.Errorf("failed to create encoder: %v", err)
+	}
+
+	buffers := make([][]byte, TotalShardsCount)
+	for i := range buffers {
+		buffers[i] = make([]byte, bufferSize)
+	}
+
+	outputs, err := openEcFiles(baseFileName, false)
+	defer closeEcFiles(outputs)
+	if err != nil {
+		return fmt.Errorf("failed to open ec files %s: %v", baseFileName, err)
+	}
+
+	for remainingSize > largeBlockSize*DataShardsCount {
+		err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
+		if err != nil {
+			return fmt.Errorf("failed to encode large chunk data: %v", err)
+		}
+		remainingSize -= largeBlockSize * DataShardsCount
+		processedSize += largeBlockSize * DataShardsCount
+	}
+	for remainingSize > 0 {
+		err = encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
+		if err != nil {
+			return fmt.Errorf("failed to encode small chunk data: %v", err)
+		}
+		remainingSize -= smallBlockSize * DataShardsCount
+		processedSize += smallBlockSize * DataShardsCount
+	}
+	return nil
+}
+
+func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File) error {
+
+	enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
+	if err != nil {
+		return fmt.Errorf("failed to create encoder: %v", err)
+	}
+
+	buffers := make([][]byte, TotalShardsCount)
+	for i := range buffers {
+		if shardHasData[i] {
+			buffers[i] = make([]byte, ErasureCodingSmallBlockSize)
+		}
+	}
+
+	var startOffset int64
+	var inputBufferDataSize int
+	for {
+
+		// read the input data from files
+		for i := 0; i < TotalShardsCount; i++ {
+			if shardHasData[i] {
+				n, _ := inputFiles[i].ReadAt(buffers[i], startOffset)
+				if n == 0 {
+					return nil
+				}
+				if inputBufferDataSize == 0 {
+					inputBufferDataSize = n
+				}
+				if inputBufferDataSize != n {
+					return fmt.Errorf("ec shard size expected %d actual %d", inputBufferDataSize, n)
+				}
+			} else {
+				buffers[i] = nil
+			}
+		}
+
+		// encode the data
+		err = enc.Reconstruct(buffers)
+		if err != nil {
+			return fmt.Errorf("reconstruct: %v", err)
+		}
+
+		// write the data to output files
+		for i := 0; i < TotalShardsCount; i++ {
+			if !shardHasData[i] {
+				n, _ := outputFiles[i].WriteAt(buffers[i][:inputBufferDataSize], startOffset)
+				if inputBufferDataSize != n {
+					return fmt.Errorf("fail to write to %s", outputFiles[i].Name())
+				}
+			}
+		}
+		startOffset += int64(inputBufferDataSize)
+	}
+
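+	// unreachable: the stripe loop above exits via the n == 0 return once the
+	// surviving shards are exhausted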
+} + +func readNeedleMap(baseFileName string) (*needle_map.MemDb, error) { + indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644) + if err != nil { + return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err) + } + defer indexFile.Close() + + cm := needle_map.NewMemDb() + err = idx.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error { + if !offset.IsZero() && size != types.TombstoneFileSize { + cm.Set(key, offset, size) + } else { + cm.Delete(key) + } + return nil + }) + return cm, err +} diff --git a/weed/storage/erasure_coding/ec_locate.go b/weed/storage/erasure_coding/ec_locate.go new file mode 100644 index 000000000..562966f8f --- /dev/null +++ b/weed/storage/erasure_coding/ec_locate.go @@ -0,0 +1,83 @@ +package erasure_coding + +type Interval struct { + BlockIndex int + InnerBlockOffset int64 + Size uint32 + IsLargeBlock bool + LargeBlockRowsCount int +} + +func LocateData(largeBlockLength, smallBlockLength int64, datSize int64, offset int64, size uint32) (intervals []Interval) { + blockIndex, isLargeBlock, innerBlockOffset := locateOffset(largeBlockLength, smallBlockLength, datSize, offset) + + // adding DataShardsCount*smallBlockLength to ensure we can derive the number of large block size from a shard size + nLargeBlockRows := int((datSize + DataShardsCount*smallBlockLength) / (largeBlockLength * DataShardsCount)) + + for size > 0 { + interval := Interval{ + BlockIndex: blockIndex, + InnerBlockOffset: innerBlockOffset, + IsLargeBlock: isLargeBlock, + LargeBlockRowsCount: nLargeBlockRows, + } + + blockRemaining := largeBlockLength - innerBlockOffset + if !isLargeBlock { + blockRemaining = smallBlockLength - innerBlockOffset + } + + if int64(size) <= blockRemaining { + interval.Size = size + intervals = append(intervals, interval) + return + } + interval.Size = uint32(blockRemaining) + intervals = append(intervals, interval) + + size -= interval.Size + blockIndex += 1 + if isLargeBlock && blockIndex == nLargeBlockRows*DataShardsCount { + isLargeBlock = false + blockIndex = 0 + } + innerBlockOffset = 0 + + } + return +} + +func locateOffset(largeBlockLength, smallBlockLength int64, datSize int64, offset int64) (blockIndex int, isLargeBlock bool, innerBlockOffset int64) { + largeRowSize := largeBlockLength * DataShardsCount + nLargeBlockRows := datSize / (largeBlockLength * DataShardsCount) + + // if offset is within the large block area + if offset < nLargeBlockRows*largeRowSize { + isLargeBlock = true + blockIndex, innerBlockOffset = locateOffsetWithinBlocks(largeBlockLength, offset) + return + } + + isLargeBlock = false + offset -= nLargeBlockRows * largeRowSize + blockIndex, innerBlockOffset = locateOffsetWithinBlocks(smallBlockLength, offset) + return +} + +func locateOffsetWithinBlocks(blockLength int64, offset int64) (blockIndex int, innerBlockOffset int64) { + blockIndex = int(offset / blockLength) + innerBlockOffset = offset % blockLength + return +} + +func (interval Interval) ToShardIdAndOffset(largeBlockSize, smallBlockSize int64) (ShardId, int64) { + ecFileOffset := interval.InnerBlockOffset + rowIndex := interval.BlockIndex / DataShardsCount + if interval.IsLargeBlock { + ecFileOffset += int64(rowIndex) * largeBlockSize + } else { + ecFileOffset += int64(interval.LargeBlockRowsCount)*largeBlockSize + int64(rowIndex)*smallBlockSize + } + ecFileIndex := interval.BlockIndex % DataShardsCount + return ShardId(ecFileIndex), ecFileOffset +} diff --git a/weed/storage/erasure_coding/ec_shard.go 
b/weed/storage/erasure_coding/ec_shard.go
new file mode 100644
index 000000000..47e6d3d1e
--- /dev/null
+++ b/weed/storage/erasure_coding/ec_shard.go
@@ -0,0 +1,91 @@
+package erasure_coding
+
+import (
+	"fmt"
+	"os"
+	"path"
+	"strconv"
+
+	"github.com/chrislusf/seaweedfs/weed/stats"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+type ShardId uint8
+
+type EcVolumeShard struct {
+	VolumeId    needle.VolumeId
+	ShardId     ShardId
+	Collection  string
+	dir         string
+	ecdFile     *os.File
+	ecdFileSize int64
+}
+
+func NewEcVolumeShard(dirname string, collection string, id needle.VolumeId, shardId ShardId) (v *EcVolumeShard, e error) {
+
+	v = &EcVolumeShard{dir: dirname, Collection: collection, VolumeId: id, ShardId: shardId}
+
+	baseFileName := v.FileName()
+
+	// open ecd file
+	if v.ecdFile, e = os.OpenFile(baseFileName+ToExt(int(shardId)), os.O_RDONLY, 0644); e != nil {
+		return nil, fmt.Errorf("cannot read ec volume shard %s%s: %v", baseFileName, ToExt(int(shardId)), e)
+	}
+	ecdFi, statErr := v.ecdFile.Stat()
+	if statErr != nil {
+		return nil, fmt.Errorf("can not stat ec volume shard %s%s: %v", baseFileName, ToExt(int(shardId)), statErr)
+	}
+	v.ecdFileSize = ecdFi.Size()
+
+	stats.VolumeServerVolumeCounter.WithLabelValues(v.Collection, "ec_shards").Inc()
+
+	return
+}
+
+func (shard *EcVolumeShard) Size() int64 {
+	return shard.ecdFileSize
+}
+
+func (shard *EcVolumeShard) String() string {
+	return fmt.Sprintf("ec shard %v:%v, dir:%s, Collection:%s", shard.VolumeId, shard.ShardId, shard.dir, shard.Collection)
+}
+
+func (shard *EcVolumeShard) FileName() (fileName string) {
+	return EcShardFileName(shard.Collection, shard.dir, int(shard.VolumeId))
+}
+
+func EcShardFileName(collection string, dir string, id int) (fileName string) {
+	idString := strconv.Itoa(id)
+	if collection == "" {
+		fileName = path.Join(dir, idString)
+	} else {
+		fileName = path.Join(dir, collection+"_"+idString)
+	}
+	return
+}
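+
+// Example: EcShardFileName("pictures", "/data", 3) returns "/data/pictures_3";
+// the shard files themselves append ToExt(shardId), e.g. "/data/pictures_3.ec07".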
+
+func EcShardBaseFileName(collection string, id int) (baseFileName string) {
+	baseFileName = strconv.Itoa(id)
+	if collection != "" {
+		baseFileName = collection + "_" + baseFileName
+	}
+	return
+}
+
+func (shard *EcVolumeShard) Close() {
+	if shard.ecdFile != nil {
+		_ = shard.ecdFile.Close()
+		shard.ecdFile = nil
+	}
+}
+
+func (shard *EcVolumeShard) Destroy() {
+	os.Remove(shard.FileName() + ToExt(int(shard.ShardId)))
+	stats.VolumeServerVolumeCounter.WithLabelValues(shard.Collection, "ec_shards").Dec()
+}
+
+func (shard *EcVolumeShard) ReadAt(buf []byte, offset int64) (int, error) {
+
+	return shard.ecdFile.ReadAt(buf, offset)
+
+}
diff --git a/weed/storage/erasure_coding/ec_test.go b/weed/storage/erasure_coding/ec_test.go
new file mode 100644
index 000000000..92b83cdc8
--- /dev/null
+++ b/weed/storage/erasure_coding/ec_test.go
@@ -0,0 +1,207 @@
+package erasure_coding
+
+import (
+	"bytes"
+	"fmt"
+	"math/rand"
+	"os"
+	"testing"
+
+	"github.com/klauspost/reedsolomon"
+
+	"github.com/chrislusf/seaweedfs/weed/storage/needle_map"
+	"github.com/chrislusf/seaweedfs/weed/storage/types"
+)
+
+const (
+	largeBlockSize = 10000
+	smallBlockSize = 100
+)
+
+func TestEncodingDecoding(t *testing.T) {
+	bufferSize := 50
+	baseFileName := "1"
+
+	err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
+	if err != nil {
+		t.Fatalf("generateEcFiles: %v", err)
+	}
+
+	err = WriteSortedFileFromIdx(baseFileName, ".ecx")
+	if err != nil {
+		t.Fatalf("WriteSortedFileFromIdx: %v", err)
+	}
+
+	err = validateFiles(baseFileName)
+	if err != nil {
+		t.Fatalf("validateFiles: %v", err)
+	}
+
+	removeGeneratedFiles(baseFileName)
+
+}
+
+func validateFiles(baseFileName string) error {
+	nm, err := readNeedleMap(baseFileName)
+	defer nm.Close()
+	if err != nil {
+		return fmt.Errorf("readNeedleMap: %v", err)
+	}
+
+	datFile, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
+	if err != nil {
+		return fmt.Errorf("failed to open dat file: %v", err)
+	}
+	defer datFile.Close()
+
+	fi, err := datFile.Stat()
+	if err != nil {
+		return fmt.Errorf("failed to stat dat file: %v", err)
+	}
+
+	ecFiles, err := openEcFiles(baseFileName, true)
+	defer closeEcFiles(ecFiles)
+	if err != nil {
+		return fmt.Errorf("failed to open ec files: %v", err)
+	}
+
+	err = nm.AscendingVisit(func(value needle_map.NeedleValue) error {
+		return assertSame(datFile, fi.Size(), ecFiles, value.Offset, value.Size)
+	})
+	if err != nil {
+		return fmt.Errorf("failed to check ec files: %v", err)
+	}
+	return nil
+}
+
+func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) error {
+
+	data, err := readDatFile(datFile, offset, size)
+	if err != nil {
+		return fmt.Errorf("failed to read dat file: %v", err)
+	}
+
+	ecData, err := readEcFile(datSize, ecFiles, offset, size)
+	if err != nil {
+		return fmt.Errorf("failed to read ec file: %v", err)
+	}
+
+	if !bytes.Equal(data, ecData) {
+		return fmt.Errorf("unexpected data read")
+	}
+
+	return nil
+}
+
+func readDatFile(datFile *os.File, offset types.Offset, size uint32) ([]byte, error) {
+
+	data := make([]byte, size)
+	n, err := datFile.ReadAt(data, offset.ToAcutalOffset())
+	if err != nil {
+		return nil, fmt.Errorf("failed to ReadAt dat file: %v", err)
+	}
+	if n != int(size) {
+		return nil, fmt.Errorf("unexpected read size %d, expected %d", n, size)
+	}
+	return data, nil
+}
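+
+// readEcFile reads the same byte range from the ec shard files by mapping the
+// .dat offset to per-shard intervals via LocateData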
+func readEcFile(datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) (data []byte, err error) {
+
+	intervals := LocateData(largeBlockSize, smallBlockSize, datSize, offset.ToAcutalOffset(), size)
+
+	for i, interval := range intervals {
+		if d, e := readOneInterval(interval, ecFiles); e != nil {
+			return nil, e
+		} else {
+			if i == 0 {
+				data = d
+			} else {
+				data = append(data, d...)
+			}
+		}
+	}
+
+	return data, nil
+}
+
+func readOneInterval(interval Interval, ecFiles []*os.File) (data []byte, err error) {
+
+	ecFileIndex, ecFileOffset := interval.ToShardIdAndOffset(largeBlockSize, smallBlockSize)
+
+	data = make([]byte, interval.Size)
+	if err = readFromFile(ecFiles[ecFileIndex], data, ecFileOffset); err != nil {
+		return nil, fmt.Errorf("readFromFile: %v", err)
+	}
+	{ // do some ec testing
+		ecData, err := readFromOtherEcFiles(ecFiles, int(ecFileIndex), ecFileOffset, interval.Size)
+		if err != nil {
+			return nil, fmt.Errorf("ec reconstruct error: %v", err)
+		}
+		if !bytes.Equal(data, ecData) {
+			return nil, fmt.Errorf("ec compare error")
+		}
+	}
+	return
+}
+
+func readFromOtherEcFiles(ecFiles []*os.File, ecFileIndex int, ecFileOffset int64, size uint32) (data []byte, err error) {
+	enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create encoder: %v", err)
+	}
+
+	bufs := make([][]byte, TotalShardsCount)
+	for i := 0; i < DataShardsCount; {
+		n := int(rand.Int31n(TotalShardsCount))
+		if n == ecFileIndex || bufs[n] != nil {
+			continue
+		}
+		bufs[n] = make([]byte, size)
+		i++
+	}
+
+	for i, buf := range bufs {
+		if buf == nil {
+			continue
+		}
+		err = readFromFile(ecFiles[i], buf, ecFileOffset)
+		if err != nil {
+			return
+		}
+	}
+
+	if err = enc.ReconstructData(bufs); err != nil {
+		return nil, err
+	}
+
+	return bufs[ecFileIndex], nil
+}
+
+func readFromFile(file *os.File, data []byte, ecFileOffset int64) (err error) {
+	_, err = file.ReadAt(data, ecFileOffset)
+	return
+}
+
+func removeGeneratedFiles(baseFileName string) {
+	for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
+		fname := fmt.Sprintf("%s.ec%02d", baseFileName, i)
+		os.Remove(fname)
+	}
+	os.Remove(baseFileName + ".ecx")
+}
+
+func TestLocateData(t *testing.T) {
+	intervals := LocateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
+	if len(intervals) != 1 {
+		t.Errorf("unexpected interval size %d", len(intervals))
+	}
+	if !intervals[0].sameAs(Interval{0, 0, 1, false, 1}) {
+		t.Errorf("unexpected interval %+v", intervals[0])
+	}
+
+	intervals = LocateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1-DataShardsCount*largeBlockSize/2-100)
+	fmt.Printf("%+v\n", intervals)
+}
+
+func (this Interval) sameAs(that Interval) bool {
+	return this.IsLargeBlock == that.IsLargeBlock &&
+		this.InnerBlockOffset == that.InnerBlockOffset &&
+		this.BlockIndex == that.BlockIndex &&
+		this.Size == that.Size
+}
diff --git a/weed/storage/erasure_coding/ec_volume.go b/weed/storage/erasure_coding/ec_volume.go
new file mode 100644
index 000000000..3d9aa2cff
--- /dev/null
+++ b/weed/storage/erasure_coding/ec_volume.go
@@ -0,0 +1,228 @@
+package erasure_coding
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"os"
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/chrislusf/seaweedfs/weed/pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/idx"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"github.com/chrislusf/seaweedfs/weed/storage/types"
+)
+
+var (
+	NotFoundError = errors.New("needle not found")
+)
+
+type EcVolume struct {
+	VolumeId                  needle.VolumeId
+	Collection                string
+	dir                       string
+	ecxFile                   *os.File
+	ecxFileSize               int64
+	ecxCreatedAt              time.Time
+	Shards                    []*EcVolumeShard
+	ShardLocations            map[ShardId][]string
+	ShardLocationsRefreshTime time.Time
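+	// ShardLocationsLock guards ShardLocations and ShardLocationsRefreshTime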
ShardLocationsLock sync.RWMutex + Version needle.Version + ecjFile *os.File + ecjFileAccessLock sync.Mutex +} + +func NewEcVolume(dir string, collection string, vid needle.VolumeId) (ev *EcVolume, err error) { + ev = &EcVolume{dir: dir, Collection: collection, VolumeId: vid} + + baseFileName := EcShardFileName(collection, dir, int(vid)) + + // open ecx file + if ev.ecxFile, err = os.OpenFile(baseFileName+".ecx", os.O_RDWR, 0644); err != nil { + return nil, fmt.Errorf("cannot open ec volume index %s.ecx: %v", baseFileName, err) + } + ecxFi, statErr := ev.ecxFile.Stat() + if statErr != nil { + return nil, fmt.Errorf("can not stat ec volume index %s.ecx: %v", baseFileName, statErr) + } + ev.ecxFileSize = ecxFi.Size() + ev.ecxCreatedAt = ecxFi.ModTime() + + // open ecj file + if ev.ecjFile, err = os.OpenFile(baseFileName+".ecj", os.O_RDWR|os.O_CREATE, 0644); err != nil { + return nil, fmt.Errorf("cannot open ec volume journal %s.ecj: %v", baseFileName, err) + } + + // read volume info + ev.Version = needle.Version3 + if volumeInfo, found, _ := pb.MaybeLoadVolumeInfo(baseFileName + ".vif"); found { + ev.Version = needle.Version(volumeInfo.Version) + } else { + pb.SaveVolumeInfo(baseFileName+".vif", &volume_server_pb.VolumeInfo{Version: uint32(ev.Version)}) + } + + ev.ShardLocations = make(map[ShardId][]string) + + return +} + +func (ev *EcVolume) AddEcVolumeShard(ecVolumeShard *EcVolumeShard) bool { + for _, s := range ev.Shards { + if s.ShardId == ecVolumeShard.ShardId { + return false + } + } + ev.Shards = append(ev.Shards, ecVolumeShard) + sort.Slice(ev.Shards, func(i, j int) bool { + return ev.Shards[i].VolumeId < ev.Shards[j].VolumeId || + ev.Shards[i].VolumeId == ev.Shards[j].VolumeId && ev.Shards[i].ShardId < ev.Shards[j].ShardId + }) + return true +} + +func (ev *EcVolume) DeleteEcVolumeShard(shardId ShardId) (ecVolumeShard *EcVolumeShard, deleted bool) { + foundPosition := -1 + for i, s := range ev.Shards { + if s.ShardId == shardId { + foundPosition = i + } + } + if foundPosition < 0 { + return nil, false + } + + ecVolumeShard = ev.Shards[foundPosition] + + ev.Shards = append(ev.Shards[:foundPosition], ev.Shards[foundPosition+1:]...) 
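+	// the shard is only detached from this volume; the caller decides whether
+	// to Close() or Destroy() the returned shard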
+	return ecVolumeShard, true
+}
+
+func (ev *EcVolume) FindEcVolumeShard(shardId ShardId) (ecVolumeShard *EcVolumeShard, found bool) {
+	for _, s := range ev.Shards {
+		if s.ShardId == shardId {
+			return s, true
+		}
+	}
+	return nil, false
+}
+
+func (ev *EcVolume) Close() {
+	for _, s := range ev.Shards {
+		s.Close()
+	}
+	if ev.ecjFile != nil {
+		ev.ecjFileAccessLock.Lock()
+		_ = ev.ecjFile.Close()
+		ev.ecjFile = nil
+		ev.ecjFileAccessLock.Unlock()
+	}
+	if ev.ecxFile != nil {
+		_ = ev.ecxFile.Close()
+		ev.ecxFile = nil
+	}
+}
+
+func (ev *EcVolume) Destroy() {
+
+	ev.Close()
+
+	for _, s := range ev.Shards {
+		s.Destroy()
+	}
+	os.Remove(ev.FileName() + ".ecx")
+	os.Remove(ev.FileName() + ".ecj")
+	os.Remove(ev.FileName() + ".vif")
+}
+
+func (ev *EcVolume) FileName() string {
+
+	return EcShardFileName(ev.Collection, ev.dir, int(ev.VolumeId))
+
+}
+
+func (ev *EcVolume) ShardSize() int64 {
+	if len(ev.Shards) > 0 {
+		return ev.Shards[0].Size()
+	}
+	return 0
+}
+
+func (ev *EcVolume) CreatedAt() time.Time {
+	return ev.ecxCreatedAt
+}
+
+func (ev *EcVolume) ShardIdList() (shardIds []ShardId) {
+	for _, s := range ev.Shards {
+		shardIds = append(shardIds, s.ShardId)
+	}
+	return
+}
+
+func (ev *EcVolume) ToVolumeEcShardInformationMessage() (messages []*master_pb.VolumeEcShardInformationMessage) {
+	prevVolumeId := needle.VolumeId(math.MaxUint32)
+	var m *master_pb.VolumeEcShardInformationMessage
+	for _, s := range ev.Shards {
+		if s.VolumeId != prevVolumeId {
+			m = &master_pb.VolumeEcShardInformationMessage{
+				Id:         uint32(s.VolumeId),
+				Collection: s.Collection,
+			}
+			messages = append(messages, m)
+		}
+		prevVolumeId = s.VolumeId
+		m.EcIndexBits = uint32(ShardBits(m.EcIndexBits).AddShardId(s.ShardId))
+	}
+	return
+}
+
+func (ev *EcVolume) LocateEcShardNeedle(needleId types.NeedleId, version needle.Version) (offset types.Offset, size uint32, intervals []Interval, err error) {
+
+	// find the needle from ecx file
+	offset, size, err = ev.FindNeedleFromEcx(needleId)
+	if err != nil {
+		return types.Offset{}, 0, nil, fmt.Errorf("FindNeedleFromEcx: %v", err)
+	}
+
+	shard := ev.Shards[0]
+
+	// calculate the locations in the ec shards
+	intervals = LocateData(ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, DataShardsCount*shard.ecdFileSize, offset.ToAcutalOffset(), uint32(needle.GetActualSize(size, version)))
+
+	return
+}
+
+func (ev *EcVolume) FindNeedleFromEcx(needleId types.NeedleId) (offset types.Offset, size uint32, err error) {
+	return SearchNeedleFromSortedIndex(ev.ecxFile, ev.ecxFileSize, needleId, nil)
+}
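+
+// SearchNeedleFromSortedIndex binary-searches the sorted .ecx file; entries are
+// fixed-size types.NeedleMapEntrySize records, so the m-th entry starts at byte
+// offset m*types.NeedleMapEntrySize.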
+func SearchNeedleFromSortedIndex(ecxFile *os.File, ecxFileSize int64, needleId types.NeedleId, processNeedleFn func(file *os.File, offset int64) error) (offset types.Offset, size uint32, err error) {
+	var key types.NeedleId
+	buf := make([]byte, types.NeedleMapEntrySize)
+	l, h := int64(0), ecxFileSize/types.NeedleMapEntrySize
+	for l < h {
+		m := (l + h) / 2
+		if _, err := ecxFile.ReadAt(buf, m*types.NeedleMapEntrySize); err != nil {
+			return types.Offset{}, types.TombstoneFileSize, fmt.Errorf("ecx file %d read at %d: %v", ecxFileSize, m*types.NeedleMapEntrySize, err)
+		}
+		key, offset, size = idx.IdxFileEntry(buf)
+		if key == needleId {
+			if processNeedleFn != nil {
+				err = processNeedleFn(ecxFile, m*types.NeedleMapEntrySize)
+			}
+			return
+		}
+		if key < needleId {
+			l = m + 1
+		} else {
+			h = m
+		}
+	}
+
+	err = NotFoundError
+	return
+}
diff --git a/weed/storage/erasure_coding/ec_volume_delete.go b/weed/storage/erasure_coding/ec_volume_delete.go
new file mode 100644
index 000000000..822a9e923
--- /dev/null
+++ b/weed/storage/erasure_coding/ec_volume_delete.go
@@ -0,0 +1,98 @@
+package erasure_coding
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/chrislusf/seaweedfs/weed/storage/types"
+	"github.com/chrislusf/seaweedfs/weed/util"
+)
+
+var (
+	MarkNeedleDeleted = func(file *os.File, offset int64) error {
+		b := make([]byte, types.SizeSize)
+		util.Uint32toBytes(b, types.TombstoneFileSize)
+		n, err := file.WriteAt(b, offset+types.NeedleIdSize+types.OffsetSize)
+		if err != nil {
+			return fmt.Errorf("sorted needle write error: %v", err)
+		}
+		if n != types.SizeSize {
+			return fmt.Errorf("sorted needle written %d bytes, expecting %d", n, types.SizeSize)
+		}
+		return nil
+	}
+)
+
+func (ev *EcVolume) DeleteNeedleFromEcx(needleId types.NeedleId) (err error) {
+
+	_, _, err = SearchNeedleFromSortedIndex(ev.ecxFile, ev.ecxFileSize, needleId, MarkNeedleDeleted)
+
+	if err != nil {
+		if err == NotFoundError {
+			return nil
+		}
+		return err
+	}
+
+	b := make([]byte, types.NeedleIdSize)
+	types.NeedleIdToBytes(b, needleId)
+
+	ev.ecjFileAccessLock.Lock()
+
+	if _, err = ev.ecjFile.Seek(0, io.SeekEnd); err == nil {
+		_, err = ev.ecjFile.Write(b)
+	}
+
+	ev.ecjFileAccessLock.Unlock()
+
+	return
+}
+
+func RebuildEcxFile(baseFileName string) error {
+
+	if !util.FileExists(baseFileName + ".ecj") {
+		return nil
+	}
+
+	ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_RDWR, 0644)
+	if err != nil {
+		return fmt.Errorf("rebuild: failed to open ecx file: %v", err)
+	}
+	defer ecxFile.Close()
+
+	fstat, err := ecxFile.Stat()
+	if err != nil {
+		return err
+	}
+
+	ecxFileSize := fstat.Size()
+
+	ecjFile, err := os.OpenFile(baseFileName+".ecj", os.O_RDWR, 0644)
+	if err != nil {
+		return fmt.Errorf("rebuild: failed to open ecj file: %v", err)
+	}
+
+	buf := make([]byte, types.NeedleIdSize)
+	for {
+		n, _ := ecjFile.Read(buf)
+		if n != types.NeedleIdSize {
+			break
+		}
+
+		needleId := types.BytesToNeedleId(buf)
+
+		_, _, err = SearchNeedleFromSortedIndex(ecxFile, ecxFileSize, needleId, MarkNeedleDeleted)
+
+		if err != nil && err != NotFoundError {
+			ecjFile.Close()
+			return err
+		}
+
+	}
+
+	ecjFile.Close()
+
+	os.Remove(baseFileName + ".ecj")
+
+	return nil
+}
diff --git a/weed/storage/erasure_coding/ec_volume_info.go b/weed/storage/erasure_coding/ec_volume_info.go
new file mode 100644
index 000000000..8ff65bb0f
--- /dev/null
+++ b/weed/storage/erasure_coding/ec_volume_info.go
@@ -0,0 +1,113 @@
+package erasure_coding
+
+import (
+	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+)
+
+// EcVolumeInfo is the data structure used in the master
+type EcVolumeInfo struct {
+	VolumeId   needle.VolumeId
+	Collection string
+	ShardBits  ShardBits
+}
+
+func NewEcVolumeInfo(collection string, vid needle.VolumeId, shardBits ShardBits) *EcVolumeInfo {
+	return &EcVolumeInfo{
+		Collection: collection,
+		VolumeId:   vid,
+		ShardBits:  shardBits,
+	}
+}
+
+func (ecInfo *EcVolumeInfo) AddShardId(id ShardId) {
+	ecInfo.ShardBits = ecInfo.ShardBits.AddShardId(id)
+}
+
+func (ecInfo *EcVolumeInfo) RemoveShardId(id ShardId) {
+	ecInfo.ShardBits = ecInfo.ShardBits.RemoveShardId(id)
+}
+
+func (ecInfo *EcVolumeInfo) HasShardId(id ShardId) bool {
+	return ecInfo.ShardBits.HasShardId(id)
+}
+
+func (ecInfo *EcVolumeInfo) ShardIds() (ret []ShardId) {
+	return ecInfo.ShardBits.ShardIds()
+}
+
+func (ecInfo *EcVolumeInfo) ShardIdCount() (count int) {
+	return ecInfo.ShardBits.ShardIdCount()
+}
+
+func (ecInfo *EcVolumeInfo) Minus(other *EcVolumeInfo) *EcVolumeInfo {
+	ret := &EcVolumeInfo{
VolumeId: ecInfo.VolumeId, + Collection: ecInfo.Collection, + ShardBits: ecInfo.ShardBits.Minus(other.ShardBits), + } + + return ret +} + +func (ecInfo *EcVolumeInfo) ToVolumeEcShardInformationMessage() (ret *master_pb.VolumeEcShardInformationMessage) { + return &master_pb.VolumeEcShardInformationMessage{ + Id: uint32(ecInfo.VolumeId), + EcIndexBits: uint32(ecInfo.ShardBits), + Collection: ecInfo.Collection, + } +} + +type ShardBits uint32 // use bits to indicate the shard id, use 32 bits just for possible future extension + +func (b ShardBits) AddShardId(id ShardId) ShardBits { + return b | (1 << id) +} + +func (b ShardBits) RemoveShardId(id ShardId) ShardBits { + return b &^ (1 << id) +} + +func (b ShardBits) HasShardId(id ShardId) bool { + return b&(1<<id) > 0 +} + +func (b ShardBits) ShardIds() (ret []ShardId) { + for i := ShardId(0); i < TotalShardsCount; i++ { + if b.HasShardId(i) { + ret = append(ret, i) + } + } + return +} + +func (b ShardBits) ToUint32Slice() (ret []uint32) { + for i := uint32(0); i < TotalShardsCount; i++ { + if b.HasShardId(ShardId(i)) { + ret = append(ret, i) + } + } + return +} + +func (b ShardBits) ShardIdCount() (count int) { + for count = 0; b > 0; count++ { + b &= b - 1 + } + return +} + +func (b ShardBits) Minus(other ShardBits) ShardBits { + return b &^ other +} + +func (b ShardBits) Plus(other ShardBits) ShardBits { + return b | other +} + +func (b ShardBits) MinusParityShards() ShardBits { + for i := DataShardsCount; i < TotalShardsCount; i++ { + b = b.RemoveShardId(ShardId(i)) + } + return b +} diff --git a/weed/storage/file_id.go b/weed/storage/file_id.go deleted file mode 100644 index 37dcb7c70..000000000 --- a/weed/storage/file_id.go +++ /dev/null @@ -1,34 +0,0 @@ -package storage - -import ( - "encoding/hex" - . 
"github.com/chrislusf/seaweedfs/weed/storage/types" -) - -type FileId struct { - VolumeId VolumeId - Key NeedleId - Cookie Cookie -} - -func NewFileIdFromNeedle(VolumeId VolumeId, n *Needle) *FileId { - return &FileId{VolumeId: VolumeId, Key: n.Id, Cookie: n.Cookie} -} - -func NewFileId(VolumeId VolumeId, key uint64, cookie uint32) *FileId { - return &FileId{VolumeId: VolumeId, Key: Uint64ToNeedleId(key), Cookie: Uint32ToCookie(cookie)} -} - -func (n *FileId) String() string { - return n.VolumeId.String() + "," + formatNeedleIdCookie(n.Key, n.Cookie) -} - -func formatNeedleIdCookie(key NeedleId, cookie Cookie) string { - bytes := make([]byte, NeedleIdSize+CookieSize) - NeedleIdToBytes(bytes[0:NeedleIdSize], key) - CookieToBytes(bytes[NeedleIdSize:NeedleIdSize+CookieSize], cookie) - nonzero_index := 0 - for ; bytes[nonzero_index] == 0; nonzero_index++ { - } - return hex.EncodeToString(bytes[nonzero_index:]) -} diff --git a/weed/storage/idx/walk.go b/weed/storage/idx/walk.go new file mode 100644 index 000000000..90efb75e6 --- /dev/null +++ b/weed/storage/idx/walk.go @@ -0,0 +1,53 @@ +package idx + +import ( + "io" + "os" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" +) + +// walks through the index file, calls fn function with each key, offset, size +// stops with the error returned by the fn function +func WalkIndexFile(r *os.File, fn func(key types.NeedleId, offset types.Offset, size uint32) error) error { + var readerOffset int64 + bytes := make([]byte, types.NeedleMapEntrySize*RowsToRead) + count, e := r.ReadAt(bytes, readerOffset) + glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e) + readerOffset += int64(count) + var ( + key types.NeedleId + offset types.Offset + size uint32 + i int + ) + + for count > 0 && e == nil || e == io.EOF { + for i = 0; i+types.NeedleMapEntrySize <= count; i += types.NeedleMapEntrySize { + key, offset, size = IdxFileEntry(bytes[i : i+types.NeedleMapEntrySize]) + if e = fn(key, offset, size); e != nil { + return e + } + } + if e == io.EOF { + return nil + } + count, e = r.ReadAt(bytes, readerOffset) + glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e) + readerOffset += int64(count) + } + return e +} + +func IdxFileEntry(bytes []byte) (key types.NeedleId, offset types.Offset, size uint32) { + key = types.BytesToNeedleId(bytes[:types.NeedleIdSize]) + offset = types.BytesToOffset(bytes[types.NeedleIdSize : types.NeedleIdSize+types.OffsetSize]) + size = util.BytesToUint32(bytes[types.NeedleIdSize+types.OffsetSize : types.NeedleIdSize+types.OffsetSize+types.SizeSize]) + return +} + +const ( + RowsToRead = 1024 +) diff --git a/weed/storage/needle/btree_map.go b/weed/storage/needle/btree_map.go deleted file mode 100644 index d688b802e..000000000 --- a/weed/storage/needle/btree_map.go +++ /dev/null @@ -1,53 +0,0 @@ -package needle - -import ( - . 
"github.com/chrislusf/seaweedfs/weed/storage/types" - "github.com/google/btree" -) - -//This map assumes mostly inserting increasing keys -type BtreeMap struct { - tree *btree.BTree -} - -func NewBtreeMap() *BtreeMap { - return &BtreeMap{ - tree: btree.New(32), - } -} - -func (cm *BtreeMap) Set(key NeedleId, offset Offset, size uint32) (oldOffset Offset, oldSize uint32) { - found := cm.tree.ReplaceOrInsert(NeedleValue{key, offset, size}) - if found != nil { - old := found.(NeedleValue) - return old.Offset, old.Size - } - return -} - -func (cm *BtreeMap) Delete(key NeedleId) (oldSize uint32) { - found := cm.tree.Delete(NeedleValue{key, 0, 0}) - if found != nil { - old := found.(NeedleValue) - return old.Size - } - return -} -func (cm *BtreeMap) Get(key NeedleId) (*NeedleValue, bool) { - found := cm.tree.Get(NeedleValue{key, 0, 0}) - if found != nil { - old := found.(NeedleValue) - return &old, true - } - return nil, false -} - -// Visit visits all entries or stop if any error when visiting -func (cm *BtreeMap) Visit(visit func(NeedleValue) error) (ret error) { - cm.tree.Ascend(func(item btree.Item) bool { - needle := item.(NeedleValue) - ret = visit(needle) - return ret == nil - }) - return ret -} diff --git a/weed/storage/needle/compact_map.go b/weed/storage/needle/compact_map.go deleted file mode 100644 index 9852dca74..000000000 --- a/weed/storage/needle/compact_map.go +++ /dev/null @@ -1,195 +0,0 @@ -package needle - -import ( - . "github.com/chrislusf/seaweedfs/weed/storage/types" - "sync" -) - -type CompactSection struct { - sync.RWMutex - values []NeedleValue - overflow map[NeedleId]NeedleValue - start NeedleId - end NeedleId - counter int -} - -func NewCompactSection(start NeedleId) *CompactSection { - return &CompactSection{ - values: make([]NeedleValue, batch), - overflow: make(map[NeedleId]NeedleValue), - start: start, - } -} - -//return old entry size -func (cs *CompactSection) Set(key NeedleId, offset Offset, size uint32) (oldOffset Offset, oldSize uint32) { - cs.Lock() - if key > cs.end { - cs.end = key - } - if i := cs.binarySearchValues(key); i >= 0 { - oldOffset, oldSize = cs.values[i].Offset, cs.values[i].Size - //println("key", key, "old size", ret) - cs.values[i].Offset, cs.values[i].Size = offset, size - } else { - needOverflow := cs.counter >= batch - needOverflow = needOverflow || cs.counter > 0 && cs.values[cs.counter-1].Key > key - if needOverflow { - //println("start", cs.start, "counter", cs.counter, "key", key) - if oldValue, found := cs.overflow[key]; found { - oldOffset, oldSize = oldValue.Offset, oldValue.Size - } - cs.overflow[key] = NeedleValue{Key: key, Offset: offset, Size: size} - } else { - p := &cs.values[cs.counter] - p.Key, p.Offset, p.Size = key, offset, size - //println("added index", cs.counter, "key", key, cs.values[cs.counter].Key) - cs.counter++ - } - } - cs.Unlock() - return -} - -//return old entry size -func (cs *CompactSection) Delete(key NeedleId) uint32 { - cs.Lock() - ret := uint32(0) - if i := cs.binarySearchValues(key); i >= 0 { - if cs.values[i].Size > 0 { - ret = cs.values[i].Size - cs.values[i].Size = 0 - } - } - if v, found := cs.overflow[key]; found { - delete(cs.overflow, key) - ret = v.Size - } - cs.Unlock() - return ret -} -func (cs *CompactSection) Get(key NeedleId) (*NeedleValue, bool) { - cs.RLock() - if v, ok := cs.overflow[key]; ok { - cs.RUnlock() - return &v, true - } - if i := cs.binarySearchValues(key); i >= 0 { - cs.RUnlock() - return &cs.values[i], true - } - cs.RUnlock() - return nil, false -} -func (cs 
*CompactSection) binarySearchValues(key NeedleId) int { - l, h := 0, cs.counter-1 - if h >= 0 && cs.values[h].Key < key { - return -2 - } - //println("looking for key", key) - for l <= h { - m := (l + h) / 2 - //println("mid", m, "key", cs.values[m].Key, cs.values[m].Offset, cs.values[m].Size) - if cs.values[m].Key < key { - l = m + 1 - } else if key < cs.values[m].Key { - h = m - 1 - } else { - //println("found", m) - return m - } - } - return -1 -} - -//This map assumes mostly inserting increasing keys -//This map assumes mostly inserting increasing keys -type CompactMap struct { - list []*CompactSection -} - -func NewCompactMap() *CompactMap { - return &CompactMap{} -} - -func (cm *CompactMap) Set(key NeedleId, offset Offset, size uint32) (oldOffset Offset, oldSize uint32) { - x := cm.binarySearchCompactSection(key) - if x < 0 { - //println(x, "creating", len(cm.list), "section, starting", key) - cm.list = append(cm.list, NewCompactSection(key)) - x = len(cm.list) - 1 - //keep compact section sorted by start - for x > 0 { - if cm.list[x-1].start > cm.list[x].start { - cm.list[x-1], cm.list[x] = cm.list[x], cm.list[x-1] - x = x - 1 - } else { - break - } - } - } - return cm.list[x].Set(key, offset, size) -} -func (cm *CompactMap) Delete(key NeedleId) uint32 { - x := cm.binarySearchCompactSection(key) - if x < 0 { - return uint32(0) - } - return cm.list[x].Delete(key) -} -func (cm *CompactMap) Get(key NeedleId) (*NeedleValue, bool) { - x := cm.binarySearchCompactSection(key) - if x < 0 { - return nil, false - } - return cm.list[x].Get(key) -} -func (cm *CompactMap) binarySearchCompactSection(key NeedleId) int { - l, h := 0, len(cm.list)-1 - if h < 0 { - return -5 - } - if cm.list[h].start <= key { - if cm.list[h].counter < batch || key <= cm.list[h].end { - return h - } - return -4 - } - for l <= h { - m := (l + h) / 2 - if key < cm.list[m].start { - h = m - 1 - } else { // cm.list[m].start <= key - if cm.list[m+1].start <= key { - l = m + 1 - } else { - return m - } - } - } - return -3 -} - -// Visit visits all entries or stop if any error when visiting -func (cm *CompactMap) Visit(visit func(NeedleValue) error) error { - for _, cs := range cm.list { - cs.RLock() - for _, v := range cs.overflow { - if err := visit(v); err != nil { - cs.RUnlock() - return err - } - } - for _, v := range cs.values { - if _, found := cs.overflow[v.Key]; !found { - if err := visit(v); err != nil { - cs.RUnlock() - return err - } - } - } - cs.RUnlock() - } - return nil -} diff --git a/weed/storage/needle/compact_map_perf_test.go b/weed/storage/needle/compact_map_perf_test.go deleted file mode 100644 index a66836ac8..000000000 --- a/weed/storage/needle/compact_map_perf_test.go +++ /dev/null @@ -1,46 +0,0 @@ -package needle - -import ( - "log" - "os" - "testing" - - "github.com/chrislusf/seaweedfs/weed/glog" - . 
"github.com/chrislusf/seaweedfs/weed/storage/types" - "github.com/chrislusf/seaweedfs/weed/util" -) - -func TestMemoryUsage(t *testing.T) { - - indexFile, ie := os.OpenFile("../../../test/sample.idx", os.O_RDWR|os.O_RDONLY, 0644) - if ie != nil { - log.Fatalln(ie) - } - loadNewNeedleMap(indexFile) - -} - -func loadNewNeedleMap(file *os.File) { - m := NewCompactMap() - bytes := make([]byte, 16*1024) - count, e := file.Read(bytes) - if count > 0 { - fstat, _ := file.Stat() - glog.V(0).Infoln("Loading index file", fstat.Name(), "size", fstat.Size()) - } - for count > 0 && e == nil { - for i := 0; i < count; i += 16 { - key := BytesToNeedleId(bytes[i : i+NeedleIdSize]) - offset := BytesToOffset(bytes[i+NeedleIdSize : i+NeedleIdSize+OffsetSize]) - size := util.BytesToUint32(bytes[i+NeedleIdSize+OffsetSize : i+NeedleIdSize+OffsetSize+SizeSize]) - - if offset > 0 { - m.Set(NeedleId(key), offset, size) - } else { - //delete(m, key) - } - } - - count, e = file.Read(bytes) - } -} diff --git a/weed/storage/needle/compact_map_test.go b/weed/storage/needle/compact_map_test.go deleted file mode 100644 index b4cbb446a..000000000 --- a/weed/storage/needle/compact_map_test.go +++ /dev/null @@ -1,78 +0,0 @@ -package needle - -import ( - . "github.com/chrislusf/seaweedfs/weed/storage/types" - "testing" -) - -func TestIssue52(t *testing.T) { - m := NewCompactMap() - m.Set(NeedleId(10002), 10002, 10002) - if element, ok := m.Get(NeedleId(10002)); ok { - println("key", 10002, "ok", ok, element.Key, element.Offset, element.Size) - } - m.Set(NeedleId(10001), 10001, 10001) - if element, ok := m.Get(NeedleId(10002)); ok { - println("key", 10002, "ok", ok, element.Key, element.Offset, element.Size) - } else { - t.Fatal("key 10002 missing after setting 10001") - } -} - -func TestXYZ(t *testing.T) { - m := NewCompactMap() - for i := uint32(0); i < 100*batch; i += 2 { - m.Set(NeedleId(i), Offset(i), i) - } - - for i := uint32(0); i < 100*batch; i += 37 { - m.Delete(NeedleId(i)) - } - - for i := uint32(0); i < 10*batch; i += 3 { - m.Set(NeedleId(i), Offset(i+11), i+5) - } - - // for i := uint32(0); i < 100; i++ { - // if v := m.Get(Key(i)); v != nil { - // glog.V(4).Infoln(i, "=", v.Key, v.Offset, v.Size) - // } - // } - - for i := uint32(0); i < 10*batch; i++ { - v, ok := m.Get(NeedleId(i)) - if i%3 == 0 { - if !ok { - t.Fatal("key", i, "missing!") - } - if v.Size != i+5 { - t.Fatal("key", i, "size", v.Size) - } - } else if i%37 == 0 { - if ok && v.Size > 0 { - t.Fatal("key", i, "should have been deleted needle value", v) - } - } else if i%2 == 0 { - if v.Size != i { - t.Fatal("key", i, "size", v.Size) - } - } - } - - for i := uint32(10 * batch); i < 100*batch; i++ { - v, ok := m.Get(NeedleId(i)) - if i%37 == 0 { - if ok && v.Size > 0 { - t.Fatal("key", i, "should have been deleted needle value", v) - } - } else if i%2 == 0 { - if v == nil { - t.Fatal("key", i, "missing") - } - if v.Size != i { - t.Fatal("key", i, "size", v.Size) - } - } - } - -} diff --git a/weed/storage/crc.go b/weed/storage/needle/crc.go index e31e0f815..6fd910bb7 100644 --- a/weed/storage/crc.go +++ b/weed/storage/needle/crc.go @@ -1,7 +1,8 @@ -package storage +package needle import ( "fmt" + "github.com/klauspost/crc32" "github.com/chrislusf/seaweedfs/weed/util" diff --git a/weed/storage/needle/file_id.go b/weed/storage/needle/file_id.go new file mode 100644 index 000000000..5dabb0f25 --- /dev/null +++ b/weed/storage/needle/file_id.go @@ -0,0 +1,81 @@ +package needle + +import ( + "encoding/hex" + "fmt" + "strings" + + . 
"github.com/chrislusf/seaweedfs/weed/storage/types" +) + +type FileId struct { + VolumeId VolumeId + Key NeedleId + Cookie Cookie +} + +func NewFileIdFromNeedle(VolumeId VolumeId, n *Needle) *FileId { + return &FileId{VolumeId: VolumeId, Key: n.Id, Cookie: n.Cookie} +} + +func NewFileId(VolumeId VolumeId, key uint64, cookie uint32) *FileId { + return &FileId{VolumeId: VolumeId, Key: Uint64ToNeedleId(key), Cookie: Uint32ToCookie(cookie)} +} + +// Deserialize the file id +func ParseFileIdFromString(fid string) (*FileId, error) { + vid, needleKeyCookie, err := splitVolumeId(fid) + if err != nil { + return nil, err + } + volumeId, err := NewVolumeId(vid) + if err != nil { + return nil, err + } + + nid, cookie, err := ParseNeedleIdCookie(needleKeyCookie) + if err != nil { + return nil, err + } + fileId := &FileId{VolumeId: volumeId, Key: nid, Cookie: cookie} + return fileId, nil +} + +func (n *FileId) GetVolumeId() VolumeId { + return n.VolumeId +} + +func (n *FileId) GetNeedleId() NeedleId { + return n.Key +} + +func (n *FileId) GetCookie() Cookie { + return n.Cookie +} + +func (n *FileId) GetNeedleIdCookie() string { + return formatNeedleIdCookie(n.Key, n.Cookie) +} + +func (n *FileId) String() string { + return n.VolumeId.String() + "," + formatNeedleIdCookie(n.Key, n.Cookie) +} + +func formatNeedleIdCookie(key NeedleId, cookie Cookie) string { + bytes := make([]byte, NeedleIdSize+CookieSize) + NeedleIdToBytes(bytes[0:NeedleIdSize], key) + CookieToBytes(bytes[NeedleIdSize:NeedleIdSize+CookieSize], cookie) + nonzero_index := 0 + for ; bytes[nonzero_index] == 0; nonzero_index++ { + } + return hex.EncodeToString(bytes[nonzero_index:]) +} + +// copied from operation/delete_content.go, to cut off cycle dependency +func splitVolumeId(fid string) (vid string, key_cookie string, err error) { + commaIndex := strings.Index(fid, ",") + if commaIndex <= 0 { + return "", "", fmt.Errorf("wrong fid format") + } + return fid[:commaIndex], fid[commaIndex+1:], nil +} diff --git a/weed/storage/needle/file_id_test.go b/weed/storage/needle/file_id_test.go new file mode 100644 index 000000000..a1a2c61fc --- /dev/null +++ b/weed/storage/needle/file_id_test.go @@ -0,0 +1,55 @@ +package needle + +import ( + "github.com/chrislusf/seaweedfs/weed/storage/types" + "testing" +) + +func TestParseFileIdFromString(t *testing.T) { + fidStr1 := "100,12345678" + _, err := ParseFileIdFromString(fidStr1) + if err == nil { + t.Errorf("%s : KeyHash is too short", fidStr1) + } + + fidStr1 = "100, 12345678" + _, err = ParseFileIdFromString(fidStr1) + if err == nil { + t.Errorf("%s : needlId invalid syntax", fidStr1) + } + + fidStr1 = "100,123456789" + _, err = ParseFileIdFromString(fidStr1) + if err != nil { + t.Errorf("%s : should be OK", fidStr1) + } + + var fileId *FileId + fidStr1 = "100,123456789012345678901234" + fileId, err = ParseFileIdFromString(fidStr1) + if err != nil { + t.Errorf("%s : should be OK", fidStr1) + } + if !(fileId.VolumeId == VolumeId(100) && + fileId.Key == types.NeedleId(0x1234567890123456) && + fileId.Cookie == types.Cookie(types.Uint32ToCookie(uint32(0x78901234)))) { + t.Errorf("src : %s, dest : %v", fidStr1, fileId) + } + + fidStr1 = "100,abcd0000abcd" + fileId, err = ParseFileIdFromString(fidStr1) + if err != nil { + t.Errorf("%s : should be OK", fidStr1) + } + if !(fileId.VolumeId == VolumeId(100) && + fileId.Key == types.NeedleId(0xabcd) && + fileId.Cookie == types.Cookie(types.Uint32ToCookie(uint32(0xabcd)))) { + t.Errorf("src : %s, dest : %v", fidStr1, fileId) + } + + fidStr1 = 
"100,1234567890123456789012345" + _, err = ParseFileIdFromString(fidStr1) + if err == nil { + t.Errorf("%s : needleId is too long", fidStr1) + } +} diff --git a/weed/storage/needle.go b/weed/storage/needle/needle.go index 4d61c36d0..d3969e868 100644 --- a/weed/storage/needle.go +++ b/weed/storage/needle/needle.go @@ -1,4 +1,4 @@ -package storage +package needle import ( "encoding/json" @@ -10,7 +10,6 @@ import ( "github.com/chrislusf/seaweedfs/weed/images" . "github.com/chrislusf/seaweedfs/weed/storage/types" - "io/ioutil" ) const ( @@ -49,52 +48,28 @@ func (n *Needle) String() (str string) { return } -func ParseUpload(r *http.Request) ( - fileName string, data []byte, mimeType string, pairMap map[string]string, isGzipped bool, - modifiedTime uint64, ttl *TTL, isChunkedFile bool, e error) { - pairMap = make(map[string]string) - for k, v := range r.Header { - if len(v) > 0 && strings.HasPrefix(k, PairNamePrefix) { - pairMap[k] = v[0] - } - } - - if r.Method == "POST" { - fileName, data, mimeType, isGzipped, isChunkedFile, e = parseMultipart(r) - } else { - isGzipped = false - mimeType = r.Header.Get("Content-Type") - fileName = "" - data, e = ioutil.ReadAll(r.Body) - } - if e != nil { - return - } - - modifiedTime, _ = strconv.ParseUint(r.FormValue("ts"), 10, 64) - ttl, _ = ReadTTL(r.FormValue("ttl")) - - return -} -func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) { - var pairMap map[string]string - fname, mimeType, isGzipped, isChunkedFile := "", "", false, false +func CreateNeedleFromRequest(r *http.Request, fixJpgOrientation bool, sizeLimit int64) (n *Needle, originalSize int, e error) { n = new(Needle) - fname, n.Data, mimeType, pairMap, isGzipped, n.LastModified, n.Ttl, isChunkedFile, e = ParseUpload(r) + pu, e := ParseUpload(r, sizeLimit) if e != nil { return } - if len(fname) < 256 { - n.Name = []byte(fname) + n.Data = pu.Data + originalSize = pu.OriginalDataSize + n.LastModified = pu.ModifiedTime + n.Ttl = pu.Ttl + + if len(pu.FileName) < 256 { + n.Name = []byte(pu.FileName) n.SetHasName() } - if len(mimeType) < 256 { - n.Mime = []byte(mimeType) + if len(pu.MimeType) < 256 { + n.Mime = []byte(pu.MimeType) n.SetHasMime() } - if len(pairMap) != 0 { + if len(pu.PairMap) != 0 { trimmedPairMap := make(map[string]string) - for k, v := range pairMap { + for k, v := range pu.PairMap { trimmedPairMap[k[len(PairNamePrefix):]] = v } @@ -105,7 +80,7 @@ func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) { n.SetHasPairs() } } - if isGzipped { + if pu.IsGzipped { n.SetGzipped() } if n.LastModified == 0 { @@ -116,13 +91,13 @@ func NewNeedle(r *http.Request, fixJpgOrientation bool) (n *Needle, e error) { n.SetHasTtl() } - if isChunkedFile { + if pu.IsChunkedFile { n.SetIsChunkManifest() } if fixJpgOrientation { - loweredName := strings.ToLower(fname) - if mimeType == "image/jpeg" || strings.HasSuffix(loweredName, ".jpg") || strings.HasSuffix(loweredName, ".jpeg") { + loweredName := strings.ToLower(pu.FileName) + if pu.MimeType == "image/jpeg" || strings.HasSuffix(loweredName, ".jpg") || strings.HasSuffix(loweredName, ".jpeg") { n.Data = images.FixJpgOrientation(n.Data) } } @@ -156,7 +131,7 @@ func (n *Needle) ParsePath(fid string) (err error) { } if delta != "" { if d, e := strconv.ParseUint(delta, 10, 64); e == nil { - n.Id += NeedleId(d) + n.Id += Uint64ToNeedleId(d) } else { return e } diff --git a/weed/storage/needle/needle_parse_upload.go b/weed/storage/needle/needle_parse_upload.go new file mode 100644 index 000000000..85526aaa8 --- 
/dev/null
+++ b/weed/storage/needle/needle_parse_upload.go
@@ -0,0 +1,166 @@
+package needle
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"mime"
+	"net/http"
+	"path"
+	"strconv"
+	"strings"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/util"
+)
+
+type ParsedUpload struct {
+	FileName         string
+	Data             []byte
+	MimeType         string
+	PairMap          map[string]string
+	IsGzipped        bool
+	OriginalDataSize int
+	ModifiedTime     uint64
+	Ttl              *TTL
+	IsChunkedFile    bool
+	UncompressedData []byte
+}
+
+func ParseUpload(r *http.Request, sizeLimit int64) (pu *ParsedUpload, e error) {
+	pu = &ParsedUpload{}
+	pu.PairMap = make(map[string]string)
+	for k, v := range r.Header {
+		if len(v) > 0 && strings.HasPrefix(k, PairNamePrefix) {
+			pu.PairMap[k] = v[0]
+		}
+	}
+
+	if r.Method == "POST" {
+		e = parseMultipart(r, sizeLimit, pu)
+	} else {
+		e = parsePut(r, sizeLimit, pu)
+	}
+	if e != nil {
+		return
+	}
+
+	pu.ModifiedTime, _ = strconv.ParseUint(r.FormValue("ts"), 10, 64)
+	pu.Ttl, _ = ReadTTL(r.FormValue("ttl"))
+
+	pu.OriginalDataSize = len(pu.Data)
+	pu.UncompressedData = pu.Data
+	if pu.IsGzipped {
+		if unzipped, e := util.UnGzipData(pu.Data); e == nil {
+			pu.OriginalDataSize = len(unzipped)
+			pu.UncompressedData = unzipped
+		}
+	} else if shouldGzip, _ := util.IsGzippableFileType("", pu.MimeType); shouldGzip {
+		if compressedData, err := util.GzipData(pu.Data); err == nil {
+			pu.Data = compressedData
+			pu.IsGzipped = true
+		}
+	}
+
+	return
+}
+
+func parsePut(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) {
+	pu.IsGzipped = r.Header.Get("Content-Encoding") == "gzip"
+	pu.MimeType = r.Header.Get("Content-Type")
+	pu.FileName = ""
+	pu.Data, e = ioutil.ReadAll(io.LimitReader(r.Body, sizeLimit+1))
+	if e == io.EOF || int64(len(pu.Data)) == sizeLimit+1 {
+		io.Copy(ioutil.Discard, r.Body)
+	}
+	r.Body.Close()
+	return nil
+}
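+
+// Note: the request body is read through io.LimitReader with sizeLimit+1, so
+// reading exactly sizeLimit+1 bytes is the tell-tale that the upload exceeds
+// the size limit.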
+ if dotIndex > 0 { + ext = strings.ToLower(pu.FileName[dotIndex:]) + mtype = mime.TypeByExtension(ext) + } + contentType := part.Header.Get("Content-Type") + if contentType != "" && contentType != "application/octet-stream" && mtype != contentType { + pu.MimeType = contentType //only return mime type if not deductable + mtype = contentType + } + + pu.IsGzipped = part.Header.Get("Content-Encoding") == "gzip" + } + + return +} diff --git a/weed/storage/needle_read_write.go b/weed/storage/needle/needle_read_write.go index aa5974842..7f8aa4823 100644 --- a/weed/storage/needle_read_write.go +++ b/weed/storage/needle/needle_read_write.go @@ -1,12 +1,13 @@ -package storage +package needle import ( "errors" "fmt" "io" - "os" + "math" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/backend" . "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/chrislusf/seaweedfs/weed/util" ) @@ -24,48 +25,38 @@ const ( ) func (n *Needle) DiskSize(version Version) int64 { - return getActualSize(n.Size, version) + return GetActualSize(n.Size, version) } -func (n *Needle) Append(w io.Writer, version Version) (size uint32, actualSize int64, err error) { - if s, ok := w.(io.Seeker); ok { - if end, e := s.Seek(0, 1); e == nil { - defer func(s io.Seeker, off int64) { - if err != nil { - if _, e = s.Seek(off, 0); e != nil { - glog.V(0).Infof("Failed to seek %s back to %d with error: %v", w, off, e) - } - } - }(s, end) - } else { - err = fmt.Errorf("Cannot Read Current Volume Position: %v", e) - return - } - } +func (n *Needle) prepareWriteBuffer(version Version) ([]byte, uint32, int64, error) { + + writeBytes := make([]byte, 0) + switch version { case Version1: - header := make([]byte, NeedleEntrySize) + header := make([]byte, NeedleHeaderSize) CookieToBytes(header[0:CookieSize], n.Cookie) NeedleIdToBytes(header[CookieSize:CookieSize+NeedleIdSize], n.Id) n.Size = uint32(len(n.Data)) - size = n.Size util.Uint32toBytes(header[CookieSize+NeedleIdSize:CookieSize+NeedleIdSize+SizeSize], n.Size) - if _, err = w.Write(header); err != nil { - return - } - if _, err = w.Write(n.Data); err != nil { - return - } - actualSize = NeedleEntrySize + int64(n.Size) + size := n.Size + actualSize := NeedleHeaderSize + int64(n.Size) + writeBytes = append(writeBytes, header...) + writeBytes = append(writeBytes, n.Data...) padding := PaddingLength(n.Size, version) util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value()) - _, err = w.Write(header[0 : NeedleChecksumSize+padding]) - return + writeBytes = append(writeBytes, header[0:NeedleChecksumSize+padding]...) 
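The Version1 record assembled above is fixed-layout: a 16-byte header (4-byte cookie, 8-byte id, 4-byte size), the data, a 4-byte CRC, then padding so every record ends on an 8-byte boundary. A self-contained sketch of the padding arithmetic, assuming the usual constants from weed/storage/types (NeedleHeaderSize=16, NeedleChecksumSize=4, NeedlePaddingSize=8):

```go
package main

import "fmt"

// Assumed constants mirroring weed/storage/types in the default build.
const (
	needleHeaderSize   = 16 // cookie(4) + needle id(8) + size(4)
	needleChecksumSize = 4
	needlePaddingSize  = 8
)

// Same arithmetic as PaddingLength for Version1/Version2: pad the whole
// record (header + data + checksum) up to the next 8-byte boundary.
// An already-aligned record still carries a full 8 bytes of padding.
func paddingLength(dataSize uint32) uint32 {
	return needlePaddingSize - ((needleHeaderSize + dataSize + needleChecksumSize) % needlePaddingSize)
}

func main() {
	for _, size := range []uint32{0, 1, 4, 11, 12} {
		total := needleHeaderSize + size + needleChecksumSize + paddingLength(size)
		fmt.Printf("data=%2d padding=%d total on disk=%d\n", size, paddingLength(size), total)
	}
}
```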
+ return writeBytes, size, actualSize, nil case Version2, Version3: - header := make([]byte, NeedleEntrySize+TimestampSize) // adding timestamp to reuse it and avoid extra allocation + header := make([]byte, NeedleHeaderSize+TimestampSize) // adding timestamp to reuse it and avoid extra allocation CookieToBytes(header[0:CookieSize], n.Cookie) NeedleIdToBytes(header[CookieSize:CookieSize+NeedleIdSize], n.Id) - n.DataSize, n.NameSize, n.MimeSize = uint32(len(n.Data)), uint8(len(n.Name)), uint8(len(n.Mime)) + if len(n.Name) >= math.MaxUint8 { + n.NameSize = math.MaxUint8 + } else { + n.NameSize = uint8(len(n.Name)) + } + n.DataSize, n.MimeSize = uint32(len(n.Data)), uint8(len(n.Mime)) if n.DataSize > 0 { n.Size = 4 + n.DataSize + 1 if n.HasName() { @@ -86,130 +77,141 @@ func (n *Needle) Append(w io.Writer, version Version) (size uint32, actualSize i } else { n.Size = 0 } - size = n.DataSize util.Uint32toBytes(header[CookieSize+NeedleIdSize:CookieSize+NeedleIdSize+SizeSize], n.Size) - if _, err = w.Write(header[0:NeedleEntrySize]); err != nil { - return - } + writeBytes = append(writeBytes, header[0:NeedleHeaderSize]...) if n.DataSize > 0 { util.Uint32toBytes(header[0:4], n.DataSize) - if _, err = w.Write(header[0:4]); err != nil { - return - } - if _, err = w.Write(n.Data); err != nil { - return - } + writeBytes = append(writeBytes, header[0:4]...) + writeBytes = append(writeBytes, n.Data...) util.Uint8toBytes(header[0:1], n.Flags) - if _, err = w.Write(header[0:1]); err != nil { - return - } + writeBytes = append(writeBytes, header[0:1]...) if n.HasName() { util.Uint8toBytes(header[0:1], n.NameSize) - if _, err = w.Write(header[0:1]); err != nil { - return - } - if _, err = w.Write(n.Name); err != nil { - return - } + writeBytes = append(writeBytes, header[0:1]...) + writeBytes = append(writeBytes, n.Name[:n.NameSize]...) } if n.HasMime() { util.Uint8toBytes(header[0:1], n.MimeSize) - if _, err = w.Write(header[0:1]); err != nil { - return - } - if _, err = w.Write(n.Mime); err != nil { - return - } + writeBytes = append(writeBytes, header[0:1]...) + writeBytes = append(writeBytes, n.Mime...) } if n.HasLastModifiedDate() { util.Uint64toBytes(header[0:8], n.LastModified) - if _, err = w.Write(header[8-LastModifiedBytesLength : 8]); err != nil { - return - } + writeBytes = append(writeBytes, header[8-LastModifiedBytesLength:8]...) } if n.HasTtl() && n.Ttl != nil { n.Ttl.ToBytes(header[0:TtlBytesLength]) - if _, err = w.Write(header[0:TtlBytesLength]); err != nil { - return - } + writeBytes = append(writeBytes, header[0:TtlBytesLength]...) } if n.HasPairs() { util.Uint16toBytes(header[0:2], n.PairsSize) - if _, err = w.Write(header[0:2]); err != nil { - return - } - if _, err = w.Write(n.Pairs); err != nil { - return - } + writeBytes = append(writeBytes, header[0:2]...) + writeBytes = append(writeBytes, n.Pairs...) } } padding := PaddingLength(n.Size, version) util.Uint32toBytes(header[0:NeedleChecksumSize], n.Checksum.Value()) if version == Version2 { - _, err = w.Write(header[0 : NeedleChecksumSize+padding]) + writeBytes = append(writeBytes, header[0:NeedleChecksumSize+padding]...) } else { // version3 util.Uint64toBytes(header[NeedleChecksumSize:NeedleChecksumSize+TimestampSize], n.AppendAtNs) - _, err = w.Write(header[0 : NeedleChecksumSize+TimestampSize+padding]) + writeBytes = append(writeBytes, header[0:NeedleChecksumSize+TimestampSize+padding]...) 
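One behavioral fix in the Version2/Version3 path above deserves a note: NameSize is a uint8, so the new guard clamps names of 255 bytes or more, and the write then slices n.Name[:n.NameSize]. Without the clamp, uint8(len(n.Name)) wraps silently. A standalone sketch of the failure mode (not SeaweedFS code):

```go
package main

import (
	"fmt"
	"math"
)

// clampNameSize mirrors the new guard: a uint8 length field can only
// describe 0..255 bytes, so longer names must be truncated up front.
func clampNameSize(name []byte) uint8 {
	if len(name) >= math.MaxUint8 {
		return math.MaxUint8
	}
	return uint8(len(name))
}

func main() {
	long := make([]byte, 300)
	fmt.Println(uint8(len(long)))                // 44: 300 wraps around mod 256
	fmt.Println(clampNameSize(long))             // 255: what the new code stores
	fmt.Println(len(long[:clampNameSize(long)])) // 255 bytes actually written
}
```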
} - return n.DataSize, getActualSize(n.Size, version), err + return writeBytes, n.DataSize, GetActualSize(n.Size, version), nil } - return 0, 0, fmt.Errorf("Unsupported Version! (%d)", version) + + return writeBytes, 0, 0, fmt.Errorf("Unsupported Version! (%d)", version) } -func ReadNeedleBlob(r *os.File, offset int64, size uint32, version Version) (dataSlice []byte, err error) { - dataSlice = make([]byte, int(getActualSize(size, version))) +func (n *Needle) Append(w backend.BackendStorageFile, version Version) (offset uint64, size uint32, actualSize int64, err error) { + + if end, _, e := w.GetStat(); e == nil { + defer func(w backend.BackendStorageFile, off int64) { + if err != nil { + if te := w.Truncate(end); te != nil { + glog.V(0).Infof("Failed to truncate %s back to %d with error: %v", w.Name(), end, te) + } + } + }(w, end) + offset = uint64(end) + } else { + err = fmt.Errorf("Cannot Read Current Volume Position: %v", e) + return + } + + bytesToWrite, size, actualSize, err := n.prepareWriteBuffer(version) + + if err == nil { + _, err = w.WriteAt(bytesToWrite, int64(offset)) + } + + return offset, size, actualSize, err +} + +func ReadNeedleBlob(r backend.BackendStorageFile, offset int64, size uint32, version Version) (dataSlice []byte, err error) { + + dataSize := GetActualSize(size, version) + dataSlice = make([]byte, int(dataSize)) + _, err = r.ReadAt(dataSlice, offset) return dataSlice, err + } -func (n *Needle) ReadData(r *os.File, offset int64, size uint32, version Version) (err error) { - bytes, err := ReadNeedleBlob(r, offset, size, version) - if err != nil { - return err - } +// ReadBytes hydrates the needle from the bytes buffer, with only n.Id is set. +func (n *Needle) ReadBytes(bytes []byte, offset int64, size uint32, version Version) (err error) { n.ParseNeedleHeader(bytes) if n.Size != size { - return fmt.Errorf("File Entry Not Found. Needle id %d expected size %d Memory %d", n.Id, n.Size, size) + return fmt.Errorf("entry not found: offset %d found id %d size %d, expected size %d", offset, n.Id, n.Size, size) } switch version { case Version1: - n.Data = bytes[NeedleEntrySize : NeedleEntrySize+size] + n.Data = bytes[NeedleHeaderSize : NeedleHeaderSize+size] case Version2, Version3: - n.readNeedleDataVersion2(bytes[NeedleEntrySize : NeedleEntrySize+int(n.Size)]) + err = n.readNeedleDataVersion2(bytes[NeedleHeaderSize : NeedleHeaderSize+int(n.Size)]) } - if size == 0 { - return nil + if err != nil && err != io.EOF { + return err } - checksum := util.BytesToUint32(bytes[NeedleEntrySize+size : NeedleEntrySize+size+NeedleChecksumSize]) - newChecksum := NewCRC(n.Data) - if checksum != newChecksum.Value() { - return errors.New("CRC error! Data On Disk Corrupted") + if size > 0 { + checksum := util.BytesToUint32(bytes[NeedleHeaderSize+size : NeedleHeaderSize+size+NeedleChecksumSize]) + newChecksum := NewCRC(n.Data) + if checksum != newChecksum.Value() { + return errors.New("CRC error! Data On Disk Corrupted") + } + n.Checksum = newChecksum } - n.Checksum = newChecksum if version == Version3 { - tsOffset := NeedleEntrySize + size + NeedleChecksumSize + tsOffset := NeedleHeaderSize + size + NeedleChecksumSize n.AppendAtNs = util.BytesToUint64(bytes[tsOffset : tsOffset+TimestampSize]) } return nil } +// ReadData hydrates the needle from the file, with only n.Id is set. 
+func (n *Needle) ReadData(r backend.BackendStorageFile, offset int64, size uint32, version Version) (err error) { + bytes, err := ReadNeedleBlob(r, offset, size, version) + if err != nil { + return err + } + return n.ReadBytes(bytes, offset, size, version) +} + func (n *Needle) ParseNeedleHeader(bytes []byte) { n.Cookie = BytesToCookie(bytes[0:CookieSize]) n.Id = BytesToNeedleId(bytes[CookieSize : CookieSize+NeedleIdSize]) - n.Size = util.BytesToUint32(bytes[CookieSize+NeedleIdSize : NeedleEntrySize]) + n.Size = util.BytesToUint32(bytes[CookieSize+NeedleIdSize : NeedleHeaderSize]) } -func (n *Needle) readNeedleDataVersion2(bytes []byte) { +func (n *Needle) readNeedleDataVersion2(bytes []byte) (err error) { index, lenBytes := 0, len(bytes) if index < lenBytes { n.DataSize = util.BytesToUint32(bytes[index : index+4]) index = index + 4 if int(n.DataSize)+index > lenBytes { - // this if clause is due to bug #87 and #93, fixed in v0.69 - // remove this clause later - return + return fmt.Errorf("index out of range %d", 1) } n.Data = bytes[index : index+int(n.DataSize)] index = index + int(n.DataSize) @@ -219,53 +221,75 @@ func (n *Needle) readNeedleDataVersion2(bytes []byte) { if index < lenBytes && n.HasName() { n.NameSize = uint8(bytes[index]) index = index + 1 + if int(n.NameSize)+index > lenBytes { + return fmt.Errorf("index out of range %d", 2) + } n.Name = bytes[index : index+int(n.NameSize)] index = index + int(n.NameSize) } if index < lenBytes && n.HasMime() { n.MimeSize = uint8(bytes[index]) index = index + 1 + if int(n.MimeSize)+index > lenBytes { + return fmt.Errorf("index out of range %d", 3) + } n.Mime = bytes[index : index+int(n.MimeSize)] index = index + int(n.MimeSize) } if index < lenBytes && n.HasLastModifiedDate() { + if LastModifiedBytesLength+index > lenBytes { + return fmt.Errorf("index out of range %d", 4) + } n.LastModified = util.BytesToUint64(bytes[index : index+LastModifiedBytesLength]) index = index + LastModifiedBytesLength } if index < lenBytes && n.HasTtl() { + if TtlBytesLength+index > lenBytes { + return fmt.Errorf("index out of range %d", 5) + } n.Ttl = LoadTTLFromBytes(bytes[index : index+TtlBytesLength]) index = index + TtlBytesLength } if index < lenBytes && n.HasPairs() { + if 2+index > lenBytes { + return fmt.Errorf("index out of range %d", 6) + } n.PairsSize = util.BytesToUint16(bytes[index : index+2]) index += 2 + if int(n.PairsSize)+index > lenBytes { + return fmt.Errorf("index out of range %d", 7) + } end := index + int(n.PairsSize) n.Pairs = bytes[index:end] index = end } + return nil } -func ReadNeedleHeader(r *os.File, version Version, offset int64) (n *Needle, bodyLength int64, err error) { +func ReadNeedleHeader(r backend.BackendStorageFile, version Version, offset int64) (n *Needle, bytes []byte, bodyLength int64, err error) { n = new(Needle) if version == Version1 || version == Version2 || version == Version3 { - bytes := make([]byte, NeedleEntrySize) + bytes = make([]byte, NeedleHeaderSize) + var count int count, err = r.ReadAt(bytes, offset) if count <= 0 || err != nil { - return nil, 0, err + return nil, bytes, 0, err } + n.ParseNeedleHeader(bytes) bodyLength = NeedleBodyLength(n.Size, version) } + return } func PaddingLength(needleSize uint32, version Version) uint32 { if version == Version3 { // this is same value as version2, but just listed here for clarity - return NeedlePaddingSize - ((NeedleEntrySize + needleSize + NeedleChecksumSize + TimestampSize) % NeedlePaddingSize) + return NeedlePaddingSize - ((NeedleHeaderSize + needleSize + 
NeedleChecksumSize + TimestampSize) % NeedlePaddingSize) } - return NeedlePaddingSize - ((NeedleEntrySize + needleSize + NeedleChecksumSize) % NeedlePaddingSize) + return NeedlePaddingSize - ((NeedleHeaderSize + needleSize + NeedleChecksumSize) % NeedlePaddingSize) } func NeedleBodyLength(needleSize uint32, version Version) int64 { @@ -277,27 +301,40 @@ func NeedleBodyLength(needleSize uint32, version Version) int64 { //n should be a needle already read the header //the input stream will read until next file entry -func (n *Needle) ReadNeedleBody(r *os.File, version Version, offset int64, bodyLength int64) (err error) { +func (n *Needle) ReadNeedleBody(r backend.BackendStorageFile, version Version, offset int64, bodyLength int64) (bytes []byte, err error) { + if bodyLength <= 0 { + return nil, nil + } + bytes = make([]byte, bodyLength) + if _, err = r.ReadAt(bytes, offset); err != nil { + return + } + + err = n.ReadNeedleBodyBytes(bytes, version) + + return +} + +func (n *Needle) ReadNeedleBodyBytes(needleBody []byte, version Version) (err error) { + + if len(needleBody) <= 0 { return nil } switch version { case Version1: - bytes := make([]byte, bodyLength) - if _, err = r.ReadAt(bytes, offset); err != nil { - return - } - n.Data = bytes[:n.Size] + n.Data = needleBody[:n.Size] n.Checksum = NewCRC(n.Data) case Version2, Version3: - bytes := make([]byte, bodyLength) - if _, err = r.ReadAt(bytes, offset); err != nil { - return - } - n.readNeedleDataVersion2(bytes[0:n.Size]) + err = n.readNeedleDataVersion2(needleBody[0:n.Size]) n.Checksum = NewCRC(n.Data) + + if version == Version3 { + tsOffset := n.Size + NeedleChecksumSize + n.AppendAtNs = util.BytesToUint64(needleBody[tsOffset : tsOffset+TimestampSize]) + } default: - err = fmt.Errorf("Unsupported Version! 
(%d)", version) + err = fmt.Errorf("unsupported version %d!", version) } return } @@ -348,3 +385,7 @@ func (n *Needle) HasPairs() bool { func (n *Needle) SetHasPairs() { n.Flags = n.Flags | FlagHasPairs } + +func GetActualSize(size uint32, version Version) int64 { + return NeedleHeaderSize + NeedleBodyLength(size, version) +} diff --git a/weed/storage/needle/needle_read_write_test.go b/weed/storage/needle/needle_read_write_test.go new file mode 100644 index 000000000..47582dd26 --- /dev/null +++ b/weed/storage/needle/needle_read_write_test.go @@ -0,0 +1,65 @@ +package needle + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/types" +) + +func TestAppend(t *testing.T) { + n := &Needle{ + + Cookie: types.Cookie(123), // Cookie Cookie `comment:"random number to mitigate brute force lookups"` + Id: types.NeedleId(123), // Id NeedleId `comment:"needle id"` + Size: 8, // Size uint32 `comment:"sum of DataSize,Data,NameSize,Name,MimeSize,Mime"` + DataSize: 4, // DataSize uint32 `comment:"Data size"` //version2 + Data: []byte("abcd"), // Data []byte `comment:"The actual file data"` + Flags: 0, // Flags byte `comment:"boolean flags"` //version2 + NameSize: 0, // NameSize uint8 //version2 + Name: nil, // Name []byte `comment:"maximum 256 characters"` //version2 + MimeSize: 0, // MimeSize uint8 //version2 + Mime: nil, // Mime []byte `comment:"maximum 256 characters"` //version2 + PairsSize: 0, // PairsSize uint16 //version2 + Pairs: nil, // Pairs []byte `comment:"additional name value pairs, json format, maximum 6 + LastModified: 123, // LastModified uint64 //only store LastModifiedBytesLength bytes, which is 5 bytes + Ttl: nil, // Ttl *TTL + Checksum: 123, // Checksum CRC `comment:"CRC32 to check integrity"` + AppendAtNs: 123, // AppendAtNs uint64 `comment:"append timestamp in nano seconds"` //version3 + Padding: nil, // Padding []byte `comment:"Aligned to 8 bytes"` + } + + tempFile, err := ioutil.TempFile("", ".dat") + if err != nil { + t.Errorf("Fail TempFile. %v", err) + return + } + + /* + uint8 : 0 to 255 + uint16 : 0 to 65535 + uint32 : 0 to 4294967295 + uint64 : 0 to 18446744073709551615 + int8 : -128 to 127 + int16 : -32768 to 32767 + int32 : -2147483648 to 2147483647 + int64 : -9223372036854775808 to 9223372036854775807 + */ + + fileSize := int64(4294967295) + 10000 + tempFile.Truncate(fileSize) + defer func() { + tempFile.Close() + os.Remove(tempFile.Name()) + }() + + datBackend := backend.NewDiskFile(tempFile) + defer datBackend.Close() + + offset, _, _, _ := n.Append(datBackend, CurrentVersion) + if offset != uint64(fileSize) { + t.Errorf("Fail to Append Needle.") + } +} diff --git a/weed/storage/needle_test.go b/weed/storage/needle/needle_test.go index 65036409c..0f2dde98e 100644 --- a/weed/storage/needle_test.go +++ b/weed/storage/needle/needle_test.go @@ -1,8 +1,9 @@ -package storage +package needle import ( - "github.com/chrislusf/seaweedfs/weed/storage/types" "testing" + + "github.com/chrislusf/seaweedfs/weed/storage/types" ) func TestParseKeyHash(t *testing.T) { diff --git a/weed/storage/needle/needle_value.go b/weed/storage/needle/needle_value.go deleted file mode 100644 index b15d25245..000000000 --- a/weed/storage/needle/needle_value.go +++ /dev/null @@ -1,21 +0,0 @@ -package needle - -import ( - . 
"github.com/chrislusf/seaweedfs/weed/storage/types" - "github.com/google/btree" -) - -const ( - batch = 100000 -) - -type NeedleValue struct { - Key NeedleId - Offset Offset `comment:"Volume offset"` //since aligned to 8 bytes, range is 4G*8=32G - Size uint32 `comment:"Size of the data portion"` -} - -func (this NeedleValue) Less(than btree.Item) bool { - that := than.(NeedleValue) - return this.Key < that.Key -} diff --git a/weed/storage/volume_id.go b/weed/storage/needle/volume_id.go index 0333c6cf0..3366c14bf 100644 --- a/weed/storage/volume_id.go +++ b/weed/storage/needle/volume_id.go @@ -1,4 +1,4 @@ -package storage +package needle import ( "strconv" @@ -10,9 +10,9 @@ func NewVolumeId(vid string) (VolumeId, error) { volumeId, err := strconv.ParseUint(vid, 10, 64) return VolumeId(volumeId), err } -func (vid *VolumeId) String() string { - return strconv.FormatUint(uint64(*vid), 10) +func (vid VolumeId) String() string { + return strconv.FormatUint(uint64(vid), 10) } -func (vid *VolumeId) Next() VolumeId { - return VolumeId(uint32(*vid) + 1) +func (vid VolumeId) Next() VolumeId { + return VolumeId(uint32(vid) + 1) } diff --git a/weed/storage/needle/volume_id_test.go b/weed/storage/needle/volume_id_test.go new file mode 100644 index 000000000..5ffcd90f6 --- /dev/null +++ b/weed/storage/needle/volume_id_test.go @@ -0,0 +1,45 @@ +package needle + +import "testing" + +func TestNewVolumeId(t *testing.T) { + if _, err := NewVolumeId("1"); err != nil { + t.Error(err) + } + + if _, err := NewVolumeId("a"); err != nil { + t.Logf("a is not legal volume id, %v", err) + } +} + +func TestVolumeId_String(t *testing.T) { + if str := VolumeId(10).String(); str != "10" { + t.Errorf("to string failed") + } + + vid := VolumeId(11) + if str := vid.String(); str != "11" { + t.Errorf("to string failed") + } + + pvid := &vid + if str := pvid.String(); str != "11" { + t.Errorf("to string failed") + } +} + +func TestVolumeId_Next(t *testing.T) { + if vid := VolumeId(10).Next(); vid != VolumeId(11) { + t.Errorf("get next volume id failed") + } + + vid := VolumeId(11) + if new := vid.Next(); new != 12 { + t.Errorf("get next volume id failed") + } + + pvid := &vid + if new := pvid.Next(); new != 12 { + t.Errorf("get next volume id failed") + } +} diff --git a/weed/storage/volume_ttl.go b/weed/storage/needle/volume_ttl.go index 4318bb048..179057876 100644 --- a/weed/storage/volume_ttl.go +++ b/weed/storage/needle/volume_ttl.go @@ -1,4 +1,4 @@ -package storage +package needle import ( "strconv" @@ -16,8 +16,8 @@ const ( ) type TTL struct { - count byte - unit byte + Count byte + Unit byte } var EMPTY_TTL = &TTL{} @@ -43,12 +43,15 @@ func ReadTTL(ttlString string) (*TTL, error) { } count, err := strconv.Atoi(string(countBytes)) unit := toStoredByte(unitByte) - return &TTL{count: byte(count), unit: unit}, err + return &TTL{Count: byte(count), Unit: unit}, err } // read stored bytes to a ttl func LoadTTLFromBytes(input []byte) (t *TTL) { - return &TTL{count: input[0], unit: input[1]} + if input[0] == 0 && input[1] == 0 { + return EMPTY_TTL + } + return &TTL{Count: input[0], Unit: input[1]} } // read stored bytes to a ttl @@ -61,25 +64,28 @@ func LoadTTLFromUint32(ttl uint32) (t *TTL) { // save stored bytes to an output with 2 bytes func (t *TTL) ToBytes(output []byte) { - output[0] = t.count - output[1] = t.unit + output[0] = t.Count + output[1] = t.Unit } func (t *TTL) ToUint32() (output uint32) { - output = uint32(t.count) << 8 - output += uint32(t.unit) + if t == nil || t.Count == 0 { + return 0 + } + output = 
uint32(t.Count) << 8 + output += uint32(t.Unit) return output } func (t *TTL) String() string { - if t == nil || t.count == 0 { + if t == nil || t.Count == 0 { return "" } - if t.unit == Empty { + if t.Unit == Empty { return "" } - countString := strconv.Itoa(int(t.count)) - switch t.unit { + countString := strconv.Itoa(int(t.Count)) + switch t.Unit { case Minute: return countString + "m" case Hour: @@ -115,21 +121,21 @@ func toStoredByte(readableUnitByte byte) byte { } func (t TTL) Minutes() uint32 { - switch t.unit { + switch t.Unit { case Empty: return 0 case Minute: - return uint32(t.count) + return uint32(t.Count) case Hour: - return uint32(t.count) * 60 + return uint32(t.Count) * 60 case Day: - return uint32(t.count) * 60 * 24 + return uint32(t.Count) * 60 * 24 case Week: - return uint32(t.count) * 60 * 24 * 7 + return uint32(t.Count) * 60 * 24 * 7 case Month: - return uint32(t.count) * 60 * 24 * 31 + return uint32(t.Count) * 60 * 24 * 31 case Year: - return uint32(t.count) * 60 * 24 * 365 + return uint32(t.Count) * 60 * 24 * 365 } return 0 } diff --git a/weed/storage/volume_ttl_test.go b/weed/storage/needle/volume_ttl_test.go index 216469a4c..0afebebf5 100644 --- a/weed/storage/volume_ttl_test.go +++ b/weed/storage/needle/volume_ttl_test.go @@ -1,4 +1,4 @@ -package storage +package needle import ( "testing" diff --git a/weed/storage/volume_version.go b/weed/storage/needle/volume_version.go index fc0270c03..54daac77f 100644 --- a/weed/storage/volume_version.go +++ b/weed/storage/needle/volume_version.go @@ -1,4 +1,4 @@ -package storage +package needle type Version uint8 diff --git a/weed/storage/needle_byte_cache.go b/weed/storage/needle_byte_cache.go deleted file mode 100644 index 78c1ea862..000000000 --- a/weed/storage/needle_byte_cache.go +++ /dev/null @@ -1,11 +0,0 @@ -package storage - -import ( - "os" -) - -func getBytesForFileBlock(r *os.File, offset int64, readSize int) (dataSlice []byte, err error) { - dataSlice = make([]byte, readSize) - _, err = r.ReadAt(dataSlice, offset) - return dataSlice, err -} diff --git a/weed/storage/needle_map.go b/weed/storage/needle_map.go index 6d815679b..77d081ea7 100644 --- a/weed/storage/needle_map.go +++ b/weed/storage/needle_map.go @@ -2,27 +2,25 @@ package storage import ( "fmt" - "io/ioutil" "os" "sync" - "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" . 
"github.com/chrislusf/seaweedfs/weed/storage/types" - "github.com/chrislusf/seaweedfs/weed/util" ) type NeedleMapType int const ( - NeedleMapInMemory NeedleMapType = iota - NeedleMapLevelDb - NeedleMapBoltDb - NeedleMapBtree + NeedleMapInMemory NeedleMapType = iota + NeedleMapLevelDb // small memory footprint, 4MB total, 1 write buffer, 3 block buffer + NeedleMapLevelDbMedium // medium memory footprint, 8MB total, 3 write buffer, 5 block buffer + NeedleMapLevelDbLarge // large memory footprint, 12MB total, 4write buffer, 8 block buffer ) type NeedleMapper interface { Put(key NeedleId, offset Offset, size uint32) error - Get(key NeedleId) (element *needle.NeedleValue, ok bool) + Get(key NeedleId) (element *needle_map.NeedleValue, ok bool) Delete(key NeedleId, offset Offset) error Close() Destroy() error @@ -32,15 +30,13 @@ type NeedleMapper interface { DeletedCount() int MaxFileKey() NeedleId IndexFileSize() uint64 - IndexFileContent() ([]byte, error) - IndexFileName() string } type baseNeedleMapper struct { + mapMetric + indexFile *os.File indexFileAccessLock sync.Mutex - - mapMetric } func (nm *baseNeedleMapper) IndexFileSize() uint64 { @@ -51,21 +47,8 @@ func (nm *baseNeedleMapper) IndexFileSize() uint64 { return 0 } -func (nm *baseNeedleMapper) IndexFileName() string { - return nm.indexFile.Name() -} - -func IdxFileEntry(bytes []byte) (key NeedleId, offset Offset, size uint32) { - key = BytesToNeedleId(bytes[:NeedleIdSize]) - offset = BytesToOffset(bytes[NeedleIdSize : NeedleIdSize+OffsetSize]) - size = util.BytesToUint32(bytes[NeedleIdSize+OffsetSize : NeedleIdSize+OffsetSize+SizeSize]) - return -} func (nm *baseNeedleMapper) appendToIndexFile(key NeedleId, offset Offset, size uint32) error { - bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize) - NeedleIdToBytes(bytes[0:NeedleIdSize], key) - OffsetToBytes(bytes[NeedleIdSize:NeedleIdSize+OffsetSize], offset) - util.Uint32toBytes(bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], size) + bytes := needle_map.ToBytes(key, offset, size) nm.indexFileAccessLock.Lock() defer nm.indexFileAccessLock.Unlock() @@ -76,8 +59,3 @@ func (nm *baseNeedleMapper) appendToIndexFile(key NeedleId, offset Offset, size _, err := nm.indexFile.Write(bytes) return err } -func (nm *baseNeedleMapper) IndexFileContent() ([]byte, error) { - nm.indexFileAccessLock.Lock() - defer nm.indexFileAccessLock.Unlock() - return ioutil.ReadFile(nm.indexFile.Name()) -} diff --git a/weed/storage/needle_map/compact_map.go b/weed/storage/needle_map/compact_map.go new file mode 100644 index 000000000..76783d0b0 --- /dev/null +++ b/weed/storage/needle_map/compact_map.go @@ -0,0 +1,302 @@ +package needle_map + +import ( + "sort" + "sync" + + . 
"github.com/chrislusf/seaweedfs/weed/storage/types" +) + +const ( + batch = 100000 +) + +type SectionalNeedleId uint32 + +const SectionalNeedleIdLimit = 1<<32 - 1 + +type SectionalNeedleValue struct { + Key SectionalNeedleId + OffsetLower OffsetLower `comment:"Volume offset"` //since aligned to 8 bytes, range is 4G*8=32G + Size uint32 `comment:"Size of the data portion"` +} + +type SectionalNeedleValueExtra struct { + OffsetHigher OffsetHigher +} + +type CompactSection struct { + sync.RWMutex + values []SectionalNeedleValue + valuesExtra []SectionalNeedleValueExtra + overflow Overflow + overflowExtra OverflowExtra + start NeedleId + end NeedleId + counter int +} + +type Overflow []SectionalNeedleValue +type OverflowExtra []SectionalNeedleValueExtra + +func NewCompactSection(start NeedleId) *CompactSection { + return &CompactSection{ + values: make([]SectionalNeedleValue, batch), + valuesExtra: make([]SectionalNeedleValueExtra, batch), + overflow: Overflow(make([]SectionalNeedleValue, 0)), + overflowExtra: OverflowExtra(make([]SectionalNeedleValueExtra, 0)), + start: start, + } +} + +//return old entry size +func (cs *CompactSection) Set(key NeedleId, offset Offset, size uint32) (oldOffset Offset, oldSize uint32) { + cs.Lock() + if key > cs.end { + cs.end = key + } + skey := SectionalNeedleId(key - cs.start) + if i := cs.binarySearchValues(skey); i >= 0 { + oldOffset.OffsetHigher, oldOffset.OffsetLower, oldSize = cs.valuesExtra[i].OffsetHigher, cs.values[i].OffsetLower, cs.values[i].Size + //println("key", key, "old size", ret) + cs.valuesExtra[i].OffsetHigher, cs.values[i].OffsetLower, cs.values[i].Size = offset.OffsetHigher, offset.OffsetLower, size + } else { + needOverflow := cs.counter >= batch + needOverflow = needOverflow || cs.counter > 0 && cs.values[cs.counter-1].Key > skey + if needOverflow { + //println("start", cs.start, "counter", cs.counter, "key", key) + if oldValueExtra, oldValue, found := cs.findOverflowEntry(skey); found { + oldOffset.OffsetHigher, oldOffset.OffsetLower, oldSize = oldValueExtra.OffsetHigher, oldValue.OffsetLower, oldValue.Size + } + cs.setOverflowEntry(skey, offset, size) + } else { + p := &cs.values[cs.counter] + p.Key, cs.valuesExtra[cs.counter].OffsetHigher, p.OffsetLower, p.Size = skey, offset.OffsetHigher, offset.OffsetLower, size + //println("added index", cs.counter, "key", key, cs.values[cs.counter].Key) + cs.counter++ + } + } + cs.Unlock() + return +} + +func (cs *CompactSection) setOverflowEntry(skey SectionalNeedleId, offset Offset, size uint32) { + needleValue := SectionalNeedleValue{Key: skey, OffsetLower: offset.OffsetLower, Size: size} + needleValueExtra := SectionalNeedleValueExtra{OffsetHigher: offset.OffsetHigher} + insertCandidate := sort.Search(len(cs.overflow), func(i int) bool { + return cs.overflow[i].Key >= needleValue.Key + }) + if insertCandidate != len(cs.overflow) && cs.overflow[insertCandidate].Key == needleValue.Key { + cs.overflow[insertCandidate] = needleValue + } else { + cs.overflow = append(cs.overflow, needleValue) + cs.overflowExtra = append(cs.overflowExtra, needleValueExtra) + for i := len(cs.overflow) - 1; i > insertCandidate; i-- { + cs.overflow[i] = cs.overflow[i-1] + cs.overflowExtra[i] = cs.overflowExtra[i-1] + } + cs.overflow[insertCandidate] = needleValue + } +} + +func (cs *CompactSection) findOverflowEntry(key SectionalNeedleId) (nve SectionalNeedleValueExtra, nv SectionalNeedleValue, found bool) { + foundCandidate := sort.Search(len(cs.overflow), func(i int) bool { + return cs.overflow[i].Key >= key + }) + 
if foundCandidate != len(cs.overflow) && cs.overflow[foundCandidate].Key == key { + return cs.overflowExtra[foundCandidate], cs.overflow[foundCandidate], true + } + return nve, nv, false +} + +func (cs *CompactSection) deleteOverflowEntry(key SectionalNeedleId) { + length := len(cs.overflow) + deleteCandidate := sort.Search(length, func(i int) bool { + return cs.overflow[i].Key >= key + }) + if deleteCandidate != length && cs.overflow[deleteCandidate].Key == key { + for i := deleteCandidate; i < length-1; i++ { + cs.overflow[i] = cs.overflow[i+1] + cs.overflowExtra[i] = cs.overflowExtra[i+1] + } + cs.overflow = cs.overflow[0 : length-1] + cs.overflowExtra = cs.overflowExtra[0 : length-1] + } +} + +//return old entry size +func (cs *CompactSection) Delete(key NeedleId) uint32 { + skey := SectionalNeedleId(key - cs.start) + cs.Lock() + ret := uint32(0) + if i := cs.binarySearchValues(skey); i >= 0 { + if cs.values[i].Size > 0 && cs.values[i].Size != TombstoneFileSize { + ret = cs.values[i].Size + cs.values[i].Size = TombstoneFileSize + } + } + if _, v, found := cs.findOverflowEntry(skey); found { + cs.deleteOverflowEntry(skey) + ret = v.Size + } + cs.Unlock() + return ret +} +func (cs *CompactSection) Get(key NeedleId) (*NeedleValue, bool) { + cs.RLock() + skey := SectionalNeedleId(key - cs.start) + if ve, v, ok := cs.findOverflowEntry(skey); ok { + cs.RUnlock() + nv := toNeedleValue(ve, v, cs) + return &nv, true + } + if i := cs.binarySearchValues(skey); i >= 0 { + cs.RUnlock() + nv := toNeedleValue(cs.valuesExtra[i], cs.values[i], cs) + return &nv, true + } + cs.RUnlock() + return nil, false +} +func (cs *CompactSection) binarySearchValues(key SectionalNeedleId) int { + x := sort.Search(cs.counter, func(i int) bool { + return cs.values[i].Key >= key + }) + if x == cs.counter { + return -1 + } + if cs.values[x].Key > key { + return -2 + } + return x +} + +//This map assumes mostly inserting increasing keys +//This map assumes mostly inserting increasing keys +type CompactMap struct { + list []*CompactSection +} + +func NewCompactMap() *CompactMap { + return &CompactMap{} +} + +func (cm *CompactMap) Set(key NeedleId, offset Offset, size uint32) (oldOffset Offset, oldSize uint32) { + x := cm.binarySearchCompactSection(key) + if x < 0 || (key-cm.list[x].start) > SectionalNeedleIdLimit { + // println(x, "adding to existing", len(cm.list), "sections, starting", key) + cs := NewCompactSection(key) + cm.list = append(cm.list, cs) + x = len(cm.list) - 1 + //keep compact section sorted by start + for x >= 0 { + if x > 0 && cm.list[x-1].start > key { + cm.list[x] = cm.list[x-1] + // println("shift", x, "start", cs.start, "to", x-1) + x = x - 1 + } else { + cm.list[x] = cs + // println("cs", x, "start", cs.start) + break + } + } + } + // println(key, "set to section[", x, "].start", cm.list[x].start) + return cm.list[x].Set(key, offset, size) +} +func (cm *CompactMap) Delete(key NeedleId) uint32 { + x := cm.binarySearchCompactSection(key) + if x < 0 { + return uint32(0) + } + return cm.list[x].Delete(key) +} +func (cm *CompactMap) Get(key NeedleId) (*NeedleValue, bool) { + x := cm.binarySearchCompactSection(key) + if x < 0 { + return nil, false + } + return cm.list[x].Get(key) +} +func (cm *CompactMap) binarySearchCompactSection(key NeedleId) int { + l, h := 0, len(cm.list)-1 + if h < 0 { + return -5 + } + if cm.list[h].start <= key { + if cm.list[h].counter < batch || key <= cm.list[h].end { + return h + } + return -4 + } + for l <= h { + m := (l + h) / 2 + if key < cm.list[m].start { + h = m - 1 + } 
else { // cm.list[m].start <= key + if cm.list[m+1].start <= key { + l = m + 1 + } else { + return m + } + } + } + return -3 +} + +// Visit visits all entries or stop if any error when visiting +func (cm *CompactMap) AscendingVisit(visit func(NeedleValue) error) error { + for _, cs := range cm.list { + cs.RLock() + var i, j int + for i, j = 0, 0; i < len(cs.overflow) && j < len(cs.values) && j < cs.counter; { + if cs.overflow[i].Key < cs.values[j].Key { + if err := visit(toNeedleValue(cs.overflowExtra[i], cs.overflow[i], cs)); err != nil { + cs.RUnlock() + return err + } + i++ + } else if cs.overflow[i].Key == cs.values[j].Key { + j++ + } else { + if err := visit(toNeedleValue(cs.valuesExtra[j], cs.values[j], cs)); err != nil { + cs.RUnlock() + return err + } + j++ + } + } + for ; i < len(cs.overflow); i++ { + if err := visit(toNeedleValue(cs.overflowExtra[i], cs.overflow[i], cs)); err != nil { + cs.RUnlock() + return err + } + } + for ; j < len(cs.values) && j < cs.counter; j++ { + if err := visit(toNeedleValue(cs.valuesExtra[j], cs.values[j], cs)); err != nil { + cs.RUnlock() + return err + } + } + cs.RUnlock() + } + return nil +} + +func toNeedleValue(snve SectionalNeedleValueExtra, snv SectionalNeedleValue, cs *CompactSection) NeedleValue { + offset := Offset{ + OffsetHigher: snve.OffsetHigher, + OffsetLower: snv.OffsetLower, + } + return NeedleValue{Key: NeedleId(snv.Key) + cs.start, Offset: offset, Size: snv.Size} +} + +func (nv NeedleValue) toSectionalNeedleValue(cs *CompactSection) (SectionalNeedleValue, SectionalNeedleValueExtra) { + return SectionalNeedleValue{ + SectionalNeedleId(nv.Key - cs.start), + nv.Offset.OffsetLower, + nv.Size, + }, SectionalNeedleValueExtra{ + nv.Offset.OffsetHigher, + } +} diff --git a/weed/storage/needle_map/compact_map_perf_test.go b/weed/storage/needle_map/compact_map_perf_test.go new file mode 100644 index 000000000..3a3648641 --- /dev/null +++ b/weed/storage/needle_map/compact_map_perf_test.go @@ -0,0 +1,93 @@ +package needle_map + +import ( + "fmt" + "log" + "os" + "runtime" + "testing" + "time" + + . "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" +) + +/* + +To see the memory usage: + +go test -run TestMemoryUsage +The TotalAlloc section shows the memory increase for each iteration. 
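The reason the map splits SectionalNeedleValue from SectionalNeedleValueExtra, which is what this perf test is measuring, is struct alignment: parking the rarely-used high offset byte in a parallel slice keeps the hot struct at 12 bytes instead of letting padding round a merged struct up to 16. A quick check under assumed field shapes (4-byte OffsetLower, 1-byte OffsetHigher):

```go
package main

import (
	"fmt"
	"unsafe"
)

// Stand-ins with the same shapes as the real types (assumed: OffsetLower
// is 4 bytes and OffsetHigher is 1 byte in the default build).
type sectionalNeedleValue struct {
	Key         uint32
	OffsetLower [4]byte
	Size        uint32
}

type sectionalNeedleValueExtra struct {
	OffsetHigher [1]byte
}

type combined struct { // what one merged struct would cost
	Key          uint32
	OffsetLower  [4]byte
	OffsetHigher [1]byte
	Size         uint32
}

func main() {
	fmt.Println(unsafe.Sizeof(sectionalNeedleValue{}))      // 12
	fmt.Println(unsafe.Sizeof(sectionalNeedleValueExtra{})) // 1
	fmt.Println(unsafe.Sizeof(combined{}))                  // 16 (padding before Size)
}
```

On that layout each live entry costs about 13 bytes in the section slices, before any overflow entries are counted.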
+ +go test -run TestMemoryUsage -memprofile=mem.out +go tool pprof --alloc_space needle.test mem.out + + +*/ + +func TestMemoryUsage(t *testing.T) { + + var maps []*CompactMap + totalRowCount := uint64(0) + + startTime := time.Now() + for i := 0; i < 10; i++ { + indexFile, ie := os.OpenFile("../../../test/sample.idx", os.O_RDWR|os.O_RDONLY, 0644) + if ie != nil { + log.Fatalln(ie) + } + m, rowCount := loadNewNeedleMap(indexFile) + maps = append(maps, m) + totalRowCount += rowCount + + indexFile.Close() + + PrintMemUsage(totalRowCount) + now := time.Now() + fmt.Printf("\tTaken = %v\n", now.Sub(startTime)) + startTime = now + } + +} + +func loadNewNeedleMap(file *os.File) (*CompactMap, uint64) { + m := NewCompactMap() + bytes := make([]byte, NeedleMapEntrySize) + rowCount := uint64(0) + count, e := file.Read(bytes) + for count > 0 && e == nil { + for i := 0; i < count; i += NeedleMapEntrySize { + rowCount++ + key := BytesToNeedleId(bytes[i : i+NeedleIdSize]) + offset := BytesToOffset(bytes[i+NeedleIdSize : i+NeedleIdSize+OffsetSize]) + size := util.BytesToUint32(bytes[i+NeedleIdSize+OffsetSize : i+NeedleIdSize+OffsetSize+SizeSize]) + + if !offset.IsZero() { + m.Set(NeedleId(key), offset, size) + } else { + m.Delete(key) + } + } + + count, e = file.Read(bytes) + } + + return m, rowCount + +} + +func PrintMemUsage(totalRowCount uint64) { + + runtime.GC() + var m runtime.MemStats + runtime.ReadMemStats(&m) + // For info on each, see: https://golang.org/pkg/runtime/#MemStats + fmt.Printf("Each %.2f Bytes", float64(m.TotalAlloc)/float64(totalRowCount)) + fmt.Printf("\tAlloc = %v MiB", bToMb(m.Alloc)) + fmt.Printf("\tTotalAlloc = %v MiB", bToMb(m.TotalAlloc)) + fmt.Printf("\tSys = %v MiB", bToMb(m.Sys)) + fmt.Printf("\tNumGC = %v", m.NumGC) +} +func bToMb(b uint64) uint64 { + return b / 1024 / 1024 +} diff --git a/weed/storage/needle_map/compact_map_test.go b/weed/storage/needle_map/compact_map_test.go new file mode 100644 index 000000000..7eea3969a --- /dev/null +++ b/weed/storage/needle_map/compact_map_test.go @@ -0,0 +1,166 @@ +package needle_map + +import ( + "fmt" + . 
"github.com/chrislusf/seaweedfs/weed/storage/types" + "testing" +) + +func TestOverflow2(t *testing.T) { + m := NewCompactMap() + _, oldSize := m.Set(NeedleId(150088), ToOffset(8), 3000073) + if oldSize != 0 { + t.Fatalf("expecting no previous data") + } + _, oldSize = m.Set(NeedleId(150088), ToOffset(8), 3000073) + if oldSize != 3000073 { + t.Fatalf("expecting previous data size is %d, not %d", 3000073, oldSize) + } + m.Set(NeedleId(150073), ToOffset(8), 3000073) + m.Set(NeedleId(150089), ToOffset(8), 3000073) + m.Set(NeedleId(150076), ToOffset(8), 3000073) + m.Set(NeedleId(150124), ToOffset(8), 3000073) + m.Set(NeedleId(150137), ToOffset(8), 3000073) + m.Set(NeedleId(150147), ToOffset(8), 3000073) + m.Set(NeedleId(150145), ToOffset(8), 3000073) + m.Set(NeedleId(150158), ToOffset(8), 3000073) + m.Set(NeedleId(150162), ToOffset(8), 3000073) + + m.AscendingVisit(func(value NeedleValue) error { + println("needle key:", value.Key) + return nil + }) +} + +func TestIssue52(t *testing.T) { + m := NewCompactMap() + m.Set(NeedleId(10002), ToOffset(10002), 10002) + if element, ok := m.Get(NeedleId(10002)); ok { + fmt.Printf("key %d ok %v %d, %v, %d\n", 10002, ok, element.Key, element.Offset, element.Size) + } + m.Set(NeedleId(10001), ToOffset(10001), 10001) + if element, ok := m.Get(NeedleId(10002)); ok { + fmt.Printf("key %d ok %v %d, %v, %d\n", 10002, ok, element.Key, element.Offset, element.Size) + } else { + t.Fatal("key 10002 missing after setting 10001") + } +} + +func TestCompactMap(t *testing.T) { + m := NewCompactMap() + for i := uint32(0); i < 100*batch; i += 2 { + m.Set(NeedleId(i), ToOffset(int64(i)), i) + } + + for i := uint32(0); i < 100*batch; i += 37 { + m.Delete(NeedleId(i)) + } + + for i := uint32(0); i < 10*batch; i += 3 { + m.Set(NeedleId(i), ToOffset(int64(i+11)), i+5) + } + + // for i := uint32(0); i < 100; i++ { + // if v := m.Get(Key(i)); v != nil { + // glog.V(4).Infoln(i, "=", v.Key, v.Offset, v.Size) + // } + // } + + for i := uint32(0); i < 10*batch; i++ { + v, ok := m.Get(NeedleId(i)) + if i%3 == 0 { + if !ok { + t.Fatal("key", i, "missing!") + } + if v.Size != i+5 { + t.Fatal("key", i, "size", v.Size) + } + } else if i%37 == 0 { + if ok && v.Size != TombstoneFileSize { + t.Fatal("key", i, "should have been deleted needle value", v) + } + } else if i%2 == 0 { + if v.Size != i { + t.Fatal("key", i, "size", v.Size) + } + } + } + + for i := uint32(10 * batch); i < 100*batch; i++ { + v, ok := m.Get(NeedleId(i)) + if i%37 == 0 { + if ok && v.Size != TombstoneFileSize { + t.Fatal("key", i, "should have been deleted needle value", v) + } + } else if i%2 == 0 { + if v == nil { + t.Fatal("key", i, "missing") + } + if v.Size != i { + t.Fatal("key", i, "size", v.Size) + } + } + } + +} + +func TestOverflow(t *testing.T) { + cs := NewCompactSection(1) + + cs.setOverflowEntry(1, ToOffset(12), 12) + cs.setOverflowEntry(2, ToOffset(12), 12) + cs.setOverflowEntry(3, ToOffset(12), 12) + cs.setOverflowEntry(4, ToOffset(12), 12) + cs.setOverflowEntry(5, ToOffset(12), 12) + + if cs.overflow[2].Key != 3 { + t.Fatalf("expecting o[2] has key 3: %+v", cs.overflow[2].Key) + } + + cs.setOverflowEntry(3, ToOffset(24), 24) + + if cs.overflow[2].Key != 3 { + t.Fatalf("expecting o[2] has key 3: %+v", cs.overflow[2].Key) + } + + if cs.overflow[2].Size != 24 { + t.Fatalf("expecting o[2] has size 24: %+v", cs.overflow[2].Size) + } + + cs.deleteOverflowEntry(4) + + if len(cs.overflow) != 4 { + t.Fatalf("expecting 4 entries now: %+v", cs.overflow) + } + + _, x, _ := cs.findOverflowEntry(5) + if x.Key != 5 
{ + t.Fatalf("expecting entry 5 now: %+v", x) + } + + for i, x := range cs.overflow { + println("overflow[", i, "]:", x.Key) + } + println() + + cs.deleteOverflowEntry(1) + + for i, x := range cs.overflow { + println("overflow[", i, "]:", x.Key) + } + println() + + cs.setOverflowEntry(4, ToOffset(44), 44) + for i, x := range cs.overflow { + println("overflow[", i, "]:", x.Key) + } + println() + + cs.setOverflowEntry(1, ToOffset(11), 11) + + for i, x := range cs.overflow { + println("overflow[", i, "]:", x.Key) + } + println() + +} diff --git a/weed/storage/needle_map/memdb.go b/weed/storage/needle_map/memdb.go new file mode 100644 index 000000000..a52d52a10 --- /dev/null +++ b/weed/storage/needle_map/memdb.go @@ -0,0 +1,119 @@ +package needle_map + +import ( + "fmt" + "os" + + "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/idx" + . "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" +) + +//This map uses in memory level db +type MemDb struct { + db *leveldb.DB +} + +func NewMemDb() *MemDb { + opts := &opt.Options{} + + var err error + t := &MemDb{} + if t.db, err = leveldb.Open(storage.NewMemStorage(), opts); err != nil { + glog.V(0).Infof("MemDb fails to open: %v", err) + return nil + } + + return t +} + +func (cm *MemDb) Set(key NeedleId, offset Offset, size uint32) error { + + bytes := ToBytes(key, offset, size) + + if err := cm.db.Put(bytes[0:NeedleIdSize], bytes[NeedleIdSize:NeedleIdSize+OffsetSize+SizeSize], nil); err != nil { + return fmt.Errorf("failed to write temp leveldb: %v", err) + } + return nil +} + +func (cm *MemDb) Delete(key NeedleId) error { + bytes := make([]byte, NeedleIdSize) + NeedleIdToBytes(bytes, key) + return cm.db.Delete(bytes, nil) + +} +func (cm *MemDb) Get(key NeedleId) (*NeedleValue, bool) { + bytes := make([]byte, NeedleIdSize) + NeedleIdToBytes(bytes[0:NeedleIdSize], key) + data, err := cm.db.Get(bytes, nil) + if err != nil || len(data) != OffsetSize+SizeSize { + return nil, false + } + offset := BytesToOffset(data[0:OffsetSize]) + size := util.BytesToUint32(data[OffsetSize : OffsetSize+SizeSize]) + return &NeedleValue{Key: key, Offset: offset, Size: size}, true +} + +// Visit visits all entries or stop if any error when visiting +func (cm *MemDb) AscendingVisit(visit func(NeedleValue) error) (ret error) { + iter := cm.db.NewIterator(nil, nil) + for iter.Next() { + key := BytesToNeedleId(iter.Key()) + data := iter.Value() + offset := BytesToOffset(data[0:OffsetSize]) + size := util.BytesToUint32(data[OffsetSize : OffsetSize+SizeSize]) + + needle := NeedleValue{Key: key, Offset: offset, Size: size} + ret = visit(needle) + if ret != nil { + return + } + } + iter.Release() + ret = iter.Error() + + return +} + +func (cm *MemDb) SaveToIdx(idxName string) (ret error) { + idxFile, err := os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return + } + defer idxFile.Close() + + return cm.AscendingVisit(func(value NeedleValue) error { + if value.Offset.IsZero() || value.Size == TombstoneFileSize { + return nil + } + _, err := idxFile.Write(value.ToBytes()) + return err + }) + +} + +func (cm *MemDb) LoadFromIdx(idxName string) (ret error) { + idxFile, err := os.OpenFile(idxName, os.O_RDONLY, 0644) + if err != nil { + return + } + defer idxFile.Close() + + return idx.WalkIndexFile(idxFile, func(key NeedleId, offset 
Offset, size uint32) error { + if offset.IsZero() || size == TombstoneFileSize { + return cm.Delete(key) + } + return cm.Set(key, offset, size) + }) + +} + +func (cm *MemDb) Close() { + cm.db.Close() +} diff --git a/weed/storage/needle_map/memdb_test.go b/weed/storage/needle_map/memdb_test.go new file mode 100644 index 000000000..7b45d23f8 --- /dev/null +++ b/weed/storage/needle_map/memdb_test.go @@ -0,0 +1,23 @@ +package needle_map + +import ( + "testing" + + "github.com/chrislusf/seaweedfs/weed/storage/types" +) + +func BenchmarkMemDb(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + nm := NewMemDb() + + nid := types.NeedleId(345) + offset := types.Offset{ + OffsetHigher: types.OffsetHigher{}, + OffsetLower: types.OffsetLower{}, + } + nm.Set(nid, offset, 324) + nm.Close() + } + +} diff --git a/weed/storage/needle_map/needle_value.go b/weed/storage/needle_map/needle_value.go new file mode 100644 index 000000000..ef540b55e --- /dev/null +++ b/weed/storage/needle_map/needle_value.go @@ -0,0 +1,30 @@ +package needle_map + +import ( + . "github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/util" + "github.com/google/btree" +) + +type NeedleValue struct { + Key NeedleId + Offset Offset `comment:"Volume offset"` //since aligned to 8 bytes, range is 4G*8=32G + Size uint32 `comment:"Size of the data portion"` +} + +func (this NeedleValue) Less(than btree.Item) bool { + that := than.(NeedleValue) + return this.Key < that.Key +} + +func (nv NeedleValue) ToBytes() []byte { + return ToBytes(nv.Key, nv.Offset, nv.Size) +} + +func ToBytes(key NeedleId, offset Offset, size uint32) []byte { + bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize) + NeedleIdToBytes(bytes[0:NeedleIdSize], key) + OffsetToBytes(bytes[NeedleIdSize:NeedleIdSize+OffsetSize], offset) + util.Uint32toBytes(bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], size) + return bytes +} diff --git a/weed/storage/needle/needle_value_map.go b/weed/storage/needle_map/needle_value_map.go index 9da257443..0a5a00ef7 100644 --- a/weed/storage/needle/needle_value_map.go +++ b/weed/storage/needle_map/needle_value_map.go @@ -1,4 +1,4 @@ -package needle +package needle_map import ( . "github.com/chrislusf/seaweedfs/weed/storage/types" @@ -8,5 +8,5 @@ type NeedleValueMap interface { Set(key NeedleId, offset Offset, size uint32) (oldOffset Offset, oldSize uint32) Delete(key NeedleId) uint32 Get(key NeedleId) (*NeedleValue, bool) - Visit(visit func(NeedleValue) error) error + AscendingVisit(visit func(NeedleValue) error) error } diff --git a/weed/storage/needle_map_boltdb.go b/weed/storage/needle_map_boltdb.go deleted file mode 100644 index a24c55a32..000000000 --- a/weed/storage/needle_map_boltdb.go +++ /dev/null @@ -1,185 +0,0 @@ -package storage - -import ( - "fmt" - "os" - - "github.com/boltdb/bolt" - - "errors" - "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/storage/needle" - . 
"github.com/chrislusf/seaweedfs/weed/storage/types" - "github.com/chrislusf/seaweedfs/weed/util" -) - -type BoltDbNeedleMap struct { - dbFileName string - db *bolt.DB - baseNeedleMapper -} - -var boltdbBucket = []byte("weed") - -var NotFound = errors.New("not found") - -func NewBoltDbNeedleMap(dbFileName string, indexFile *os.File) (m *BoltDbNeedleMap, err error) { - m = &BoltDbNeedleMap{dbFileName: dbFileName} - m.indexFile = indexFile - if !isBoltDbFresh(dbFileName, indexFile) { - glog.V(0).Infof("Start to Generate %s from %s", dbFileName, indexFile.Name()) - generateBoltDbFile(dbFileName, indexFile) - glog.V(0).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) - } - glog.V(1).Infof("Opening %s...", dbFileName) - if m.db, err = bolt.Open(dbFileName, 0644, nil); err != nil { - return - } - glog.V(1).Infof("Loading %s...", indexFile.Name()) - mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile) - if indexLoadError != nil { - return nil, indexLoadError - } - m.mapMetric = *mm - return -} - -func isBoltDbFresh(dbFileName string, indexFile *os.File) bool { - // normally we always write to index file first - dbLogFile, err := os.Open(dbFileName) - if err != nil { - return false - } - defer dbLogFile.Close() - dbStat, dbStatErr := dbLogFile.Stat() - indexStat, indexStatErr := indexFile.Stat() - if dbStatErr != nil || indexStatErr != nil { - glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr) - return false - } - - return dbStat.ModTime().After(indexStat.ModTime()) -} - -func generateBoltDbFile(dbFileName string, indexFile *os.File) error { - db, err := bolt.Open(dbFileName, 0644, nil) - if err != nil { - return err - } - defer db.Close() - return WalkIndexFile(indexFile, func(key NeedleId, offset Offset, size uint32) error { - if offset > 0 && size != TombstoneFileSize { - boltDbWrite(db, key, offset, size) - } else { - boltDbDelete(db, key) - } - return nil - }) -} - -func (m *BoltDbNeedleMap) Get(key NeedleId) (element *needle.NeedleValue, ok bool) { - var offset Offset - var size uint32 - bytes := make([]byte, NeedleIdSize) - NeedleIdToBytes(bytes, key) - err := m.db.View(func(tx *bolt.Tx) error { - bucket := tx.Bucket(boltdbBucket) - if bucket == nil { - return fmt.Errorf("Bucket %q not found!", boltdbBucket) - } - - data := bucket.Get(bytes) - - if len(data) == 0 { - return NotFound - } - - if len(data) != OffsetSize+SizeSize { - glog.V(0).Infof("key:%v has wrong data length: %d", key, len(data)) - return fmt.Errorf("key:%v has wrong data length: %d", key, len(data)) - } - - offset = BytesToOffset(data[0:OffsetSize]) - size = util.BytesToUint32(data[OffsetSize : OffsetSize+SizeSize]) - - return nil - }) - - if err != nil { - return nil, false - } - return &needle.NeedleValue{Key: key, Offset: offset, Size: size}, true -} - -func (m *BoltDbNeedleMap) Put(key NeedleId, offset Offset, size uint32) error { - var oldSize uint32 - if oldNeedle, ok := m.Get(key); ok { - oldSize = oldNeedle.Size - } - m.logPut(key, oldSize, size) - // write to index file first - if err := m.appendToIndexFile(key, offset, size); err != nil { - return fmt.Errorf("cannot write to indexfile %s: %v", m.indexFile.Name(), err) - } - return boltDbWrite(m.db, key, offset, size) -} - -func boltDbWrite(db *bolt.DB, - key NeedleId, offset Offset, size uint32) error { - - bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize) - NeedleIdToBytes(bytes[0:NeedleIdSize], key) - OffsetToBytes(bytes[NeedleIdSize:NeedleIdSize+OffsetSize], offset) - 
util.Uint32toBytes(bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], size) - - return db.Update(func(tx *bolt.Tx) error { - bucket, err := tx.CreateBucketIfNotExists(boltdbBucket) - if err != nil { - return err - } - - err = bucket.Put(bytes[0:NeedleIdSize], bytes[NeedleIdSize:NeedleIdSize+OffsetSize+SizeSize]) - if err != nil { - return err - } - return nil - }) -} -func boltDbDelete(db *bolt.DB, key NeedleId) error { - bytes := make([]byte, NeedleIdSize) - NeedleIdToBytes(bytes, key) - return db.Update(func(tx *bolt.Tx) error { - bucket, err := tx.CreateBucketIfNotExists(boltdbBucket) - if err != nil { - return err - } - - err = bucket.Delete(bytes) - if err != nil { - return err - } - return nil - }) -} - -func (m *BoltDbNeedleMap) Delete(key NeedleId, offset Offset) error { - if oldNeedle, ok := m.Get(key); ok { - m.logDelete(oldNeedle.Size) - } - // write to index file first - if err := m.appendToIndexFile(key, offset, TombstoneFileSize); err != nil { - return err - } - return boltDbDelete(m.db, key) -} - -func (m *BoltDbNeedleMap) Close() { - m.indexFile.Close() - m.db.Close() -} - -func (m *BoltDbNeedleMap) Destroy() error { - m.Close() - os.Remove(m.indexFile.Name()) - return os.Remove(m.dbFileName) -} diff --git a/weed/storage/needle_map_leveldb.go b/weed/storage/needle_map_leveldb.go index 77d29bd87..3bb258559 100644 --- a/weed/storage/needle_map_leveldb.go +++ b/weed/storage/needle_map_leveldb.go @@ -5,20 +5,23 @@ import ( "os" "path/filepath" + "github.com/chrislusf/seaweedfs/weed/storage/idx" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" . "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/chrislusf/seaweedfs/weed/util" "github.com/syndtr/goleveldb/leveldb" ) type LevelDbNeedleMap struct { + baseNeedleMapper dbFileName string db *leveldb.DB - baseNeedleMapper } -func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File) (m *LevelDbNeedleMap, err error) { +func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File, opts *opt.Options) (m *LevelDbNeedleMap, err error) { m = &LevelDbNeedleMap{dbFileName: dbFileName} m.indexFile = indexFile if !isLevelDbFresh(dbFileName, indexFile) { @@ -27,7 +30,8 @@ func NewLevelDbNeedleMap(dbFileName string, indexFile *os.File) (m *LevelDbNeedl glog.V(0).Infof("Finished Generating %s from %s", dbFileName, indexFile.Name()) } glog.V(1).Infof("Opening %s...", dbFileName) - if m.db, err = leveldb.OpenFile(dbFileName, nil); err != nil { + + if m.db, err = leveldb.OpenFile(dbFileName, opts); err != nil { return } glog.V(1).Infof("Loading %s...", indexFile.Name()) @@ -62,8 +66,8 @@ func generateLevelDbFile(dbFileName string, indexFile *os.File) error { return err } defer db.Close() - return WalkIndexFile(indexFile, func(key NeedleId, offset Offset, size uint32) error { - if offset > 0 && size != TombstoneFileSize { + return idx.WalkIndexFile(indexFile, func(key NeedleId, offset Offset, size uint32) error { + if !offset.IsZero() && size != TombstoneFileSize { levelDbWrite(db, key, offset, size) } else { levelDbDelete(db, key) @@ -72,7 +76,7 @@ func generateLevelDbFile(dbFileName string, indexFile *os.File) error { }) } -func (m *LevelDbNeedleMap) Get(key NeedleId) (element *needle.NeedleValue, ok bool) { +func (m *LevelDbNeedleMap) Get(key NeedleId) (element *needle_map.NeedleValue, ok bool) { bytes := make([]byte, NeedleIdSize) 
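NewLevelDbNeedleMap now accepts a *opt.Options, which is what allows the NeedleMapLevelDb, NeedleMapLevelDbMedium, and NeedleMapLevelDbLarge flavors declared earlier to translate into different leveldb buffer sizes. A hedged sketch of such a mapping; the byte counts follow the footprint comments in the diff, while the exact option fields the server sets are an assumption:

```go
package main

import (
	"fmt"

	"github.com/syndtr/goleveldb/leveldb/opt"
)

// Illustrative mapping from the new needle-map flavors to leveldb
// options; the values echo the "write buffer / block buffer" comments
// in the diff rather than quoting the server's own code.
func levelDbOptions(flavor string) *opt.Options {
	mb := 1024 * 1024
	switch flavor {
	case "leveldbMedium": // ~8MB total: 3 write buffer, 5 block cache
		return &opt.Options{WriteBuffer: 3 * mb, BlockCacheCapacity: 5 * mb}
	case "leveldbLarge": // ~12MB total: 4 write buffer, 8 block cache
		return &opt.Options{WriteBuffer: 4 * mb, BlockCacheCapacity: 8 * mb}
	default: // "leveldb", ~4MB total: 1 write buffer, 3 block cache
		return &opt.Options{WriteBuffer: 1 * mb, BlockCacheCapacity: 3 * mb}
	}
}

func main() {
	o := levelDbOptions("leveldbMedium")
	fmt.Println(o.WriteBuffer, o.BlockCacheCapacity)
}
```

Whatever the real values, passing nil keeps goleveldb's defaults, which is what the old call did.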
NeedleIdToBytes(bytes[0:NeedleIdSize], key) data, err := m.db.Get(bytes, nil) @@ -81,7 +85,7 @@ func (m *LevelDbNeedleMap) Get(key NeedleId) (element *needle.NeedleValue, ok bo } offset := BytesToOffset(data[0:OffsetSize]) size := util.BytesToUint32(data[OffsetSize : OffsetSize+SizeSize]) - return &needle.NeedleValue{Key: NeedleId(key), Offset: offset, Size: size}, true + return &needle_map.NeedleValue{Key: key, Offset: offset, Size: size}, true } func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size uint32) error { @@ -97,13 +101,9 @@ func (m *LevelDbNeedleMap) Put(key NeedleId, offset Offset, size uint32) error { return levelDbWrite(m.db, key, offset, size) } -func levelDbWrite(db *leveldb.DB, - key NeedleId, offset Offset, size uint32) error { +func levelDbWrite(db *leveldb.DB, key NeedleId, offset Offset, size uint32) error { - bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize) - NeedleIdToBytes(bytes[0:NeedleIdSize], key) - OffsetToBytes(bytes[NeedleIdSize:NeedleIdSize+OffsetSize], offset) - util.Uint32toBytes(bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], size) + bytes := needle_map.ToBytes(key, offset, size) if err := db.Put(bytes[0:NeedleIdSize], bytes[NeedleIdSize:NeedleIdSize+OffsetSize+SizeSize], nil); err != nil { return fmt.Errorf("failed to write leveldb: %v", err) @@ -128,8 +128,17 @@ func (m *LevelDbNeedleMap) Delete(key NeedleId, offset Offset) error { } func (m *LevelDbNeedleMap) Close() { - m.indexFile.Close() - m.db.Close() + indexFileName := m.indexFile.Name() + if err := m.indexFile.Sync(); err != nil { + glog.Warningf("sync file %s failed: %v", indexFileName, err) + } + if err := m.indexFile.Close(); err != nil { + glog.Warningf("close index file %s failed: %v", indexFileName, err) + } + + if err := m.db.Close(); err != nil { + glog.Warningf("close levelDB failed: %v", err) + } } func (m *LevelDbNeedleMap) Destroy() error { diff --git a/weed/storage/needle_map_memory.go b/weed/storage/needle_map_memory.go index fa5576c2b..84197912f 100644 --- a/weed/storage/needle_map_memory.go +++ b/weed/storage/needle_map_memory.go @@ -1,114 +1,60 @@ package storage import ( - "io" "os" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/idx" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" . 
"github.com/chrislusf/seaweedfs/weed/storage/types" ) type NeedleMap struct { - m needle.NeedleValueMap - baseNeedleMapper + m needle_map.NeedleValueMap } func NewCompactNeedleMap(file *os.File) *NeedleMap { nm := &NeedleMap{ - m: needle.NewCompactMap(), + m: needle_map.NewCompactMap(), } nm.indexFile = file return nm } -func NewBtreeNeedleMap(file *os.File) *NeedleMap { - nm := &NeedleMap{ - m: needle.NewBtreeMap(), - } - nm.indexFile = file - return nm -} - -const ( - RowsToRead = 1024 -) - func LoadCompactNeedleMap(file *os.File) (*NeedleMap, error) { nm := NewCompactNeedleMap(file) return doLoading(file, nm) } -func LoadBtreeNeedleMap(file *os.File) (*NeedleMap, error) { - nm := NewBtreeNeedleMap(file) - return doLoading(file, nm) -} - func doLoading(file *os.File, nm *NeedleMap) (*NeedleMap, error) { - e := WalkIndexFile(file, func(key NeedleId, offset Offset, size uint32) error { - if key > nm.MaximumFileKey { - nm.MaximumFileKey = key - } - if offset > 0 && size != TombstoneFileSize { + e := idx.WalkIndexFile(file, func(key NeedleId, offset Offset, size uint32) error { + nm.MaybeSetMaxFileKey(key) + if !offset.IsZero() && size != TombstoneFileSize { nm.FileCounter++ nm.FileByteCounter = nm.FileByteCounter + uint64(size) oldOffset, oldSize := nm.m.Set(NeedleId(key), offset, size) - // glog.V(3).Infoln("reading key", key, "offset", offset*NeedlePaddingSize, "size", size, "oldSize", oldSize) - if oldOffset > 0 && oldSize != TombstoneFileSize { + if !oldOffset.IsZero() && oldSize != TombstoneFileSize { nm.DeletionCounter++ nm.DeletionByteCounter = nm.DeletionByteCounter + uint64(oldSize) } } else { oldSize := nm.m.Delete(NeedleId(key)) - // glog.V(3).Infoln("removing key", key, "offset", offset*NeedlePaddingSize, "size", size, "oldSize", oldSize) nm.DeletionCounter++ nm.DeletionByteCounter = nm.DeletionByteCounter + uint64(oldSize) } return nil }) - glog.V(1).Infof("max file key: %d for file: %s", nm.MaximumFileKey, file.Name()) + glog.V(1).Infof("max file key: %d for file: %s", nm.MaxFileKey(), file.Name()) return nm, e } -// walks through the index file, calls fn function with each key, offset, size -// stops with the error returned by the fn function -func WalkIndexFile(r *os.File, fn func(key NeedleId, offset Offset, size uint32) error) error { - var readerOffset int64 - bytes := make([]byte, NeedleEntrySize*RowsToRead) - count, e := r.ReadAt(bytes, readerOffset) - glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e) - readerOffset += int64(count) - var ( - key NeedleId - offset Offset - size uint32 - i int - ) - - for count > 0 && e == nil || e == io.EOF { - for i = 0; i+NeedleEntrySize <= count; i += NeedleEntrySize { - key, offset, size = IdxFileEntry(bytes[i : i+NeedleEntrySize]) - if e = fn(key, offset, size); e != nil { - return e - } - } - if e == io.EOF { - return nil - } - count, e = r.ReadAt(bytes, readerOffset) - glog.V(3).Infoln("file", r.Name(), "readerOffset", readerOffset, "count", count, "e", e) - readerOffset += int64(count) - } - return e -} - func (nm *NeedleMap) Put(key NeedleId, offset Offset, size uint32) error { _, oldSize := nm.m.Set(NeedleId(key), offset, size) nm.logPut(key, oldSize, size) return nm.appendToIndexFile(key, offset, size) } -func (nm *NeedleMap) Get(key NeedleId) (element *needle.NeedleValue, ok bool) { +func (nm *NeedleMap) Get(key NeedleId) (element *needle_map.NeedleValue, ok bool) { element, ok = nm.m.Get(NeedleId(key)) return } @@ -118,6 +64,10 @@ func (nm *NeedleMap) Delete(key NeedleId, offset Offset) 
error { return nm.appendToIndexFile(key, offset, TombstoneFileSize) } func (nm *NeedleMap) Close() { + indexFileName := nm.indexFile.Name() + if err := nm.indexFile.Sync(); err != nil { + glog.Warningf("sync file %s failed, %v", indexFileName, err) + } _ = nm.indexFile.Close() } func (nm *NeedleMap) Destroy() error { diff --git a/weed/storage/needle_map_metric.go b/weed/storage/needle_map_metric.go index cc3d9e028..823a04108 100644 --- a/weed/storage/needle_map_metric.go +++ b/weed/storage/needle_map_metric.go @@ -2,50 +2,93 @@ package storage import ( "fmt" + "os" + "sync/atomic" + + "github.com/chrislusf/seaweedfs/weed/storage/idx" . "github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/willf/bloom" - "os" ) type mapMetric struct { - DeletionCounter int `json:"DeletionCounter"` - FileCounter int `json:"FileCounter"` - DeletionByteCounter uint64 `json:"DeletionByteCounter"` - FileByteCounter uint64 `json:"FileByteCounter"` - MaximumFileKey NeedleId `json:"MaxFileKey"` + DeletionCounter uint32 `json:"DeletionCounter"` + FileCounter uint32 `json:"FileCounter"` + DeletionByteCounter uint64 `json:"DeletionByteCounter"` + FileByteCounter uint64 `json:"FileByteCounter"` + MaximumFileKey uint64 `json:"MaxFileKey"` } func (mm *mapMetric) logDelete(deletedByteCount uint32) { - mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(deletedByteCount) - mm.DeletionCounter++ + if mm == nil { + return + } + mm.LogDeletionCounter(deletedByteCount) } func (mm *mapMetric) logPut(key NeedleId, oldSize uint32, newSize uint32) { - if key > mm.MaximumFileKey { - mm.MaximumFileKey = key + if mm == nil { + return + } + mm.MaybeSetMaxFileKey(key) + mm.LogFileCounter(newSize) + if oldSize > 0 && oldSize != TombstoneFileSize { + mm.LogDeletionCounter(oldSize) + } +} +func (mm *mapMetric) LogFileCounter(newSize uint32) { + if mm == nil { + return + } + atomic.AddUint32(&mm.FileCounter, 1) + atomic.AddUint64(&mm.FileByteCounter, uint64(newSize)) +} +func (mm *mapMetric) LogDeletionCounter(oldSize uint32) { + if mm == nil { + return } - mm.FileCounter++ - mm.FileByteCounter = mm.FileByteCounter + uint64(newSize) if oldSize > 0 { - mm.DeletionCounter++ - mm.DeletionByteCounter = mm.DeletionByteCounter + uint64(oldSize) + atomic.AddUint32(&mm.DeletionCounter, 1) + atomic.AddUint64(&mm.DeletionByteCounter, uint64(oldSize)) } } - -func (mm mapMetric) ContentSize() uint64 { - return mm.FileByteCounter +func (mm *mapMetric) ContentSize() uint64 { + if mm == nil { + return 0 + } + return atomic.LoadUint64(&mm.FileByteCounter) } -func (mm mapMetric) DeletedSize() uint64 { - return mm.DeletionByteCounter +func (mm *mapMetric) DeletedSize() uint64 { + if mm == nil { + return 0 + } + return atomic.LoadUint64(&mm.DeletionByteCounter) } -func (mm mapMetric) FileCount() int { - return mm.FileCounter +func (mm *mapMetric) FileCount() int { + if mm == nil { + return 0 + } + return int(atomic.LoadUint32(&mm.FileCounter)) } -func (mm mapMetric) DeletedCount() int { - return mm.DeletionCounter +func (mm *mapMetric) DeletedCount() int { + if mm == nil { + return 0 + } + return int(atomic.LoadUint32(&mm.DeletionCounter)) } -func (mm mapMetric) MaxFileKey() NeedleId { - return mm.MaximumFileKey +func (mm *mapMetric) MaxFileKey() NeedleId { + if mm == nil { + return 0 + } + t := uint64(mm.MaximumFileKey) + return Uint64ToNeedleId(t) +} +func (mm *mapMetric) MaybeSetMaxFileKey(key NeedleId) { + if mm == nil { + return + } + if key > mm.MaxFileKey() { + atomic.StoreUint64(&mm.MaximumFileKey, uint64(key)) + } } func 
newNeedleMapMetricFromIndexFile(r *os.File) (mm *mapMetric, err error) { @@ -56,9 +99,7 @@ func newNeedleMapMetricFromIndexFile(r *os.File) (mm *mapMetric, err error) { bf = bloom.NewWithEstimates(uint(entryCount), 0.001) }, func(key NeedleId, offset Offset, size uint32) error { - if key > mm.MaximumFileKey { - mm.MaximumFileKey = key - } + mm.MaybeSetMaxFileKey(key) NeedleIdToBytes(buf, key) if size != TombstoneFileSize { mm.FileByteCounter += uint64(size) @@ -86,16 +127,16 @@ func reverseWalkIndexFile(r *os.File, initFn func(entryCount int64), fn func(key return fmt.Errorf("file %s stat error: %v", r.Name(), err) } fileSize := fi.Size() - if fileSize%NeedleEntrySize != 0 { + if fileSize%NeedleMapEntrySize != 0 { return fmt.Errorf("unexpected file %s size: %d", r.Name(), fileSize) } - entryCount := fileSize / NeedleEntrySize + entryCount := fileSize / NeedleMapEntrySize initFn(entryCount) batchSize := int64(1024 * 4) - bytes := make([]byte, NeedleEntrySize*batchSize) + bytes := make([]byte, NeedleMapEntrySize*batchSize) nextBatchSize := entryCount % batchSize if nextBatchSize == 0 { nextBatchSize = batchSize @@ -103,13 +144,13 @@ func reverseWalkIndexFile(r *os.File, initFn func(entryCount int64), fn func(key remainingCount := entryCount - nextBatchSize for remainingCount >= 0 { - _, e := r.ReadAt(bytes[:NeedleEntrySize*nextBatchSize], NeedleEntrySize*remainingCount) - // glog.V(0).Infoln("file", r.Name(), "readerOffset", NeedleEntrySize*remainingCount, "count", count, "e", e) + _, e := r.ReadAt(bytes[:NeedleMapEntrySize*nextBatchSize], NeedleMapEntrySize*remainingCount) + // glog.V(0).Infoln("file", r.Name(), "readerOffset", NeedleMapEntrySize*remainingCount, "count", count, "e", e) if e != nil { return e } for i := int(nextBatchSize) - 1; i >= 0; i-- { - key, offset, size := IdxFileEntry(bytes[i*NeedleEntrySize : i*NeedleEntrySize+NeedleEntrySize]) + key, offset, size := idx.IdxFileEntry(bytes[i*NeedleMapEntrySize : i*NeedleMapEntrySize+NeedleMapEntrySize]) if e = fn(key, offset, size); e != nil { return e } diff --git a/weed/storage/needle_map_metric_test.go b/weed/storage/needle_map_metric_test.go index 539f83a87..ae2177a30 100644 --- a/weed/storage/needle_map_metric_test.go +++ b/weed/storage/needle_map_metric_test.go @@ -1,17 +1,18 @@ package storage import ( - "github.com/chrislusf/seaweedfs/weed/glog" - . "github.com/chrislusf/seaweedfs/weed/storage/types" "io/ioutil" "math/rand" "testing" + + "github.com/chrislusf/seaweedfs/weed/glog" + . "github.com/chrislusf/seaweedfs/weed/storage/types" ) func TestFastLoadingNeedleMapMetrics(t *testing.T) { idxFile, _ := ioutil.TempFile("", "tmp.idx") - nm := NewBtreeNeedleMap(idxFile) + nm := NewCompactNeedleMap(idxFile) for i := 0; i < 10000; i++ { nm.Put(Uint64ToNeedleId(uint64(i+1)), Uint32ToOffset(uint32(0)), uint32(1)) diff --git a/weed/storage/needle_map_sorted_file.go b/weed/storage/needle_map_sorted_file.go new file mode 100644 index 000000000..e6f9258f3 --- /dev/null +++ b/weed/storage/needle_map_sorted_file.go @@ -0,0 +1,105 @@ +package storage + +import ( + "os" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" + . 
"github.com/chrislusf/seaweedfs/weed/storage/types" +) + +type SortedFileNeedleMap struct { + baseNeedleMapper + baseFileName string + dbFile *os.File + dbFileSize int64 +} + +func NewSortedFileNeedleMap(baseFileName string, indexFile *os.File) (m *SortedFileNeedleMap, err error) { + m = &SortedFileNeedleMap{baseFileName: baseFileName} + m.indexFile = indexFile + fileName := baseFileName + ".sdx" + if !isSortedFileFresh(fileName, indexFile) { + glog.V(0).Infof("Start to Generate %s from %s", fileName, indexFile.Name()) + erasure_coding.WriteSortedFileFromIdx(baseFileName, ".sdx") + glog.V(0).Infof("Finished Generating %s from %s", fileName, indexFile.Name()) + } + glog.V(1).Infof("Opening %s...", fileName) + + if m.dbFile, err = os.Open(baseFileName + ".sdx"); err != nil { + return + } + dbStat, _ := m.dbFile.Stat() + m.dbFileSize = dbStat.Size() + glog.V(1).Infof("Loading %s...", indexFile.Name()) + mm, indexLoadError := newNeedleMapMetricFromIndexFile(indexFile) + if indexLoadError != nil { + return nil, indexLoadError + } + m.mapMetric = *mm + return +} + +func isSortedFileFresh(dbFileName string, indexFile *os.File) bool { + // normally we always write to index file first + dbFile, err := os.Open(dbFileName) + if err != nil { + return false + } + defer dbFile.Close() + dbStat, dbStatErr := dbFile.Stat() + indexStat, indexStatErr := indexFile.Stat() + if dbStatErr != nil || indexStatErr != nil { + glog.V(0).Infof("Can not stat file: %v and %v", dbStatErr, indexStatErr) + return false + } + + return dbStat.ModTime().After(indexStat.ModTime()) +} + +func (m *SortedFileNeedleMap) Get(key NeedleId) (element *needle_map.NeedleValue, ok bool) { + offset, size, err := erasure_coding.SearchNeedleFromSortedIndex(m.dbFile, m.dbFileSize, key, nil) + ok = err == nil + return &needle_map.NeedleValue{Key: key, Offset: offset, Size: size}, ok + +} + +func (m *SortedFileNeedleMap) Put(key NeedleId, offset Offset, size uint32) error { + return os.ErrInvalid +} + +func (m *SortedFileNeedleMap) Delete(key NeedleId, offset Offset) error { + + _, size, err := erasure_coding.SearchNeedleFromSortedIndex(m.dbFile, m.dbFileSize, key, nil) + + if err != nil { + if err == erasure_coding.NotFoundError { + return nil + } + return err + } + + if size == TombstoneFileSize { + return nil + } + + // write to index file first + if err := m.appendToIndexFile(key, offset, TombstoneFileSize); err != nil { + return err + } + _, _, err = erasure_coding.SearchNeedleFromSortedIndex(m.dbFile, m.dbFileSize, key, erasure_coding.MarkNeedleDeleted) + + return err +} + +func (m *SortedFileNeedleMap) Close() { + m.indexFile.Close() + m.dbFile.Close() +} + +func (m *SortedFileNeedleMap) Destroy() error { + m.Close() + os.Remove(m.indexFile.Name()) + return os.Remove(m.baseFileName + ".sdx") +} diff --git a/weed/storage/needle_parse_multipart.go b/weed/storage/needle_parse_multipart.go deleted file mode 100644 index d42bc4629..000000000 --- a/weed/storage/needle_parse_multipart.go +++ /dev/null @@ -1,103 +0,0 @@ -package storage - -import ( - "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/operation" - "io/ioutil" - "mime" - "net/http" - "path" - "strconv" - "strings" -) - -func parseMultipart(r *http.Request) ( - fileName string, data []byte, mimeType string, isGzipped, isChunkedFile bool, e error) { - form, fe := r.MultipartReader() - if fe != nil { - glog.V(0).Infoln("MultipartReader [ERROR]", fe) - e = fe - return - } - - //first multi-part item - part, fe := form.NextPart() - if fe != nil { - 
glog.V(0).Infoln("Reading Multi part [ERROR]", fe) - e = fe - return - } - - fileName = part.FileName() - if fileName != "" { - fileName = path.Base(fileName) - } - - data, e = ioutil.ReadAll(part) - if e != nil { - glog.V(0).Infoln("Reading Content [ERROR]", e) - return - } - - //if the filename is empty string, do a search on the other multi-part items - for fileName == "" { - part2, fe := form.NextPart() - if fe != nil { - break // no more or on error, just safely break - } - - fName := part2.FileName() - - //found the first <file type> multi-part has filename - if fName != "" { - data2, fe2 := ioutil.ReadAll(part2) - if fe2 != nil { - glog.V(0).Infoln("Reading Content [ERROR]", fe2) - e = fe2 - return - } - - //update - data = data2 - fileName = path.Base(fName) - break - } - } - - isChunkedFile, _ = strconv.ParseBool(r.FormValue("cm")) - - if !isChunkedFile { - - dotIndex := strings.LastIndex(fileName, ".") - ext, mtype := "", "" - if dotIndex > 0 { - ext = strings.ToLower(fileName[dotIndex:]) - mtype = mime.TypeByExtension(ext) - } - contentType := part.Header.Get("Content-Type") - if contentType != "" && mtype != contentType { - mimeType = contentType //only return mime type if not deductable - mtype = contentType - } - - if part.Header.Get("Content-Encoding") == "gzip" { - isGzipped = true - } else if operation.IsGzippable(ext, mtype) { - if data, e = operation.GzipData(data); e != nil { - return - } - isGzipped = true - } - if ext == ".gz" { - if strings.HasSuffix(fileName, ".css.gz") || - strings.HasSuffix(fileName, ".html.gz") || - strings.HasSuffix(fileName, ".txt.gz") || - strings.HasSuffix(fileName, ".js.gz") { - fileName = fileName[:len(fileName)-3] - isGzipped = true - } - } - } - - return -} diff --git a/weed/storage/replica_placement_test.go b/weed/storage/replica_placement_test.go deleted file mode 100644 index 9c2161e94..000000000 --- a/weed/storage/replica_placement_test.go +++ /dev/null @@ -1,14 +0,0 @@ -package storage - -import ( - "testing" -) - -func TestReplicaPlacemnetSerialDeserial(t *testing.T) { - rp, _ := NewReplicaPlacementFromString("001") - new_rp, _ := NewReplicaPlacementFromByte(rp.Byte()) - if rp.String() != new_rp.String() { - println("expected:", rp.String(), "actual:", new_rp.String()) - t.Fail() - } -} diff --git a/weed/storage/store.go b/weed/storage/store.go index 5b6270b6b..e29680f6f 100644 --- a/weed/storage/store.go +++ b/weed/storage/store.go @@ -2,8 +2,18 @@ package storage import ( "fmt" + "path/filepath" + "strings" + "sync/atomic" + + "google.golang.org/grpc" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" . 
"github.com/chrislusf/seaweedfs/weed/storage/types" ) @@ -15,6 +25,9 @@ const ( * A VolumeServer contains one Store */ type Store struct { + MasterAddress string + grpcDialOption grpc.DialOption + volumeSizeLimit uint64 //read from the master Ip string Port int PublicUrl string @@ -22,40 +35,45 @@ type Store struct { dataCenter string //optional informaton, overwriting master setting if exists rack string //optional information, overwriting master setting if exists connected bool - VolumeSizeLimit uint64 //read from the master - Client master_pb.Seaweed_SendHeartbeatClient NeedleMapType NeedleMapType - NewVolumeIdChan chan VolumeId - DeletedVolumeIdChan chan VolumeId + NewVolumesChan chan master_pb.VolumeShortInformationMessage + DeletedVolumesChan chan master_pb.VolumeShortInformationMessage + NewEcShardsChan chan master_pb.VolumeEcShardInformationMessage + DeletedEcShardsChan chan master_pb.VolumeEcShardInformationMessage } func (s *Store) String() (str string) { - str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.VolumeSizeLimit) + str = fmt.Sprintf("Ip:%s, Port:%d, PublicUrl:%s, dataCenter:%s, rack:%s, connected:%v, volumeSizeLimit:%d", s.Ip, s.Port, s.PublicUrl, s.dataCenter, s.rack, s.connected, s.GetVolumeSizeLimit()) return } -func NewStore(port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, needleMapKind NeedleMapType) (s *Store) { - s = &Store{Port: port, Ip: ip, PublicUrl: publicUrl, NeedleMapType: needleMapKind} +func NewStore(grpcDialOption grpc.DialOption, port int, ip, publicUrl string, dirnames []string, maxVolumeCounts []int, needleMapKind NeedleMapType) (s *Store) { + s = &Store{grpcDialOption: grpcDialOption, Port: port, Ip: ip, PublicUrl: publicUrl, NeedleMapType: needleMapKind} s.Locations = make([]*DiskLocation, 0) for i := 0; i < len(dirnames); i++ { location := NewDiskLocation(dirnames[i], maxVolumeCounts[i]) location.loadExistingVolumes(needleMapKind) s.Locations = append(s.Locations, location) + stats.VolumeServerMaxVolumeCounter.Add(float64(maxVolumeCounts[i])) } - s.NewVolumeIdChan = make(chan VolumeId, 3) - s.DeletedVolumeIdChan = make(chan VolumeId, 3) + s.NewVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3) + s.DeletedVolumesChan = make(chan master_pb.VolumeShortInformationMessage, 3) + + s.NewEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3) + s.DeletedEcShardsChan = make(chan master_pb.VolumeEcShardInformationMessage, 3) + return } -func (s *Store) AddVolume(volumeId VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement string, ttlString string, preallocate int64) error { - rt, e := NewReplicaPlacementFromString(replicaPlacement) +func (s *Store) AddVolume(volumeId needle.VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement string, ttlString string, preallocate int64, MemoryMapMaxSizeMb uint32) error { + rt, e := super_block.NewReplicaPlacementFromString(replicaPlacement) if e != nil { return e } - ttl, e := ReadTTL(ttlString) + ttl, e := needle.ReadTTL(ttlString) if e != nil { return e } - e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate) + e = s.addVolume(volumeId, collection, needleMapKind, rt, ttl, preallocate, MemoryMapMaxSizeMb) return e } func (s *Store) DeleteCollection(collection string) (e error) { @@ -64,12 +82,12 @@ func (s *Store) DeleteCollection(collection string) (e error) { if e != nil { return 
} - // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumeIdChan + // let the heartbeat send the list of volumes, instead of sending the deleted volume ids to DeletedVolumesChan } return } -func (s *Store) findVolume(vid VolumeId) *Volume { +func (s *Store) findVolume(vid needle.VolumeId) *Volume { for _, location := range s.Locations { if v, found := location.FindVolume(vid); found { return v @@ -77,7 +95,7 @@ func (s *Store) findVolume(vid VolumeId) *Volume { } return nil } -func (s *Store) findFreeLocation() (ret *DiskLocation) { +func (s *Store) FindFreeLocation() (ret *DiskLocation) { max := 0 for _, location := range s.Locations { currentFreeCount := location.MaxVolumeCount - location.VolumesLen() @@ -88,16 +106,23 @@ func (s *Store) findFreeLocation() (ret *DiskLocation) { } return ret } -func (s *Store) addVolume(vid VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL, preallocate int64) error { +func (s *Store) addVolume(vid needle.VolumeId, collection string, needleMapKind NeedleMapType, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32) error { if s.findVolume(vid) != nil { return fmt.Errorf("Volume Id %d already exists!", vid) } - if location := s.findFreeLocation(); location != nil { + if location := s.FindFreeLocation(); location != nil { glog.V(0).Infof("In dir %s adds volume:%v collection:%s replicaPlacement:%v ttl:%v", location.Directory, vid, collection, replicaPlacement, ttl) - if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate); err == nil { + if volume, err := NewVolume(location.Directory, collection, vid, needleMapKind, replicaPlacement, ttl, preallocate, memoryMapMaxSizeMb); err == nil { location.SetVolume(vid, volume) - s.NewVolumeIdChan <- vid + glog.V(0).Infof("add volume %d", vid) + s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{ + Id: uint32(vid), + Collection: collection, + ReplicaPlacement: uint32(replicaPlacement.Byte()), + Version: uint32(volume.Version()), + Ttl: ttl.ToUint32(), + } return nil } else { return err @@ -106,25 +131,28 @@ func (s *Store) addVolume(vid VolumeId, collection string, needleMapKind NeedleM return fmt.Errorf("No more free space left") } -func (s *Store) Status() []*VolumeInfo { +func (s *Store) VolumeInfos() []*VolumeInfo { var stats []*VolumeInfo for _, location := range s.Locations { - location.RLock() + location.volumesLock.RLock() for k, v := range location.volumes { s := &VolumeInfo{ - Id: VolumeId(k), + Id: needle.VolumeId(k), Size: v.ContentSize(), Collection: v.Collection, ReplicaPlacement: v.ReplicaPlacement, Version: v.Version(), - FileCount: v.nm.FileCount(), - DeleteCount: v.nm.DeletedCount(), - DeletedByteCount: v.nm.DeletedSize(), - ReadOnly: v.readOnly, - Ttl: v.Ttl} + FileCount: int(v.FileCount()), + DeleteCount: int(v.DeletedCount()), + DeletedByteCount: v.DeletedSize(), + ReadOnly: v.noWriteOrDelete || v.noWriteCanDelete, + Ttl: v.Ttl, + CompactRevision: uint32(v.CompactionRevision), + } + s.RemoteStorageName, s.RemoteStorageKey = v.RemoteStorageNameKey() stats = append(stats, s) } - location.RUnlock() + location.volumesLock.RUnlock() } sortVolumeInfos(stats) return stats @@ -141,37 +169,42 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat { var volumeMessages []*master_pb.VolumeInformationMessage maxVolumeCount := 0 var maxFileKey NeedleId + collectionVolumeSize := 
make(map[string]uint64) for _, location := range s.Locations { + var deleteVids []needle.VolumeId maxVolumeCount = maxVolumeCount + location.MaxVolumeCount - location.Lock() - for k, v := range location.volumes { - if maxFileKey < v.nm.MaxFileKey() { - maxFileKey = v.nm.MaxFileKey() + location.volumesLock.RLock() + for _, v := range location.volumes { + if maxFileKey < v.MaxFileKey() { + maxFileKey = v.MaxFileKey() } - if !v.expired(s.VolumeSizeLimit) { - volumeMessage := &master_pb.VolumeInformationMessage{ - Id: uint32(k), - Size: uint64(v.Size()), - Collection: v.Collection, - FileCount: uint64(v.nm.FileCount()), - DeleteCount: uint64(v.nm.DeletedCount()), - DeletedByteCount: v.nm.DeletedSize(), - ReadOnly: v.readOnly, - ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()), - Version: uint32(v.Version()), - Ttl: v.Ttl.ToUint32(), - } - volumeMessages = append(volumeMessages, volumeMessage) + if !v.expired(s.GetVolumeSizeLimit()) { + volumeMessages = append(volumeMessages, v.ToVolumeInformationMessage()) } else { - if v.exiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) { - location.deleteVolumeById(v.Id) - glog.V(0).Infoln("volume", v.Id, "is deleted.") + if v.expiredLongEnough(MAX_TTL_VOLUME_REMOVAL_DELAY) { + deleteVids = append(deleteVids, v.Id) } else { glog.V(0).Infoln("volume", v.Id, "is expired.") } } + fileSize, _, _ := v.FileStat() + collectionVolumeSize[v.Collection] += fileSize + } + location.volumesLock.RUnlock() + + if len(deleteVids) > 0 { + // delete expired volumes. + location.volumesLock.Lock() + for _, vid := range deleteVids { + location.deleteVolumeById(vid) + glog.V(0).Infoln("volume", vid, "is deleted.") + } + location.volumesLock.Unlock() } - location.Unlock() + } + + for col, size := range collectionVolumeSize { + stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "normal").Set(float64(size)) } return &master_pb.Heartbeat{ @@ -183,74 +216,168 @@ func (s *Store) CollectHeartbeat() *master_pb.Heartbeat { DataCenter: s.dataCenter, Rack: s.rack, Volumes: volumeMessages, + HasNoVolumes: len(volumeMessages) == 0, } } + func (s *Store) Close() { for _, location := range s.Locations { location.Close() } } -func (s *Store) Write(i VolumeId, n *Needle) (size uint32, err error) { +func (s *Store) WriteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (size uint32, isUnchanged bool, err error) { if v := s.findVolume(i); v != nil { - if v.readOnly { - err = fmt.Errorf("Volume %d is read only", i) + if v.noWriteOrDelete || v.noWriteCanDelete { + err = fmt.Errorf("volume %d is read only", i) return } - // TODO: count needle size ahead - if MaxPossibleVolumeSize >= v.ContentSize()+uint64(size) { - size, err = v.writeNeedle(n) + if MaxPossibleVolumeSize >= v.ContentSize()+uint64(needle.GetActualSize(size, v.Version())) { + _, size, isUnchanged, err = v.writeNeedle(n) } else { - err = fmt.Errorf("Volume Size Limit %d Exceeded! Current size is %d", s.VolumeSizeLimit, v.ContentSize()) + err = fmt.Errorf("volume size limit %d exceeded! 
current size is %d", s.GetVolumeSizeLimit(), v.ContentSize()) } return } glog.V(0).Infoln("volume", i, "not found!") - err = fmt.Errorf("Volume %d not found!", i) + err = fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port) return } -func (s *Store) Delete(i VolumeId, n *Needle) (uint32, error) { - if v := s.findVolume(i); v != nil && !v.readOnly { - return v.deleteNeedle(n) +func (s *Store) DeleteVolumeNeedle(i needle.VolumeId, n *needle.Needle) (uint32, error) { + if v := s.findVolume(i); v != nil { + if v.noWriteOrDelete { + return 0, fmt.Errorf("volume %d is read only", i) + } + if MaxPossibleVolumeSize >= v.ContentSize()+uint64(needle.GetActualSize(0, v.Version())) { + return v.deleteNeedle(n) + } else { + return 0, fmt.Errorf("volume size limit %d exceeded! current size is %d", s.GetVolumeSizeLimit(), v.ContentSize()) + } } - return 0, nil + return 0, fmt.Errorf("volume %d not found on %s:%d", i, s.Ip, s.Port) } -func (s *Store) ReadVolumeNeedle(i VolumeId, n *Needle) (int, error) { +func (s *Store) ReadVolumeNeedle(i needle.VolumeId, n *needle.Needle) (int, error) { if v := s.findVolume(i); v != nil { return v.readNeedle(n) } - return 0, fmt.Errorf("Volume %d not found!", i) + return 0, fmt.Errorf("volume %d not found", i) } -func (s *Store) GetVolume(i VolumeId) *Volume { +func (s *Store) GetVolume(i needle.VolumeId) *Volume { return s.findVolume(i) } -func (s *Store) HasVolume(i VolumeId) bool { +func (s *Store) HasVolume(i needle.VolumeId) bool { v := s.findVolume(i) return v != nil } -func (s *Store) MountVolume(i VolumeId) error { +func (s *Store) MarkVolumeReadonly(i needle.VolumeId) error { + v := s.findVolume(i) + if v == nil { + return fmt.Errorf("volume %d not found", i) + } + v.noWriteOrDelete = true + return nil +} + +func (s *Store) MountVolume(i needle.VolumeId) error { for _, location := range s.Locations { if found := location.LoadVolume(i, s.NeedleMapType); found == true { - s.NewVolumeIdChan <- VolumeId(i) + glog.V(0).Infof("mount volume %d", i) + v := s.findVolume(i) + s.NewVolumesChan <- master_pb.VolumeShortInformationMessage{ + Id: uint32(v.Id), + Collection: v.Collection, + ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()), + Version: uint32(v.Version()), + Ttl: v.Ttl.ToUint32(), + } return nil } } - return fmt.Errorf("Volume %d not found on disk", i) + return fmt.Errorf("volume %d not found on disk", i) } -func (s *Store) UnmountVolume(i VolumeId) error { +func (s *Store) UnmountVolume(i needle.VolumeId) error { + v := s.findVolume(i) + if v == nil { + return nil + } + message := master_pb.VolumeShortInformationMessage{ + Id: uint32(v.Id), + Collection: v.Collection, + ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()), + Version: uint32(v.Version()), + Ttl: v.Ttl.ToUint32(), + } + for _, location := range s.Locations { if err := location.UnloadVolume(i); err == nil { - s.DeletedVolumeIdChan <- VolumeId(i) + glog.V(0).Infof("UnmountVolume %d", i) + s.DeletedVolumesChan <- message + return nil + } + } + + return fmt.Errorf("volume %d not found on disk", i) +} + +func (s *Store) DeleteVolume(i needle.VolumeId) error { + v := s.findVolume(i) + if v == nil { + return nil + } + message := master_pb.VolumeShortInformationMessage{ + Id: uint32(v.Id), + Collection: v.Collection, + ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()), + Version: uint32(v.Version()), + Ttl: v.Ttl.ToUint32(), + } + for _, location := range s.Locations { + if error := location.deleteVolumeById(i); error == nil { + glog.V(0).Infof("DeleteVolume %d", i) + s.DeletedVolumesChan <- 
message return nil } } - return fmt.Errorf("Volume %d not found on disk", i) + return fmt.Errorf("volume %d not found on disk", i) +} + +func (s *Store) ConfigureVolume(i needle.VolumeId, replication string) error { + + for _, location := range s.Locations { + fileInfo, found := location.LocateVolume(i) + if !found { + continue + } + // load, modify, save + baseFileName := strings.TrimSuffix(fileInfo.Name(), filepath.Ext(fileInfo.Name())) + vifFile := filepath.Join(location.Directory, baseFileName+".vif") + volumeInfo, _, err := pb.MaybeLoadVolumeInfo(vifFile) + if err != nil { + return fmt.Errorf("volume %d fail to load vif", i) + } + volumeInfo.Replication = replication + err = pb.SaveVolumeInfo(vifFile, volumeInfo) + if err != nil { + return fmt.Errorf("volume %d fail to save vif", i) + } + return nil + } + + return fmt.Errorf("volume %d not found on disk", i) +} + +func (s *Store) SetVolumeSizeLimit(x uint64) { + atomic.StoreUint64(&s.volumeSizeLimit, x) +} + +func (s *Store) GetVolumeSizeLimit() uint64 { + return atomic.LoadUint64(&s.volumeSizeLimit) } diff --git a/weed/storage/store_ec.go b/weed/storage/store_ec.go new file mode 100644 index 000000000..e423e7dca --- /dev/null +++ b/weed/storage/store_ec.go @@ -0,0 +1,387 @@ +package storage + +import ( + "context" + "fmt" + "io" + "sort" + "sync" + "time" + + "github.com/klauspost/reedsolomon" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/types" +) + +func (s *Store) CollectErasureCodingHeartbeat() *master_pb.Heartbeat { + var ecShardMessages []*master_pb.VolumeEcShardInformationMessage + collectionEcShardSize := make(map[string]int64) + for _, location := range s.Locations { + location.ecVolumesLock.RLock() + for _, ecShards := range location.ecVolumes { + ecShardMessages = append(ecShardMessages, ecShards.ToVolumeEcShardInformationMessage()...) 
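// Aside: EcIndexBits (seen in the mount/unmount messages below) packs the
// set of shard ids a server holds into a single uint32 bitmask: bit i set
// means shard i is present. A tiny sketch of that ShardBits-style encoding,
// assuming at most 14 shards (10 data + 4 parity); names are illustrative:
package main

import "fmt"

type shardBits uint32

func (b shardBits) addShardId(id uint8) shardBits { return b | 1<<id }
func (b shardBits) hasShardId(id uint8) bool      { return b&(1<<id) != 0 }

func (b shardBits) shardIds() (ids []uint8) {
	for id := uint8(0); id < 14; id++ { // 14 total shards assumed
		if b.hasShardId(id) {
			ids = append(ids, id)
		}
	}
	return
}

func main() {
	var b shardBits
	b = b.addShardId(0).addShardId(3).addShardId(13)
	fmt.Printf("bits=%014b shards=%v\n", b, b.shardIds()) // shards=[0 3 13]
}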
+ + for _, ecShard := range ecShards.Shards { + collectionEcShardSize[ecShards.Collection] += ecShard.Size() + } + } + location.ecVolumesLock.RUnlock() + } + + for col, size := range collectionEcShardSize { + stats.VolumeServerDiskSizeGauge.WithLabelValues(col, "ec").Set(float64(size)) + } + + return &master_pb.Heartbeat{ + EcShards: ecShardMessages, + HasNoEcShards: len(ecShardMessages) == 0, + } + +} + +func (s *Store) MountEcShards(collection string, vid needle.VolumeId, shardId erasure_coding.ShardId) error { + for _, location := range s.Locations { + if err := location.LoadEcShard(collection, vid, shardId); err == nil { + glog.V(0).Infof("MountEcShards %d.%d", vid, shardId) + + var shardBits erasure_coding.ShardBits + + s.NewEcShardsChan <- master_pb.VolumeEcShardInformationMessage{ + Id: uint32(vid), + Collection: collection, + EcIndexBits: uint32(shardBits.AddShardId(shardId)), + } + return nil + } else { + return fmt.Errorf("%s load ec shard %d.%d: %v", location.Directory, vid, shardId, err) + } + } + + return fmt.Errorf("MountEcShards %d.%d not found on disk", vid, shardId) +} + +func (s *Store) UnmountEcShards(vid needle.VolumeId, shardId erasure_coding.ShardId) error { + + ecShard, found := s.findEcShard(vid, shardId) + if !found { + return nil + } + + var shardBits erasure_coding.ShardBits + message := master_pb.VolumeEcShardInformationMessage{ + Id: uint32(vid), + Collection: ecShard.Collection, + EcIndexBits: uint32(shardBits.AddShardId(shardId)), + } + + for _, location := range s.Locations { + if deleted := location.UnloadEcShard(vid, shardId); deleted { + glog.V(0).Infof("UnmountEcShards %d.%d", vid, shardId) + s.DeletedEcShardsChan <- message + return nil + } + } + + return fmt.Errorf("UnmountEcShards %d.%d not found on disk", vid, shardId) +} + +func (s *Store) findEcShard(vid needle.VolumeId, shardId erasure_coding.ShardId) (*erasure_coding.EcVolumeShard, bool) { + for _, location := range s.Locations { + if v, found := location.FindEcShard(vid, shardId); found { + return v, found + } + } + return nil, false +} + +func (s *Store) FindEcVolume(vid needle.VolumeId) (*erasure_coding.EcVolume, bool) { + for _, location := range s.Locations { + if s, found := location.FindEcVolume(vid); found { + return s, true + } + } + return nil, false +} + +func (s *Store) DestroyEcVolume(vid needle.VolumeId) { + for _, location := range s.Locations { + location.DestroyEcVolume(vid) + } +} + +func (s *Store) ReadEcShardNeedle(vid needle.VolumeId, n *needle.Needle) (int, error) { + for _, location := range s.Locations { + if localEcVolume, found := location.FindEcVolume(vid); found { + + offset, size, intervals, err := localEcVolume.LocateEcShardNeedle(n.Id, localEcVolume.Version) + if err != nil { + return 0, fmt.Errorf("locate in local ec volume: %v", err) + } + if size == types.TombstoneFileSize { + return 0, fmt.Errorf("entry %s is deleted", n.Id) + } + + glog.V(3).Infof("read ec volume %d offset %d size %d intervals:%+v", vid, offset.ToAcutalOffset(), size, intervals) + + if len(intervals) > 1 { + glog.V(3).Infof("ReadEcShardNeedle needle id %s intervals:%+v", n.String(), intervals) + } + bytes, isDeleted, err := s.readEcShardIntervals(vid, n.Id, localEcVolume, intervals) + if err != nil { + return 0, fmt.Errorf("ReadEcShardIntervals: %v", err) + } + if isDeleted { + return 0, fmt.Errorf("ec entry %s is deleted", n.Id) + } + + err = n.ReadBytes(bytes, offset.ToAcutalOffset(), size, localEcVolume.Version) + if err != nil { + return 0, fmt.Errorf("readbytes: %v", err) + } + + return 
len(bytes), nil + } + } + return 0, fmt.Errorf("ec shard %d not found", vid) +} + +func (s *Store) readEcShardIntervals(vid needle.VolumeId, needleId types.NeedleId, ecVolume *erasure_coding.EcVolume, intervals []erasure_coding.Interval) (data []byte, is_deleted bool, err error) { + + if err = s.cachedLookupEcShardLocations(ecVolume); err != nil { + return nil, false, fmt.Errorf("failed to locate shard via master grpc %s: %v", s.MasterAddress, err) + } + + for i, interval := range intervals { + if d, isDeleted, e := s.readOneEcShardInterval(needleId, ecVolume, interval); e != nil { + return nil, isDeleted, e + } else { + if isDeleted { + is_deleted = true + } + if i == 0 { + data = d + } else { + data = append(data, d...) + } + } + } + return +} + +func (s *Store) readOneEcShardInterval(needleId types.NeedleId, ecVolume *erasure_coding.EcVolume, interval erasure_coding.Interval) (data []byte, is_deleted bool, err error) { + shardId, actualOffset := interval.ToShardIdAndOffset(erasure_coding.ErasureCodingLargeBlockSize, erasure_coding.ErasureCodingSmallBlockSize) + data = make([]byte, interval.Size) + if shard, found := ecVolume.FindEcVolumeShard(shardId); found { + if _, err = shard.ReadAt(data, actualOffset); err != nil { + glog.V(0).Infof("read local ec shard %d.%d: %v", ecVolume.VolumeId, shardId, err) + return + } + } else { + ecVolume.ShardLocationsLock.RLock() + sourceDataNodes, hasShardIdLocation := ecVolume.ShardLocations[shardId] + ecVolume.ShardLocationsLock.RUnlock() + + // try reading directly + if hasShardIdLocation { + _, is_deleted, err = s.readRemoteEcShardInterval(sourceDataNodes, needleId, ecVolume.VolumeId, shardId, data, actualOffset) + if err == nil { + return + } + glog.V(0).Infof("clearing ec shard %d.%d locations: %v", ecVolume.VolumeId, shardId, err) + forgetShardId(ecVolume, shardId) + } + + // try reading by recovering from other shards + _, is_deleted, err = s.recoverOneRemoteEcShardInterval(needleId, ecVolume, shardId, data, actualOffset) + if err == nil { + return + } + glog.V(0).Infof("recover ec shard %d.%d : %v", ecVolume.VolumeId, shardId, err) + } + return +} + +func forgetShardId(ecVolume *erasure_coding.EcVolume, shardId erasure_coding.ShardId) { + // failed to access the source data nodes, clear it up + ecVolume.ShardLocationsLock.Lock() + delete(ecVolume.ShardLocations, shardId) + ecVolume.ShardLocationsLock.Unlock() +} + +func (s *Store) cachedLookupEcShardLocations(ecVolume *erasure_coding.EcVolume) (err error) { + + shardCount := len(ecVolume.ShardLocations) + if shardCount < erasure_coding.DataShardsCount && + ecVolume.ShardLocationsRefreshTime.Add(11*time.Second).After(time.Now()) || + shardCount == erasure_coding.TotalShardsCount && + ecVolume.ShardLocationsRefreshTime.Add(37*time.Minute).After(time.Now()) || + shardCount >= erasure_coding.DataShardsCount && + ecVolume.ShardLocationsRefreshTime.Add(7*time.Minute).After(time.Now()) { + // still fresh + return nil + } + + glog.V(3).Infof("lookup and cache ec volume %d locations", ecVolume.VolumeId) + + err = operation.WithMasterServerClient(s.MasterAddress, s.grpcDialOption, func(masterClient master_pb.SeaweedClient) error { + req := &master_pb.LookupEcVolumeRequest{ + VolumeId: uint32(ecVolume.VolumeId), + } + resp, err := masterClient.LookupEcVolume(context.Background(), req) + if err != nil { + return fmt.Errorf("lookup ec volume %d: %v", ecVolume.VolumeId, err) + } + if len(resp.ShardIdLocations) < erasure_coding.DataShardsCount { + return fmt.Errorf("only %d shards found but %d required", 
len(resp.ShardIdLocations), erasure_coding.DataShardsCount) + } + + ecVolume.ShardLocationsLock.Lock() + for _, shardIdLocations := range resp.ShardIdLocations { + shardId := erasure_coding.ShardId(shardIdLocations.ShardId) + delete(ecVolume.ShardLocations, shardId) + for _, loc := range shardIdLocations.Locations { + ecVolume.ShardLocations[shardId] = append(ecVolume.ShardLocations[shardId], loc.Url) + } + } + ecVolume.ShardLocationsRefreshTime = time.Now() + ecVolume.ShardLocationsLock.Unlock() + + return nil + }) + return +} + +func (s *Store) readRemoteEcShardInterval(sourceDataNodes []string, needleId types.NeedleId, vid needle.VolumeId, shardId erasure_coding.ShardId, buf []byte, offset int64) (n int, is_deleted bool, err error) { + + if len(sourceDataNodes) == 0 { + return 0, false, fmt.Errorf("failed to find ec shard %d.%d", vid, shardId) + } + + for _, sourceDataNode := range sourceDataNodes { + glog.V(3).Infof("read remote ec shard %d.%d from %s", vid, shardId, sourceDataNode) + n, is_deleted, err = s.doReadRemoteEcShardInterval(sourceDataNode, needleId, vid, shardId, buf, offset) + if err == nil { + return + } + glog.V(1).Infof("read remote ec shard %d.%d from %s: %v", vid, shardId, sourceDataNode, err) + } + + return +} + +func (s *Store) doReadRemoteEcShardInterval(sourceDataNode string, needleId types.NeedleId, vid needle.VolumeId, shardId erasure_coding.ShardId, buf []byte, offset int64) (n int, is_deleted bool, err error) { + + err = operation.WithVolumeServerClient(sourceDataNode, s.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + + // copy data slice + shardReadClient, err := client.VolumeEcShardRead(context.Background(), &volume_server_pb.VolumeEcShardReadRequest{ + VolumeId: uint32(vid), + ShardId: uint32(shardId), + Offset: offset, + Size: int64(len(buf)), + FileKey: uint64(needleId), + }) + if err != nil { + return fmt.Errorf("failed to start reading ec shard %d.%d from %s: %v", vid, shardId, sourceDataNode, err) + } + + for { + resp, receiveErr := shardReadClient.Recv() + if receiveErr == io.EOF { + break + } + if receiveErr != nil { + return fmt.Errorf("receiving ec shard %d.%d from %s: %v", vid, shardId, sourceDataNode, err) + } + if resp.IsDeleted { + is_deleted = true + } + copy(buf[n:n+len(resp.Data)], resp.Data) + n += len(resp.Data) + } + + return nil + }) + if err != nil { + return 0, is_deleted, fmt.Errorf("read ec shard %d.%d from %s: %v", vid, shardId, sourceDataNode, err) + } + + return +} + +func (s *Store) recoverOneRemoteEcShardInterval(needleId types.NeedleId, ecVolume *erasure_coding.EcVolume, shardIdToRecover erasure_coding.ShardId, buf []byte, offset int64) (n int, is_deleted bool, err error) { + glog.V(3).Infof("recover ec shard %d.%d from other locations", ecVolume.VolumeId, shardIdToRecover) + + enc, err := reedsolomon.New(erasure_coding.DataShardsCount, erasure_coding.ParityShardsCount) + if err != nil { + return 0, false, fmt.Errorf("failed to create encoder: %v", err) + } + + bufs := make([][]byte, erasure_coding.TotalShardsCount) + + var wg sync.WaitGroup + ecVolume.ShardLocationsLock.RLock() + for shardId, locations := range ecVolume.ShardLocations { + + // skip currnent shard or empty shard + if shardId == shardIdToRecover { + continue + } + if len(locations) == 0 { + glog.V(3).Infof("readRemoteEcShardInterval missing %d.%d from %+v", ecVolume.VolumeId, shardId, locations) + continue + } + + // read from remote locations + wg.Add(1) + go func(shardId erasure_coding.ShardId, locations []string) { + defer wg.Done() 
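// Aside: a runnable sketch of the Reed-Solomon reconstruction this recovery
// path relies on, using the klauspost/reedsolomon library imported by this
// file. 10 data + 4 parity shards is assumed here (the usual SeaweedFS
// layout); any 10 intact shards suffice to rebuild the missing data.
package main

import (
	"fmt"

	"github.com/klauspost/reedsolomon"
)

func main() {
	enc, err := reedsolomon.New(10, 4) // data shards, parity shards
	if err != nil {
		panic(err)
	}
	data := make([]byte, 10*1024)
	for i := range data {
		data[i] = byte(i)
	}
	shards, _ := enc.Split(data) // 14 slices: 10 data shards + 4 parity buffers
	_ = enc.Encode(shards)       // computes the 4 parity shards

	shards[3], shards[12] = nil, nil // lose one data shard and one parity shard
	if err := enc.ReconstructData(shards); err != nil {
		panic(err) // needs at least 10 of the 14 shards
	}
	fmt.Println("data shard 3 recovered:", len(shards[3]) == 1024)
}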
+ data := make([]byte, len(buf)) + nRead, isDeleted, readErr := s.readRemoteEcShardInterval(locations, needleId, ecVolume.VolumeId, shardId, data, offset) + if readErr != nil { + glog.V(3).Infof("recover: readRemoteEcShardInterval %d.%d %d bytes from %+v: %v", ecVolume.VolumeId, shardId, nRead, locations, readErr) + forgetShardId(ecVolume, shardId) + } + if isDeleted { + is_deleted = true + } + if nRead == len(buf) { + bufs[shardId] = data + } + }(shardId, locations) + } + ecVolume.ShardLocationsLock.RUnlock() + + wg.Wait() + + if err = enc.ReconstructData(bufs); err != nil { + glog.V(3).Infof("recovered ec shard %d.%d failed: %v", ecVolume.VolumeId, shardIdToRecover, err) + return 0, false, err + } + glog.V(4).Infof("recovered ec shard %d.%d from other locations", ecVolume.VolumeId, shardIdToRecover) + + copy(buf, bufs[shardIdToRecover]) + + return len(buf), is_deleted, nil +} + +func (s *Store) EcVolumes() (ecVolumes []*erasure_coding.EcVolume) { + for _, location := range s.Locations { + location.ecVolumesLock.RLock() + for _, v := range location.ecVolumes { + ecVolumes = append(ecVolumes, v) + } + location.ecVolumesLock.RUnlock() + } + sort.Slice(ecVolumes, func(i, j int) bool { + return ecVolumes[i].VolumeId > ecVolumes[j].VolumeId + }) + return ecVolumes +} diff --git a/weed/storage/store_ec_delete.go b/weed/storage/store_ec_delete.go new file mode 100644 index 000000000..4a75fb20b --- /dev/null +++ b/weed/storage/store_ec_delete.go @@ -0,0 +1,105 @@ +package storage + +import ( + "context" + "fmt" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/operation" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/types" +) + +func (s *Store) DeleteEcShardNeedle(ecVolume *erasure_coding.EcVolume, n *needle.Needle, cookie types.Cookie) (int64, error) { + + count, err := s.ReadEcShardNeedle(ecVolume.VolumeId, n) + + if err != nil { + return 0, err + } + + if cookie != n.Cookie { + return 0, fmt.Errorf("unexpected cookie %x", cookie) + } + + if err = s.doDeleteNeedleFromAtLeastOneRemoteEcShards(ecVolume, n.Id); err != nil { + return 0, err + } + + return int64(count), nil + +} + +func (s *Store) doDeleteNeedleFromAtLeastOneRemoteEcShards(ecVolume *erasure_coding.EcVolume, needleId types.NeedleId) error { + + _, _, intervals, err := ecVolume.LocateEcShardNeedle(needleId, ecVolume.Version) + + if len(intervals) == 0 { + return erasure_coding.NotFoundError + } + + shardId, _ := intervals[0].ToShardIdAndOffset(erasure_coding.ErasureCodingLargeBlockSize, erasure_coding.ErasureCodingSmallBlockSize) + + hasDeletionSuccess := false + err = s.doDeleteNeedleFromRemoteEcShardServers(shardId, ecVolume, needleId) + if err == nil { + hasDeletionSuccess = true + } + + for shardId = erasure_coding.DataShardsCount; shardId < erasure_coding.TotalShardsCount; shardId++ { + if parityDeletionError := s.doDeleteNeedleFromRemoteEcShardServers(shardId, ecVolume, needleId); parityDeletionError == nil { + hasDeletionSuccess = true + } + } + + if hasDeletionSuccess { + return nil + } + + return err + +} + +func (s *Store) doDeleteNeedleFromRemoteEcShardServers(shardId erasure_coding.ShardId, ecVolume *erasure_coding.EcVolume, needleId types.NeedleId) error { + + ecVolume.ShardLocationsLock.RLock() + sourceDataNodes, hasShardLocations := ecVolume.ShardLocations[shardId] + ecVolume.ShardLocationsLock.RUnlock() + + 
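// Aside: ToShardIdAndOffset above maps a logical position in the ec volume
// to (shard id, offset within that shard). SeaweedFS stripes with a large
// and a small block size; the sketch below shows the idea with a single
// block size and 10 data shards, so the formula is simplified, not exact.
package main

import "fmt"

const dataShards = 10

func toShardIdAndOffset(logical, blockSize int64) (shardId int, inShard int64) {
	block := logical / blockSize      // index of the stripe block
	shardId = int(block % dataShards) // blocks go round-robin over the shards
	row := block / dataShards         // full stripe rows before this block
	inShard = row*blockSize + logical%blockSize
	return
}

func main() {
	// with 1 MiB blocks, logical byte 25 MiB lands in block 25: shard 5, row 2
	shard, off := toShardIdAndOffset(25<<20, 1<<20)
	fmt.Println(shard, off) // 5 2097152
}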
if !hasShardLocations { + return fmt.Errorf("ec shard %d.%d not located", ecVolume.VolumeId, shardId) + } + + for _, sourceDataNode := range sourceDataNodes { + glog.V(4).Infof("delete from remote ec shard %d.%d from %s", ecVolume.VolumeId, shardId, sourceDataNode) + err := s.doDeleteNeedleFromRemoteEcShard(sourceDataNode, ecVolume.VolumeId, ecVolume.Collection, ecVolume.Version, needleId) + if err != nil { + return err + } + glog.V(1).Infof("delete from remote ec shard %d.%d from %s: %v", ecVolume.VolumeId, shardId, sourceDataNode, err) + } + + return nil + +} + +func (s *Store) doDeleteNeedleFromRemoteEcShard(sourceDataNode string, vid needle.VolumeId, collection string, version needle.Version, needleId types.NeedleId) error { + + return operation.WithVolumeServerClient(sourceDataNode, s.grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + + // copy data slice + _, err := client.VolumeEcBlobDelete(context.Background(), &volume_server_pb.VolumeEcBlobDeleteRequest{ + VolumeId: uint32(vid), + Collection: collection, + FileKey: uint64(needleId), + Version: uint32(version), + }) + if err != nil { + return fmt.Errorf("failed to delete from ec shard %d on %s: %v", vid, sourceDataNode, err) + } + return nil + }) + +} diff --git a/weed/storage/store_vacuum.go b/weed/storage/store_vacuum.go index cc0521491..38159496e 100644 --- a/weed/storage/store_vacuum.go +++ b/weed/storage/store_vacuum.go @@ -2,29 +2,31 @@ package storage import ( "fmt" + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) -func (s *Store) CheckCompactVolume(volumeId VolumeId) (float64, error) { +func (s *Store) CheckCompactVolume(volumeId needle.VolumeId) (float64, error) { if v := s.findVolume(volumeId); v != nil { glog.V(3).Infof("volumd %d garbage level: %f", volumeId, v.garbageLevel()) return v.garbageLevel(), nil } return 0, fmt.Errorf("volume id %d is not found during check compact", volumeId) } -func (s *Store) CompactVolume(vid VolumeId, preallocate int64) error { +func (s *Store) CompactVolume(vid needle.VolumeId, preallocate int64, compactionBytePerSecond int64) error { if v := s.findVolume(vid); v != nil { - return v.Compact(preallocate) + return v.Compact2(preallocate, compactionBytePerSecond) } return fmt.Errorf("volume id %d is not found during compact", vid) } -func (s *Store) CommitCompactVolume(vid VolumeId) error { +func (s *Store) CommitCompactVolume(vid needle.VolumeId) error { if v := s.findVolume(vid); v != nil { - return v.commitCompact() + return v.CommitCompact() } return fmt.Errorf("volume id %d is not found during commit compact", vid) } -func (s *Store) CommitCleanupVolume(vid VolumeId) error { +func (s *Store) CommitCleanupVolume(vid needle.VolumeId) error { if v := s.findVolume(vid); v != nil { return v.cleanupCompact() } diff --git a/weed/storage/replica_placement.go b/weed/storage/super_block/replica_placement.go index c1aca52eb..fcccbba7d 100644 --- a/weed/storage/replica_placement.go +++ b/weed/storage/super_block/replica_placement.go @@ -1,4 +1,4 @@ -package storage +package super_block import ( "errors" diff --git a/weed/storage/super_block/replica_placement_test.go b/weed/storage/super_block/replica_placement_test.go new file mode 100644 index 000000000..7742ba548 --- /dev/null +++ b/weed/storage/super_block/replica_placement_test.go @@ -0,0 +1,14 @@ +package super_block + +import ( + "testing" +) + +func TestReplicaPlacementSerialDeserial(t *testing.T) { + rp, _ := NewReplicaPlacementFromString("001") + newRp, _ := 
NewReplicaPlacementFromByte(rp.Byte()) + if rp.String() != newRp.String() { + println("expected:", rp.String(), "actual:", newRp.String()) + t.Fail() + } +} diff --git a/weed/storage/super_block/super_block.go b/weed/storage/super_block/super_block.go new file mode 100644 index 000000000..f48cd0bdc --- /dev/null +++ b/weed/storage/super_block/super_block.go @@ -0,0 +1,69 @@ +package super_block + +import ( + "github.com/golang/protobuf/proto" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/util" +) + +const ( + SuperBlockSize = 8 +) + +/* +* Super block currently has 8 bytes allocated for each volume. +* Byte 0: version, 1 or 2 +* Byte 1: Replica Placement strategy, 000, 001, 002, 010, etc +* Byte 2 and byte 3: Time to live. See TTL for definition +* Byte 4 and byte 5: The number of times the volume has been compacted. +* Rest bytes: Reserved + */ +type SuperBlock struct { + Version needle.Version + ReplicaPlacement *ReplicaPlacement + Ttl *needle.TTL + CompactionRevision uint16 + Extra *master_pb.SuperBlockExtra + ExtraSize uint16 +} + +func (s *SuperBlock) BlockSize() int { + switch s.Version { + case needle.Version2, needle.Version3: + return SuperBlockSize + int(s.ExtraSize) + } + return SuperBlockSize +} + +func (s *SuperBlock) Bytes() []byte { + header := make([]byte, SuperBlockSize) + header[0] = byte(s.Version) + header[1] = s.ReplicaPlacement.Byte() + s.Ttl.ToBytes(header[2:4]) + util.Uint16toBytes(header[4:6], s.CompactionRevision) + + if s.Extra != nil { + extraData, err := proto.Marshal(s.Extra) + if err != nil { + glog.Fatalf("cannot marshal super block extra %+v: %v", s.Extra, err) + } + extraSize := len(extraData) + if extraSize > 256*256-2 { + // reserve a couple of bits for future extension + glog.Fatalf("super block extra size is %d bigger than %d", extraSize, 256*256-2) + } + s.ExtraSize = uint16(extraSize) + util.Uint16toBytes(header[6:8], s.ExtraSize) + + header = append(header, extraData...) 
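// Aside: a worked example of the 8-byte super block layout documented above,
// for version 2, replica placement "001", a "15d" TTL, and compaction
// revision 7. The TTL occupies a count byte plus a unit-code byte (the test
// below checks bytes[2]==15 && bytes[3]==Day); the concrete value of the
// Day code is assumed here purely for illustration.
package main

import (
	"encoding/binary"
	"fmt"
)

const dayUnitCode = 3 // assumed numeric code for the "day" TTL unit

func main() {
	header := make([]byte, 8)
	header[0] = 2                              // byte 0: version
	header[1] = 0*100 + 0*10 + 1               // byte 1: replica placement "001"
	header[2], header[3] = 15, dayUnitCode     // bytes 2-3: TTL count + unit
	binary.BigEndian.PutUint16(header[4:6], 7) // bytes 4-5: compaction revision
	fmt.Printf("super block: % x\n", header)   // bytes 6-7: extra size / reserved
}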
+ } + + return header +} + +func (s *SuperBlock) Initialized() bool { + return s.ReplicaPlacement != nil && s.Ttl != nil +} diff --git a/weed/storage/super_block/super_block_read.go.go b/weed/storage/super_block/super_block_read.go.go new file mode 100644 index 000000000..9eb12e116 --- /dev/null +++ b/weed/storage/super_block/super_block_read.go.go @@ -0,0 +1,44 @@ +package super_block + +import ( + "fmt" + + "github.com/golang/protobuf/proto" + + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/util" +) + +// ReadSuperBlock reads from data file and load it into volume's super block +func ReadSuperBlock(datBackend backend.BackendStorageFile) (superBlock SuperBlock, err error) { + + header := make([]byte, SuperBlockSize) + if _, e := datBackend.ReadAt(header, 0); e != nil { + err = fmt.Errorf("cannot read volume %s super block: %v", datBackend.Name(), e) + return + } + + superBlock.Version = needle.Version(header[0]) + if superBlock.ReplicaPlacement, err = NewReplicaPlacementFromByte(header[1]); err != nil { + err = fmt.Errorf("cannot read replica type: %s", err.Error()) + return + } + superBlock.Ttl = needle.LoadTTLFromBytes(header[2:4]) + superBlock.CompactionRevision = util.BytesToUint16(header[4:6]) + superBlock.ExtraSize = util.BytesToUint16(header[6:8]) + + if superBlock.ExtraSize > 0 { + // read more + extraData := make([]byte, int(superBlock.ExtraSize)) + superBlock.Extra = &master_pb.SuperBlockExtra{} + err = proto.Unmarshal(extraData, superBlock.Extra) + if err != nil { + err = fmt.Errorf("cannot read volume %s super block extra: %v", datBackend.Name(), err) + return + } + } + + return +} diff --git a/weed/storage/volume_super_block_test.go b/weed/storage/super_block/super_block_test.go index 13db4b194..25699070d 100644 --- a/weed/storage/volume_super_block_test.go +++ b/weed/storage/super_block/super_block_test.go @@ -1,21 +1,23 @@ -package storage +package super_block import ( "testing" + + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) func TestSuperBlockReadWrite(t *testing.T) { rp, _ := NewReplicaPlacementFromByte(byte(001)) - ttl, _ := ReadTTL("15d") + ttl, _ := needle.ReadTTL("15d") s := &SuperBlock{ - version: CurrentVersion, + Version: needle.CurrentVersion, ReplicaPlacement: rp, Ttl: ttl, } bytes := s.Bytes() - if !(bytes[2] == 15 && bytes[3] == Day) { + if !(bytes[2] == 15 && bytes[3] == needle.Day) { println("byte[2]:", bytes[2], "byte[3]:", bytes[3]) t.Fail() } diff --git a/weed/storage/types/needle_id_128bit.go b/weed/storage/types/needle_id_128bit.go deleted file mode 100644 index 3af0b7285..000000000 --- a/weed/storage/types/needle_id_128bit.go +++ /dev/null @@ -1,44 +0,0 @@ -// +build 128BitNeedleId - -package types - -import ( - "encoding/hex" -) - -const ( - NeedleIdSize = 16 - NeedleIdEmpty = "" -) - -// this is a 128 bit needle id implementation. -// Usually a FileId has 32bit volume id, 64bit needle id, 32 bit cookie. -// But when your system is using UUID, which is 128 bit, a custom 128-bit needle id can be easier to manage. -// Caveat: In this mode, the fildId from master /dir/assign can not be directly used. -// Only the volume id and cookie from the fileId are usuable. 
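// Aside: this deleted 128-bit needle id variant keeps the id as a 32-char
// hex string and round-trips it through hex encode/decode, as the removed
// NeedleIdToBytes/BytesToNeedleId below show. A tiny self-contained
// illustration of that round trip (names are illustrative):
package main

import (
	"encoding/hex"
	"fmt"
)

func main() {
	id := "0123456789abcdef0123456789abcdef" // e.g. a UUID without dashes
	raw := make([]byte, 16)
	if _, err := hex.Decode(raw, []byte(id)); err != nil { // NeedleIdToBytes
		panic(err)
	}
	back := hex.EncodeToString(raw) // BytesToNeedleId
	fmt.Println(back == id)         // true: 16 raw bytes <-> 32 hex chars
}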
-type NeedleId string - -func NeedleIdToBytes(bytes []byte, needleId NeedleId) { - hex.Decode(bytes, []byte(needleId)) -} - -// NeedleIdToUint64 used to send max needle id to master -func NeedleIdToUint64(needleId NeedleId) uint64 { - return 0 -} - -func Uint64ToNeedleId(needleId uint64) NeedleId { - return NeedleId("") -} - -func BytesToNeedleId(bytes []byte) (needleId NeedleId) { - return NeedleId(hex.EncodeToString(bytes)) -} - -func (k NeedleId) String() string { - return string(k) -} - -func ParseNeedleId(idString string) (NeedleId, error) { - return NeedleId(idString), nil -} diff --git a/weed/storage/types/needle_id_type.go b/weed/storage/types/needle_id_type.go index 4a890cd16..32a296613 100644 --- a/weed/storage/types/needle_id_type.go +++ b/weed/storage/types/needle_id_type.go @@ -1,5 +1,3 @@ -// +build !128BitNeedleId - package types import ( diff --git a/weed/storage/types/needle_types.go b/weed/storage/types/needle_types.go index ce4e601e4..2ebb392db 100644 --- a/weed/storage/types/needle_types.go +++ b/weed/storage/types/needle_types.go @@ -7,18 +7,28 @@ import ( "strconv" ) -type Offset uint32 +type Offset struct { + OffsetHigher + OffsetLower +} + +type OffsetLower struct { + b3 byte + b2 byte + b1 byte + b0 byte // the smaller byte +} + type Cookie uint32 const ( - OffsetSize = 4 - SizeSize = 4 // uint32 size - NeedleEntrySize = NeedleIdSize + OffsetSize + SizeSize - TimestampSize = 8 // int64 size - NeedlePaddingSize = 8 - MaxPossibleVolumeSize = 4 * 1024 * 1024 * 1024 * 8 - TombstoneFileSize = math.MaxUint32 - CookieSize = 4 + SizeSize = 4 // uint32 size + NeedleHeaderSize = CookieSize + NeedleIdSize + SizeSize + NeedleMapEntrySize = NeedleIdSize + OffsetSize + SizeSize + TimestampSize = 8 // int64 size + NeedlePaddingSize = 8 + TombstoneFileSize = math.MaxUint32 + CookieSize = 4 ) func CookieToBytes(bytes []byte, cookie Cookie) { @@ -39,15 +49,3 @@ func ParseCookie(cookieString string) (Cookie, error) { } return Cookie(cookie), nil } - -func OffsetToBytes(bytes []byte, offset Offset) { - util.Uint32toBytes(bytes, uint32(offset)) -} - -func Uint32ToOffset(offset uint32) Offset { - return Offset(offset) -} - -func BytesToOffset(bytes []byte) Offset { - return Offset(util.BytesToUint32(bytes[0:4])) -} diff --git a/weed/storage/types/offset_4bytes.go b/weed/storage/types/offset_4bytes.go new file mode 100644 index 000000000..9acd069d3 --- /dev/null +++ b/weed/storage/types/offset_4bytes.go @@ -0,0 +1,63 @@ +// +build !5BytesOffset + +package types + +import ( + "fmt" +) + +type OffsetHigher struct { + // b4 byte +} + +const ( + OffsetSize = 4 + MaxPossibleVolumeSize = 4 * 1024 * 1024 * 1024 * 8 // 32GB +) + +func OffsetToBytes(bytes []byte, offset Offset) { + bytes[3] = offset.b0 + bytes[2] = offset.b1 + bytes[1] = offset.b2 + bytes[0] = offset.b3 +} + +// only for testing, will be removed later. 
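// Aside: the 4-byte offset above is stored in units of NeedlePaddingSize
// (8 bytes), so it addresses 2^32 * 8 = 32 GiB per volume, matching the
// MaxPossibleVolumeSize comment; the 5BytesOffset build below multiplies
// that by 256, giving 8 TiB. A round trip of that packing, using the same
// byte order as OffsetToBytes (b3 is the high byte); names are illustrative:
package main

import "fmt"

const needlePaddingSize = 8

func toOffsetBytes(actual int64) [4]byte {
	u := uint32(actual / needlePaddingSize) // stored in 8-byte units
	return [4]byte{byte(u >> 24), byte(u >> 16), byte(u >> 8), byte(u)}
}

func toActualOffset(b [4]byte) int64 {
	u := uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
	return int64(u) * needlePaddingSize
}

func main() {
	b := toOffsetBytes(4096)
	fmt.Println(toActualOffset(b)) // 4096; max representable: 2^32*8 = 32 GiB
}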
+func Uint32ToOffset(offset uint32) Offset { + return Offset{ + OffsetLower: OffsetLower{ + b0: byte(offset), + b1: byte(offset >> 8), + b2: byte(offset >> 16), + b3: byte(offset >> 24), + }, + } +} + +func BytesToOffset(bytes []byte) Offset { + return Offset{ + OffsetLower: OffsetLower{ + b0: bytes[3], + b1: bytes[2], + b2: bytes[1], + b3: bytes[0], + }, + } +} + +func (offset Offset) IsZero() bool { + return offset.b0 == 0 && offset.b1 == 0 && offset.b2 == 0 && offset.b3 == 0 +} + +func ToOffset(offset int64) Offset { + smaller := uint32(offset / int64(NeedlePaddingSize)) + return Uint32ToOffset(smaller) +} + +func (offset Offset) ToAcutalOffset() (actualOffset int64) { + return (int64(offset.b0) + int64(offset.b1)<<8 + int64(offset.b2)<<16 + int64(offset.b3)<<24) * int64(NeedlePaddingSize) +} + +func (offset Offset) String() string { + return fmt.Sprintf("%d", int64(offset.b0)+int64(offset.b1)<<8+int64(offset.b2)<<16+int64(offset.b3)<<24) +} diff --git a/weed/storage/types/offset_5bytes.go b/weed/storage/types/offset_5bytes.go new file mode 100644 index 000000000..f57e4f6d4 --- /dev/null +++ b/weed/storage/types/offset_5bytes.go @@ -0,0 +1,80 @@ +// +build 5BytesOffset + +package types + +import ( + "fmt" +) + +type OffsetHigher struct { + b4 byte +} + +const ( + OffsetSize = 4 + 1 + MaxPossibleVolumeSize = 4 * 1024 * 1024 * 1024 * 8 * 256 /* 256 is from the extra byte */ // 8TB +) + +func OffsetToBytes(bytes []byte, offset Offset) { + bytes[4] = offset.b4 + bytes[3] = offset.b0 + bytes[2] = offset.b1 + bytes[1] = offset.b2 + bytes[0] = offset.b3 +} + +// only for testing, will be removed later. +func Uint32ToOffset(offset uint32) Offset { + return Offset{ + OffsetHigher: OffsetHigher{ + b4: byte(offset >> 32), + }, + OffsetLower: OffsetLower{ + b0: byte(offset), + b1: byte(offset >> 8), + b2: byte(offset >> 16), + b3: byte(offset >> 24), + }, + } +} + +func BytesToOffset(bytes []byte) Offset { + return Offset{ + OffsetHigher: OffsetHigher{ + b4: bytes[4], + }, + OffsetLower: OffsetLower{ + b0: bytes[3], + b1: bytes[2], + b2: bytes[1], + b3: bytes[0], + }, + } +} + +func (offset Offset) IsZero() bool { + return offset.b0 == 0 && offset.b1 == 0 && offset.b2 == 0 && offset.b3 == 0 && offset.b4 == 0 +} + +func ToOffset(offset int64) Offset { + smaller := offset / int64(NeedlePaddingSize) + return Offset{ + OffsetHigher: OffsetHigher{ + b4: byte(smaller >> 32), + }, + OffsetLower: OffsetLower{ + b0: byte(smaller), + b1: byte(smaller >> 8), + b2: byte(smaller >> 16), + b3: byte(smaller >> 24), + }, + } +} + +func (offset Offset) ToAcutalOffset() (actualOffset int64) { + return (int64(offset.b0) + int64(offset.b1)<<8 + int64(offset.b2)<<16 + int64(offset.b3)<<24 + int64(offset.b4)<<32) * int64(NeedlePaddingSize) +} + +func (offset Offset) String() string { + return fmt.Sprintf("%d", int64(offset.b0)+int64(offset.b1)<<8+int64(offset.b2)<<16+int64(offset.b3)<<24+int64(offset.b4)<<32) +} diff --git a/weed/storage/volume.go b/weed/storage/volume.go index 3b03b7c83..7da83de7a 100644 --- a/weed/storage/volume.go +++ b/weed/storage/volume.go @@ -2,85 +2,169 @@ package storage import ( "fmt" - "os" "path" + "strconv" "sync" "time" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + 
"github.com/chrislusf/seaweedfs/weed/storage/types" + "github.com/chrislusf/seaweedfs/weed/glog" ) type Volume struct { - Id VolumeId - dir string - Collection string - dataFile *os.File - nm NeedleMapper - needleMapKind NeedleMapType - readOnly bool - - SuperBlock - - dataFileAccessLock sync.Mutex - lastModifiedTime uint64 //unix time in seconds + Id needle.VolumeId + dir string + Collection string + DataBackend backend.BackendStorageFile + nm NeedleMapper + needleMapKind NeedleMapType + noWriteOrDelete bool // if readonly, either noWriteOrDelete or noWriteCanDelete + noWriteCanDelete bool // if readonly, either noWriteOrDelete or noWriteCanDelete + hasRemoteFile bool // if the volume has a remote file + MemoryMapMaxSizeMb uint32 + + super_block.SuperBlock + + dataFileAccessLock sync.RWMutex + lastModifiedTsSeconds uint64 //unix time in seconds + lastAppendAtNs uint64 //unix time in nanoseconds lastCompactIndexOffset uint64 lastCompactRevision uint16 + + isCompacting bool + + volumeInfo *volume_server_pb.VolumeInfo } -func NewVolume(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType, replicaPlacement *ReplicaPlacement, ttl *TTL, preallocate int64) (v *Volume, e error) { +func NewVolume(dirname string, collection string, id needle.VolumeId, needleMapKind NeedleMapType, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, memoryMapMaxSizeMb uint32) (v *Volume, e error) { // if replicaPlacement is nil, the superblock will be loaded from disk - v = &Volume{dir: dirname, Collection: collection, Id: id} - v.SuperBlock = SuperBlock{ReplicaPlacement: replicaPlacement, Ttl: ttl} + v = &Volume{dir: dirname, Collection: collection, Id: id, MemoryMapMaxSizeMb: memoryMapMaxSizeMb} + v.SuperBlock = super_block.SuperBlock{ReplicaPlacement: replicaPlacement, Ttl: ttl} v.needleMapKind = needleMapKind e = v.load(true, true, needleMapKind, preallocate) return } func (v *Volume) String() string { - return fmt.Sprintf("Id:%v, dir:%s, Collection:%s, dataFile:%v, nm:%v, readOnly:%v", v.Id, v.dir, v.Collection, v.dataFile, v.nm, v.readOnly) + return fmt.Sprintf("Id:%v, dir:%s, Collection:%s, dataFile:%v, nm:%v, noWrite:%v canDelete:%v", v.Id, v.dir, v.Collection, v.DataBackend, v.nm, v.noWriteOrDelete || v.noWriteCanDelete, v.noWriteCanDelete) } -func (v *Volume) FileName() (fileName string) { - if v.Collection == "" { - fileName = path.Join(v.dir, v.Id.String()) +func VolumeFileName(dir string, collection string, id int) (fileName string) { + idString := strconv.Itoa(id) + if collection == "" { + fileName = path.Join(dir, idString) } else { - fileName = path.Join(v.dir, v.Collection+"_"+v.Id.String()) + fileName = path.Join(dir, collection+"_"+idString) } return } -func (v *Volume) DataFile() *os.File { - return v.dataFile +func (v *Volume) FileName() (fileName string) { + return VolumeFileName(v.dir, v.Collection, int(v.Id)) } -func (v *Volume) Version() Version { - return v.SuperBlock.Version() +func (v *Volume) Version() needle.Version { + if v.volumeInfo.Version != 0 { + v.SuperBlock.Version = needle.Version(v.volumeInfo.Version) + } + return v.SuperBlock.Version } -func (v *Volume) Size() int64 { - stat, e := v.dataFile.Stat() +func (v *Volume) FileStat() (datSize uint64, idxSize uint64, modTime time.Time) { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + + if v.DataBackend == nil { + return + } + + datFileSize, modTime, e := v.DataBackend.GetStat() if e == nil { - return stat.Size() + return uint64(datFileSize), 
v.nm.IndexFileSize(), modTime + } + glog.V(0).Infof("Failed to read file size %s %v", v.DataBackend.Name(), e) + return // -1 causes integer overflow and the volume to become unwritable. +} + +func (v *Volume) ContentSize() uint64 { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + if v.nm == nil { + return 0 + } + return v.nm.ContentSize() +} + +func (v *Volume) DeletedSize() uint64 { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + if v.nm == nil { + return 0 + } + return v.nm.DeletedSize() +} + +func (v *Volume) FileCount() uint64 { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + if v.nm == nil { + return 0 + } + return uint64(v.nm.FileCount()) +} + +func (v *Volume) DeletedCount() uint64 { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + if v.nm == nil { + return 0 + } + return uint64(v.nm.DeletedCount()) +} + +func (v *Volume) MaxFileKey() types.NeedleId { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + if v.nm == nil { + return 0 + } + return v.nm.MaxFileKey() +} + +func (v *Volume) IndexFileSize() uint64 { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + if v.nm == nil { + return 0 } - glog.V(0).Infof("Failed to read file size %s %v", v.dataFile.Name(), e) - return 0 // -1 causes integer overflow and the volume to become unwritable. + return v.nm.IndexFileSize() } // Close cleanly shuts down this volume func (v *Volume) Close() { v.dataFileAccessLock.Lock() defer v.dataFileAccessLock.Unlock() - v.nm.Close() - _ = v.dataFile.Close() + if v.nm != nil { + v.nm.Close() + v.nm = nil + } + if v.DataBackend != nil { + _ = v.DataBackend.Close() + v.DataBackend = nil + stats.VolumeServerVolumeCounter.WithLabelValues(v.Collection, "volume").Dec() + } } func (v *Volume) NeedToReplicate() bool { return v.ReplicaPlacement.GetCopyCount() > 1 } -func (v *Volume) ContentSize() uint64 { - return v.nm.ContentSize() -} - // volume is expired if modified time + volume ttl < now // except when volume is empty // or when the volume does not have a ttl @@ -96,9 +180,9 @@ func (v *Volume) expired(volumeSizeLimit uint64) bool { if v.Ttl == nil || v.Ttl.Minutes() == 0 { return false } - glog.V(1).Infof("now:%v lastModified:%v", time.Now().Unix(), v.lastModifiedTime) - livedMinutes := (time.Now().Unix() - int64(v.lastModifiedTime)) / 60 - glog.V(1).Infof("ttl:%v lived:%v", v.Ttl, livedMinutes) + glog.V(2).Infof("now:%v lastModified:%v", time.Now().Unix(), v.lastModifiedTsSeconds) + livedMinutes := (time.Now().Unix() - int64(v.lastModifiedTsSeconds)) / 60 + glog.V(2).Infof("ttl:%v lived:%v", v.Ttl, livedMinutes) if int64(v.Ttl.Minutes()) < livedMinutes { return true } @@ -106,7 +190,7 @@ func (v *Volume) expired(volumeSizeLimit uint64) bool { } // wait either maxDelayMinutes or 10% of ttl minutes -func (v *Volume) exiredLongEnough(maxDelayMinutes uint32) bool { +func (v *Volume) expiredLongEnough(maxDelayMinutes uint32) bool { if v.Ttl == nil || v.Ttl.Minutes() == 0 { return false } @@ -115,8 +199,41 @@ func (v *Volume) exiredLongEnough(maxDelayMinutes uint32) bool { removalDelay = maxDelayMinutes } - if uint64(v.Ttl.Minutes()+removalDelay)*60+v.lastModifiedTime < uint64(time.Now().Unix()) { + if uint64(v.Ttl.Minutes()+removalDelay)*60+v.lastModifiedTsSeconds < uint64(time.Now().Unix()) { return true } return false } + +func (v *Volume) ToVolumeInformationMessage() *master_pb.VolumeInformationMessage { + size, _, modTime := v.FileStat() + + volumInfo := 
&master_pb.VolumeInformationMessage{
+		Id:               uint32(v.Id),
+		Size:             size,
+		Collection:       v.Collection,
+		FileCount:        v.FileCount(),
+		DeleteCount:      v.DeletedCount(),
+		DeletedByteCount: v.DeletedSize(),
+		ReadOnly:         v.noWriteOrDelete || v.noWriteCanDelete,
+		ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
+		Version:          uint32(v.Version()),
+		Ttl:              v.Ttl.ToUint32(),
+		CompactRevision:  uint32(v.SuperBlock.CompactionRevision),
+		ModifiedAtSecond: modTime.Unix(),
+	}
+
+	volumInfo.RemoteStorageName, volumInfo.RemoteStorageKey = v.RemoteStorageNameKey()
+
+	return volumInfo
+}
+
+func (v *Volume) RemoteStorageNameKey() (storageName, storageKey string) {
+	if v.volumeInfo == nil {
+		return
+	}
+	if len(v.volumeInfo.GetFiles()) == 0 {
+		return
+	}
+	return v.volumeInfo.GetFiles()[0].BackendName(), v.volumeInfo.GetFiles()[0].GetKey()
+}
diff --git a/weed/storage/volume_backup.go b/weed/storage/volume_backup.go
new file mode 100644
index 000000000..f7075fe2b
--- /dev/null
+++ b/weed/storage/volume_backup.go
@@ -0,0 +1,260 @@
+package storage
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+
+	"google.golang.org/grpc"
+
+	"github.com/chrislusf/seaweedfs/weed/operation"
+	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
+	"github.com/chrislusf/seaweedfs/weed/storage/idx"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"github.com/chrislusf/seaweedfs/weed/storage/super_block"
+	. "github.com/chrislusf/seaweedfs/weed/storage/types"
+)
+
+func (v *Volume) GetVolumeSyncStatus() *volume_server_pb.VolumeSyncStatusResponse {
+	v.dataFileAccessLock.RLock()
+	defer v.dataFileAccessLock.RUnlock()
+
+	var syncStatus = &volume_server_pb.VolumeSyncStatusResponse{}
+	if datSize, _, err := v.DataBackend.GetStat(); err == nil {
+		syncStatus.TailOffset = uint64(datSize)
+	}
+	syncStatus.Collection = v.Collection
+	syncStatus.IdxFileSize = v.nm.IndexFileSize()
+	syncStatus.CompactRevision = uint32(v.SuperBlock.CompactionRevision)
+	syncStatus.Ttl = v.SuperBlock.Ttl.String()
+	syncStatus.Replication = v.SuperBlock.ReplicaPlacement.String()
+	return syncStatus
+}
+
+// The volume syncs with a master volume in 2 steps:
+// 1. The slave checks the master side to find the subscription checkpoint
+//    to set up the replication.
+// 2. The slave receives the updates from the master
+
+/*
+Assume the slave volume needs to follow the master volume.
+
+The master volume could be compacted, and could be many files ahead of
+the slave volume.
+
+Step 0: // implemented in command/backup.go, to avoid dat file size overflow.
+0.1 If the slave's compact version is less than the master's, do a local compaction, and set
+the local compact version to the same as the master's.
+0.2 If the slave size is still bigger than the master's, discard the local copy and do a full copy.
+
+Step 1:
+The slave volume asks the master with its last modification time t.
+The master does a binary search in the volume (using .idx as an array, and checking the appendAtNs in the .dat file)
+to find the first entry with appendAtNs > t.
+
+Step 2:
+The master sends content bytes to the slave. The bytes are not chunked by needle.
+
+Step 3:
+The slave generates the needle map for the new bytes. (This may be optimized to incrementally
+update the needle map when receiving new .dat bytes, but that seems unnecessary for now.)
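A concrete walk-through of Steps 1-3: suppose the needle at the slave's .dat tail
was appended at time t. The slave sends t; the master binary-searches its .idx
entries, reading each candidate needle's appendAtNs from the .dat file, and finds
the first needle written after t. The master then streams raw .dat bytes from that
needle's offset to its end of file; the slave appends them at its own tail and
rescans only the appended region to extend its needle map.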
+ +*/ + +func (v *Volume) IncrementalBackup(volumeServer string, grpcDialOption grpc.DialOption) error { + + startFromOffset, _, _ := v.FileStat() + appendAtNs, err := v.findLastAppendAtNs() + if err != nil { + return err + } + + writeOffset := int64(startFromOffset) + + err = operation.WithVolumeServerClient(volumeServer, grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + + stream, err := client.VolumeIncrementalCopy(context.Background(), &volume_server_pb.VolumeIncrementalCopyRequest{ + VolumeId: uint32(v.Id), + SinceNs: appendAtNs, + }) + if err != nil { + return err + } + + for { + resp, recvErr := stream.Recv() + if recvErr != nil { + if recvErr == io.EOF { + break + } else { + return recvErr + } + } + + n, writeErr := v.DataBackend.WriteAt(resp.FileContent, writeOffset) + if writeErr != nil { + return writeErr + } + writeOffset += int64(n) + } + + return nil + + }) + + if err != nil { + return err + } + + // add to needle map + return ScanVolumeFileFrom(v.Version(), v.DataBackend, int64(startFromOffset), &VolumeFileScanner4GenIdx{v: v}) + +} + +func (v *Volume) findLastAppendAtNs() (uint64, error) { + offset, err := v.locateLastAppendEntry() + if err != nil { + return 0, err + } + if offset.IsZero() { + return 0, nil + } + return v.readAppendAtNs(offset) +} + +func (v *Volume) locateLastAppendEntry() (Offset, error) { + indexFile, e := os.OpenFile(v.FileName()+".idx", os.O_RDONLY, 0644) + if e != nil { + return Offset{}, fmt.Errorf("cannot read %s.idx: %v", v.FileName(), e) + } + defer indexFile.Close() + + fi, err := indexFile.Stat() + if err != nil { + return Offset{}, fmt.Errorf("file %s stat error: %v", indexFile.Name(), err) + } + fileSize := fi.Size() + if fileSize%NeedleMapEntrySize != 0 { + return Offset{}, fmt.Errorf("unexpected file %s size: %d", indexFile.Name(), fileSize) + } + if fileSize == 0 { + return Offset{}, nil + } + + bytes := make([]byte, NeedleMapEntrySize) + n, e := indexFile.ReadAt(bytes, fileSize-NeedleMapEntrySize) + if n != NeedleMapEntrySize { + return Offset{}, fmt.Errorf("file %s read error: %v", indexFile.Name(), e) + } + _, offset, _ := idx.IdxFileEntry(bytes) + + return offset, nil +} + +func (v *Volume) readAppendAtNs(offset Offset) (uint64, error) { + + n, _, bodyLength, err := needle.ReadNeedleHeader(v.DataBackend, v.SuperBlock.Version, offset.ToAcutalOffset()) + if err != nil { + return 0, fmt.Errorf("ReadNeedleHeader: %v", err) + } + _, err = n.ReadNeedleBody(v.DataBackend, v.SuperBlock.Version, offset.ToAcutalOffset()+int64(NeedleHeaderSize), bodyLength) + if err != nil { + return 0, fmt.Errorf("ReadNeedleBody offset %d, bodyLength %d: %v", offset.ToAcutalOffset(), bodyLength, err) + } + return n.AppendAtNs, nil + +} + +// on server side +func (v *Volume) BinarySearchByAppendAtNs(sinceNs uint64) (offset Offset, isLast bool, err error) { + indexFile, openErr := os.OpenFile(v.FileName()+".idx", os.O_RDONLY, 0644) + if openErr != nil { + err = fmt.Errorf("cannot read %s.idx: %v", v.FileName(), openErr) + return + } + defer indexFile.Close() + + fi, statErr := indexFile.Stat() + if statErr != nil { + err = fmt.Errorf("file %s stat error: %v", indexFile.Name(), statErr) + return + } + fileSize := fi.Size() + if fileSize%NeedleMapEntrySize != 0 { + err = fmt.Errorf("unexpected file %s size: %d", indexFile.Name(), fileSize) + return + } + + bytes := make([]byte, NeedleMapEntrySize) + entryCount := fileSize / NeedleMapEntrySize + l := int64(0) + h := entryCount + + for l < h { + + m := (l + h) / 2 + + if m == entryCount { + 
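			// Note on the guard above: since l < h <= entryCount and m = (l+h)/2,
			// m is always strictly less than entryCount, so this branch is
			// defensive and should be unreachable.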
return Offset{}, true, nil + } + + // read the appendAtNs for entry m + offset, err = v.readAppendAtNsForIndexEntry(indexFile, bytes, m) + if err != nil { + return + } + + mNs, nsReadErr := v.readAppendAtNs(offset) + if nsReadErr != nil { + err = nsReadErr + return + } + + // move the boundary + if mNs <= sinceNs { + l = m + 1 + } else { + h = m + } + + } + + if l == entryCount { + return Offset{}, true, nil + } + + offset, err = v.readAppendAtNsForIndexEntry(indexFile, bytes, l) + + return offset, false, err + +} + +// bytes is of size NeedleMapEntrySize +func (v *Volume) readAppendAtNsForIndexEntry(indexFile *os.File, bytes []byte, m int64) (Offset, error) { + if _, readErr := indexFile.ReadAt(bytes, m*NeedleMapEntrySize); readErr != nil && readErr != io.EOF { + return Offset{}, readErr + } + _, offset, _ := idx.IdxFileEntry(bytes) + return offset, nil +} + +// generate the volume idx +type VolumeFileScanner4GenIdx struct { + v *Volume +} + +func (scanner *VolumeFileScanner4GenIdx) VisitSuperBlock(superBlock super_block.SuperBlock) error { + return nil + +} +func (scanner *VolumeFileScanner4GenIdx) ReadNeedleBody() bool { + return false +} + +func (scanner *VolumeFileScanner4GenIdx) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { + if n.Size > 0 && n.Size != TombstoneFileSize { + return scanner.v.nm.Put(n.Id, ToOffset(offset), n.Size) + } + return scanner.v.nm.Delete(n.Id, ToOffset(offset)) +} diff --git a/weed/storage/volume_backup_test.go b/weed/storage/volume_backup_test.go new file mode 100644 index 000000000..3291d203a --- /dev/null +++ b/weed/storage/volume_backup_test.go @@ -0,0 +1,39 @@ +package storage + +import "testing" + +func TestBinarySearch(t *testing.T) { + var testInput []int + testInput = []int{-1, 0, 3, 5, 9, 12} + + if 3 != binarySearchForLargerThanTarget(testInput, 4) { + t.Errorf("failed to find target %d", 4) + } + if 3 != binarySearchForLargerThanTarget(testInput, 3) { + t.Errorf("failed to find target %d", 3) + } + if 6 != binarySearchForLargerThanTarget(testInput, 12) { + t.Errorf("failed to find target %d", 12) + } + if 1 != binarySearchForLargerThanTarget(testInput, -1) { + t.Errorf("failed to find target %d", -1) + } + if 0 != binarySearchForLargerThanTarget(testInput, -2) { + t.Errorf("failed to find target %d", -2) + } + +} + +func binarySearchForLargerThanTarget(nums []int, target int) int { + l := 0 + h := len(nums) + for l < h { + m := (l + h) / 2 + if nums[m] <= target { + l = m + 1 + } else { + h = m + } + } + return l +} diff --git a/weed/storage/volume_checking.go b/weed/storage/volume_checking.go index 12c282be9..a65c2a3ff 100644 --- a/weed/storage/volume_checking.go +++ b/weed/storage/volume_checking.go @@ -4,41 +4,41 @@ import ( "fmt" "os" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/idx" + "github.com/chrislusf/seaweedfs/weed/storage/needle" . 
"github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/chrislusf/seaweedfs/weed/util" ) -func getActualSize(size uint32, version Version) int64 { - return NeedleEntrySize + NeedleBodyLength(size, version) -} - -func CheckVolumeDataIntegrity(v *Volume, indexFile *os.File) error { +func CheckVolumeDataIntegrity(v *Volume, indexFile *os.File) (lastAppendAtNs uint64, e error) { var indexSize int64 - var e error if indexSize, e = verifyIndexFileIntegrity(indexFile); e != nil { - return fmt.Errorf("verifyIndexFileIntegrity %s failed: %v", indexFile.Name(), e) + return 0, fmt.Errorf("verifyIndexFileIntegrity %s failed: %v", indexFile.Name(), e) } if indexSize == 0 { - return nil + return 0, nil } var lastIdxEntry []byte - if lastIdxEntry, e = readIndexEntryAtOffset(indexFile, indexSize-NeedleEntrySize); e != nil { - return fmt.Errorf("readLastIndexEntry %s failed: %v", indexFile.Name(), e) + if lastIdxEntry, e = readIndexEntryAtOffset(indexFile, indexSize-NeedleMapEntrySize); e != nil { + return 0, fmt.Errorf("readLastIndexEntry %s failed: %v", indexFile.Name(), e) } - key, offset, size := IdxFileEntry(lastIdxEntry) - if offset == 0 || size == TombstoneFileSize { - return nil + key, offset, size := idx.IdxFileEntry(lastIdxEntry) + if offset.IsZero() { + return 0, nil } - if e = verifyNeedleIntegrity(v.dataFile, v.Version(), int64(offset)*NeedlePaddingSize, key, size); e != nil { - return fmt.Errorf("verifyNeedleIntegrity %s failed: %v", indexFile.Name(), e) + if size == TombstoneFileSize { + size = 0 } - - return nil + if lastAppendAtNs, e = verifyNeedleIntegrity(v.DataBackend, v.Version(), offset.ToAcutalOffset(), key, size); e != nil { + return lastAppendAtNs, fmt.Errorf("verifyNeedleIntegrity %s failed: %v", indexFile.Name(), e) + } + return } func verifyIndexFileIntegrity(indexFile *os.File) (indexSize int64, err error) { if indexSize, err = util.GetFileSize(indexFile); err == nil { - if indexSize%NeedleEntrySize != 0 { + if indexSize%NeedleMapEntrySize != 0 { err = fmt.Errorf("index file's size is %d bytes, maybe corrupted", indexSize) } } @@ -50,19 +50,18 @@ func readIndexEntryAtOffset(indexFile *os.File, offset int64) (bytes []byte, err err = fmt.Errorf("offset %d for index file is invalid", offset) return } - bytes = make([]byte, NeedleEntrySize) + bytes = make([]byte, NeedleMapEntrySize) _, err = indexFile.ReadAt(bytes, offset) return } -func verifyNeedleIntegrity(datFile *os.File, v Version, offset int64, key NeedleId, size uint32) error { - n := new(Needle) - err := n.ReadData(datFile, offset, size, v) - if err != nil { - return err +func verifyNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version, offset int64, key NeedleId, size uint32) (lastAppendAtNs uint64, err error) { + n := new(needle.Needle) + if err = n.ReadData(datFile, offset, size, v); err != nil { + return n.AppendAtNs, err } if n.Id != key { - return fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id) + return n.AppendAtNs, fmt.Errorf("index key %#x does not match needle's Id %#x", key, n.Id) } - return nil + return n.AppendAtNs, err } diff --git a/weed/storage/volume_create.go b/weed/storage/volume_create.go index 6b3a17439..ffcb246a4 100644 --- a/weed/storage/volume_create.go +++ b/weed/storage/volume_create.go @@ -1,4 +1,4 @@ -// +build !linux +// +build !linux,!windows package storage @@ -6,12 +6,16 @@ import ( "os" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/backend" ) -func createVolumeFile(fileName string, preallocate int64) 
(file *os.File, e error) { - file, e = os.OpenFile(fileName, os.O_RDWR|os.O_CREATE, 0644) +func createVolumeFile(fileName string, preallocate int64, memoryMapSizeMB uint32) (backend.BackendStorageFile, error) { + file, e := os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) + if e != nil { + return nil, e + } if preallocate > 0 { glog.V(0).Infof("Preallocated disk space for %s is not supported", fileName) } - return file, e + return backend.NewDiskFile(file), nil } diff --git a/weed/storage/volume_create_linux.go b/weed/storage/volume_create_linux.go index 8f6bab2fe..ee599ac32 100644 --- a/weed/storage/volume_create_linux.go +++ b/weed/storage/volume_create_linux.go @@ -7,13 +7,17 @@ import ( "syscall" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/backend" ) -func createVolumeFile(fileName string, preallocate int64) (file *os.File, e error) { - file, e = os.OpenFile(fileName, os.O_RDWR|os.O_CREATE, 0644) +func createVolumeFile(fileName string, preallocate int64, memoryMapSizeMB uint32) (backend.BackendStorageFile, error) { + file, e := os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) + if e != nil { + return nil, e + } if preallocate != 0 { syscall.Fallocate(int(file.Fd()), 1, 0, preallocate) glog.V(0).Infof("Preallocated %d bytes disk space for %s", preallocate, fileName) } - return file, e + return backend.NewDiskFile(file), nil } diff --git a/weed/storage/volume_create_windows.go b/weed/storage/volume_create_windows.go new file mode 100644 index 000000000..e1c0b961f --- /dev/null +++ b/weed/storage/volume_create_windows.go @@ -0,0 +1,33 @@ +// +build windows + +package storage + +import ( + "github.com/chrislusf/seaweedfs/weed/storage/backend/memory_map" + "golang.org/x/sys/windows" + + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/backend/memory_map/os_overloads" +) + +func createVolumeFile(fileName string, preallocate int64, memoryMapSizeMB uint32) (backend.BackendStorageFile, error) { + if preallocate > 0 { + glog.V(0).Infof("Preallocated disk space for %s is not supported", fileName) + } + + if memoryMapSizeMB > 0 { + file, e := os_overloads.OpenFile(fileName, windows.O_RDWR|windows.O_CREAT, 0644, true) + if e != nil { + return nil, e + } + return memory_map.NewMemoryMappedFile(file, memoryMapSizeMB), nil + } else { + file, e := os_overloads.OpenFile(fileName, windows.O_RDWR|windows.O_CREAT|windows.O_TRUNC, 0644, false) + if e != nil { + return nil, e + } + return backend.NewDiskFile(file), nil + } + +} diff --git a/weed/storage/volume_info.go b/weed/storage/volume_info.go index f6614a9de..313818cde 100644 --- a/weed/storage/volume_info.go +++ b/weed/storage/volume_info.go @@ -5,46 +5,94 @@ import ( "sort" "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" ) type VolumeInfo struct { - Id VolumeId - Size uint64 - ReplicaPlacement *ReplicaPlacement - Ttl *TTL - Collection string - Version Version - FileCount int - DeleteCount int - DeletedByteCount uint64 - ReadOnly bool + Id needle.VolumeId + Size uint64 + ReplicaPlacement *super_block.ReplicaPlacement + Ttl *needle.TTL + Collection string + Version needle.Version + FileCount int + DeleteCount int + DeletedByteCount uint64 + ReadOnly bool + CompactRevision uint32 + ModifiedAtSecond int64 + RemoteStorageName string + RemoteStorageKey string } func 
NewVolumeInfo(m *master_pb.VolumeInformationMessage) (vi VolumeInfo, err error) { vi = VolumeInfo{ - Id: VolumeId(m.Id), - Size: m.Size, - Collection: m.Collection, - FileCount: int(m.FileCount), - DeleteCount: int(m.DeleteCount), - DeletedByteCount: m.DeletedByteCount, - ReadOnly: m.ReadOnly, - Version: Version(m.Version), + Id: needle.VolumeId(m.Id), + Size: m.Size, + Collection: m.Collection, + FileCount: int(m.FileCount), + DeleteCount: int(m.DeleteCount), + DeletedByteCount: m.DeletedByteCount, + ReadOnly: m.ReadOnly, + Version: needle.Version(m.Version), + CompactRevision: m.CompactRevision, + ModifiedAtSecond: m.ModifiedAtSecond, + RemoteStorageName: m.RemoteStorageName, + RemoteStorageKey: m.RemoteStorageKey, } - rp, e := NewReplicaPlacementFromByte(byte(m.ReplicaPlacement)) + rp, e := super_block.NewReplicaPlacementFromByte(byte(m.ReplicaPlacement)) if e != nil { return vi, e } vi.ReplicaPlacement = rp - vi.Ttl = LoadTTLFromUint32(m.Ttl) + vi.Ttl = needle.LoadTTLFromUint32(m.Ttl) return vi, nil } +func NewVolumeInfoFromShort(m *master_pb.VolumeShortInformationMessage) (vi VolumeInfo, err error) { + vi = VolumeInfo{ + Id: needle.VolumeId(m.Id), + Collection: m.Collection, + Version: needle.Version(m.Version), + } + rp, e := super_block.NewReplicaPlacementFromByte(byte(m.ReplicaPlacement)) + if e != nil { + return vi, e + } + vi.ReplicaPlacement = rp + vi.Ttl = needle.LoadTTLFromUint32(m.Ttl) + return vi, nil +} + +func (vi VolumeInfo) IsRemote() bool { + return vi.RemoteStorageName != "" +} + func (vi VolumeInfo) String() string { return fmt.Sprintf("Id:%d, Size:%d, ReplicaPlacement:%s, Collection:%s, Version:%v, FileCount:%d, DeleteCount:%d, DeletedByteCount:%d, ReadOnly:%v", vi.Id, vi.Size, vi.ReplicaPlacement, vi.Collection, vi.Version, vi.FileCount, vi.DeleteCount, vi.DeletedByteCount, vi.ReadOnly) } +func (vi VolumeInfo) ToVolumeInformationMessage() *master_pb.VolumeInformationMessage { + return &master_pb.VolumeInformationMessage{ + Id: uint32(vi.Id), + Size: uint64(vi.Size), + Collection: vi.Collection, + FileCount: uint64(vi.FileCount), + DeleteCount: uint64(vi.DeleteCount), + DeletedByteCount: vi.DeletedByteCount, + ReadOnly: vi.ReadOnly, + ReplicaPlacement: uint32(vi.ReplicaPlacement.Byte()), + Version: uint32(vi.Version), + Ttl: vi.Ttl.ToUint32(), + CompactRevision: vi.CompactRevision, + ModifiedAtSecond: vi.ModifiedAtSecond, + RemoteStorageName: vi.RemoteStorageName, + RemoteStorageKey: vi.RemoteStorageKey, + } +} + /*VolumesInfo sorting*/ type volumeInfos []*VolumeInfo diff --git a/weed/storage/volume_info_test.go b/weed/storage/volume_info_test.go index 9a9c43ad2..5b1bacb52 100644 --- a/weed/storage/volume_info_test.go +++ b/weed/storage/volume_info_test.go @@ -1,6 +1,10 @@ package storage -import "testing" +import ( + "testing" + + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) func TestSortVolumeInfos(t *testing.T) { vis := []*VolumeInfo{ @@ -16,7 +20,7 @@ func TestSortVolumeInfos(t *testing.T) { } sortVolumeInfos(vis) for i := 0; i < len(vis); i++ { - if vis[i].Id != VolumeId(i+1) { + if vis[i].Id != needle.VolumeId(i+1) { t.Fatal() } } diff --git a/weed/storage/volume_loading.go b/weed/storage/volume_loading.go index 37a6e07b2..6b42fc452 100644 --- a/weed/storage/volume_loading.go +++ b/weed/storage/volume_loading.go @@ -3,118 +3,148 @@ package storage import ( "fmt" "os" - "time" + + "github.com/syndtr/goleveldb/leveldb/opt" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/stats" + 
"github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + "github.com/chrislusf/seaweedfs/weed/util" ) -func loadVolumeWithoutIndex(dirname string, collection string, id VolumeId, needleMapKind NeedleMapType) (v *Volume, e error) { +func loadVolumeWithoutIndex(dirname string, collection string, id needle.VolumeId, needleMapKind NeedleMapType) (v *Volume, err error) { v = &Volume{dir: dirname, Collection: collection, Id: id} - v.SuperBlock = SuperBlock{} + v.SuperBlock = super_block.SuperBlock{} v.needleMapKind = needleMapKind - e = v.load(false, false, needleMapKind, 0) + err = v.load(false, false, needleMapKind, 0) return } -func (v *Volume) load(alsoLoadIndex bool, createDatIfMissing bool, needleMapKind NeedleMapType, preallocate int64) error { - var e error +func (v *Volume) load(alsoLoadIndex bool, createDatIfMissing bool, needleMapKind NeedleMapType, preallocate int64) (err error) { fileName := v.FileName() alreadyHasSuperBlock := false - if exists, canRead, canWrite, modifiedTime, fileSize := checkFile(fileName + ".dat"); exists { + hasVolumeInfoFile := v.maybeLoadVolumeInfo() && v.volumeInfo.Version != 0 + + if v.HasRemoteFile() { + v.noWriteCanDelete = true + v.noWriteOrDelete = false + glog.V(0).Infof("loading volume %d from remote %v", v.Id, v.volumeInfo.Files) + v.LoadRemoteFile() + alreadyHasSuperBlock = true + } else if exists, canRead, canWrite, modifiedTime, fileSize := util.CheckFile(fileName + ".dat"); exists { + // open dat file if !canRead { return fmt.Errorf("cannot read Volume Data file %s.dat", fileName) } + var dataFile *os.File if canWrite { - v.dataFile, e = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644) - v.lastModifiedTime = uint64(modifiedTime.Unix()) + dataFile, err = os.OpenFile(fileName+".dat", os.O_RDWR|os.O_CREATE, 0644) } else { glog.V(0).Infoln("opening " + fileName + ".dat in READONLY mode") - v.dataFile, e = os.Open(fileName + ".dat") - v.readOnly = true + dataFile, err = os.Open(fileName + ".dat") + v.noWriteOrDelete = true } - if fileSize >= _SuperBlockSize { + v.lastModifiedTsSeconds = uint64(modifiedTime.Unix()) + if fileSize >= super_block.SuperBlockSize { alreadyHasSuperBlock = true } + v.DataBackend = backend.NewDiskFile(dataFile) } else { if createDatIfMissing { - v.dataFile, e = createVolumeFile(fileName+".dat", preallocate) + v.DataBackend, err = createVolumeFile(fileName+".dat", preallocate, v.MemoryMapMaxSizeMb) } else { return fmt.Errorf("Volume Data file %s.dat does not exist.", fileName) } } - if e != nil { - if !os.IsPermission(e) { - return fmt.Errorf("cannot load Volume Data %s.dat: %v", fileName, e) + if err != nil { + if !os.IsPermission(err) { + return fmt.Errorf("cannot load Volume Data %s.dat: %v", fileName, err) } else { - return fmt.Errorf("load data file %s.dat: %v", fileName, e) + return fmt.Errorf("load data file %s.dat: %v", fileName, err) } } if alreadyHasSuperBlock { - e = v.readSuperBlock() + err = v.readSuperBlock() } else { - e = v.maybeWriteSuperBlock() + if !v.SuperBlock.Initialized() { + return fmt.Errorf("volume %s.dat not initialized", fileName) + } + err = v.maybeWriteSuperBlock() } - if e == nil && alsoLoadIndex { + if err == nil && alsoLoadIndex { var indexFile *os.File - if v.readOnly { - glog.V(1).Infoln("open to read file", fileName+".idx") - if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDONLY, 0644); e != nil { - return fmt.Errorf("cannot read Volume Index %s.idx: %v", fileName, 
e) + if v.noWriteOrDelete { + glog.V(0).Infoln("open to read file", fileName+".idx") + if indexFile, err = os.OpenFile(fileName+".idx", os.O_RDONLY, 0644); err != nil { + return fmt.Errorf("cannot read Volume Index %s.idx: %v", fileName, err) } } else { glog.V(1).Infoln("open to write file", fileName+".idx") - if indexFile, e = os.OpenFile(fileName+".idx", os.O_RDWR|os.O_CREATE, 0644); e != nil { - return fmt.Errorf("cannot write Volume Index %s.idx: %v", fileName, e) + if indexFile, err = os.OpenFile(fileName+".idx", os.O_RDWR|os.O_CREATE, 0644); err != nil { + return fmt.Errorf("cannot write Volume Index %s.idx: %v", fileName, err) } } - if e = CheckVolumeDataIntegrity(v, indexFile); e != nil { - v.readOnly = true - glog.V(0).Infof("volumeDataIntegrityChecking failed %v", e) + if v.lastAppendAtNs, err = CheckVolumeDataIntegrity(v, indexFile); err != nil { + v.noWriteOrDelete = true + glog.V(0).Infof("volumeDataIntegrityChecking failed %v", err) } - switch needleMapKind { - case NeedleMapInMemory: - glog.V(0).Infoln("loading index", fileName+".idx", "to memory readonly", v.readOnly) - if v.nm, e = LoadCompactNeedleMap(indexFile); e != nil { - glog.V(0).Infof("loading index %s to memory error: %v", fileName+".idx", e) - } - case NeedleMapLevelDb: - glog.V(0).Infoln("loading leveldb", fileName+".ldb") - if v.nm, e = NewLevelDbNeedleMap(fileName+".ldb", indexFile); e != nil { - glog.V(0).Infof("loading leveldb %s error: %v", fileName+".ldb", e) - } - case NeedleMapBoltDb: - glog.V(0).Infoln("loading boltdb", fileName+".bdb") - if v.nm, e = NewBoltDbNeedleMap(fileName+".bdb", indexFile); e != nil { - glog.V(0).Infof("loading boltdb %s error: %v", fileName+".bdb", e) + + if v.noWriteOrDelete || v.noWriteCanDelete { + if v.nm, err = NewSortedFileNeedleMap(fileName, indexFile); err != nil { + glog.V(0).Infof("loading sorted db %s error: %v", fileName+".sdx", err) } - case NeedleMapBtree: - glog.V(0).Infoln("loading index", fileName+".idx", "to btree readonly", v.readOnly) - if v.nm, e = LoadBtreeNeedleMap(indexFile); e != nil { - glog.V(0).Infof("loading index %s to btree error: %v", fileName+".idx", e) + } else { + switch needleMapKind { + case NeedleMapInMemory: + glog.V(0).Infoln("loading index", fileName+".idx", "to memory") + if v.nm, err = LoadCompactNeedleMap(indexFile); err != nil { + glog.V(0).Infof("loading index %s to memory error: %v", fileName+".idx", err) + } + case NeedleMapLevelDb: + glog.V(0).Infoln("loading leveldb", fileName+".ldb") + opts := &opt.Options{ + BlockCacheCapacity: 2 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 1 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 10, // default value is 1 + } + if v.nm, err = NewLevelDbNeedleMap(fileName+".ldb", indexFile, opts); err != nil { + glog.V(0).Infof("loading leveldb %s error: %v", fileName+".ldb", err) + } + case NeedleMapLevelDbMedium: + glog.V(0).Infoln("loading leveldb medium", fileName+".ldb") + opts := &opt.Options{ + BlockCacheCapacity: 4 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 2 * 1024 * 1024, // default value is 4MiB + CompactionTableSizeMultiplier: 10, // default value is 1 + } + if v.nm, err = NewLevelDbNeedleMap(fileName+".ldb", indexFile, opts); err != nil { + glog.V(0).Infof("loading leveldb %s error: %v", fileName+".ldb", err) + } + case NeedleMapLevelDbLarge: + glog.V(0).Infoln("loading leveldb large", fileName+".ldb") + opts := &opt.Options{ + BlockCacheCapacity: 8 * 1024 * 1024, // default value is 8MiB + WriteBuffer: 4 * 1024 * 1024, // default value is 
4MiB + CompactionTableSizeMultiplier: 10, // default value is 1 + } + if v.nm, err = NewLevelDbNeedleMap(fileName+".ldb", indexFile, opts); err != nil { + glog.V(0).Infof("loading leveldb %s error: %v", fileName+".ldb", err) + } } } } - return e -} - -func checkFile(filename string) (exists, canRead, canWrite bool, modTime time.Time, fileSize int64) { - exists = true - fi, err := os.Stat(filename) - if os.IsNotExist(err) { - exists = false - return - } - if fi.Mode()&0400 != 0 { - canRead = true - } - if fi.Mode()&0200 != 0 { - canWrite = true + if !hasVolumeInfoFile { + v.volumeInfo.Version = uint32(v.SuperBlock.Version) + v.SaveVolumeInfo() } - modTime = fi.ModTime() - fileSize = fi.Size() - return + + stats.VolumeServerVolumeCounter.WithLabelValues(v.Collection, "volume").Inc() + + return err } diff --git a/weed/storage/volume_read_write.go b/weed/storage/volume_read_write.go index 41f049674..ac6154cef 100644 --- a/weed/storage/volume_read_write.go +++ b/weed/storage/volume_read_write.go @@ -9,24 +9,30 @@ import ( "time" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" . "github.com/chrislusf/seaweedfs/weed/storage/types" ) +var ErrorNotFound = errors.New("not found") + // isFileUnchanged checks whether this needle to write is same as last one. // It requires serialized access in the same volume. -func (v *Volume) isFileUnchanged(n *Needle) bool { +func (v *Volume) isFileUnchanged(n *needle.Needle) bool { if v.Ttl.String() != "" { return false } + nv, ok := v.nm.Get(n.Id) - if ok && nv.Offset > 0 { - oldNeedle := new(Needle) - err := oldNeedle.ReadData(v.dataFile, int64(nv.Offset)*NeedlePaddingSize, nv.Size, v.Version()) + if ok && !nv.Offset.IsZero() && nv.Size != TombstoneFileSize { + oldNeedle := new(needle.Needle) + err := oldNeedle.ReadData(v.DataBackend, nv.Offset.ToAcutalOffset(), nv.Size, v.Version()) if err != nil { - glog.V(0).Infof("Failed to check updated file %v", err) + glog.V(0).Infof("Failed to check updated file at offset %d size %d: %v", nv.Offset.ToAcutalOffset(), nv.Size, err) return false } - if oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) { + if oldNeedle.Cookie == n.Cookie && oldNeedle.Checksum == n.Checksum && bytes.Equal(oldNeedle.Data, n.Data) { n.DataSize = oldNeedle.DataSize return true } @@ -36,131 +42,115 @@ func (v *Volume) isFileUnchanged(n *Needle) bool { // Destroy removes everything related to this volume func (v *Volume) Destroy() (err error) { - if v.readOnly { - err = fmt.Errorf("%s is read-only", v.dataFile.Name()) + if v.isCompacting { + err = fmt.Errorf("volume %d is compacting", v.Id) return } - v.Close() - err = os.Remove(v.dataFile.Name()) - if err != nil { - return + storageName, storageKey := v.RemoteStorageNameKey() + if v.HasRemoteFile() && storageName != "" && storageKey != "" { + if backendStorage, found := backend.BackendStorages[storageName]; found { + backendStorage.DeleteFile(storageKey) + } } - err = v.nm.Destroy() + v.Close() + os.Remove(v.FileName() + ".dat") + os.Remove(v.FileName() + ".idx") + os.Remove(v.FileName() + ".vif") + os.Remove(v.FileName() + ".sdx") os.Remove(v.FileName() + ".cpd") os.Remove(v.FileName() + ".cpx") - os.Remove(v.FileName() + ".ldb") - os.Remove(v.FileName() + ".bdb") + os.RemoveAll(v.FileName() + ".ldb") return } -// AppendBlob append a blob to end of the data file, used in replication -func (v *Volume) 
AppendBlob(b []byte) (offset int64, err error) { - if v.readOnly { - err = fmt.Errorf("%s is read-only", v.dataFile.Name()) - return - } - v.dataFileAccessLock.Lock() - defer v.dataFileAccessLock.Unlock() - if offset, err = v.dataFile.Seek(0, 2); err != nil { - glog.V(0).Infof("failed to seek the end of file: %v", err) - return - } - //ensure file writing starting from aligned positions - if offset%NeedlePaddingSize != 0 { - offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize) - if offset, err = v.dataFile.Seek(offset, 0); err != nil { - glog.V(0).Infof("failed to align in datafile %s: %v", v.dataFile.Name(), err) - return - } - } - _, err = v.dataFile.Write(b) - return -} - -func (v *Volume) writeNeedle(n *Needle) (size uint32, err error) { - glog.V(4).Infof("writing needle %s", NewFileIdFromNeedle(v.Id, n).String()) - if v.readOnly { - err = fmt.Errorf("%s is read-only", v.dataFile.Name()) - return - } +func (v *Volume) writeNeedle(n *needle.Needle) (offset uint64, size uint32, isUnchanged bool, err error) { + // glog.V(4).Infof("writing needle %s", needle.NewFileIdFromNeedle(v.Id, n).String()) v.dataFileAccessLock.Lock() defer v.dataFileAccessLock.Unlock() if v.isFileUnchanged(n) { size = n.DataSize - glog.V(4).Infof("needle is unchanged!") + isUnchanged = true return } - var offset int64 - if offset, err = v.dataFile.Seek(0, 2); err != nil { - glog.V(0).Infof("failed to seek the end of file: %v", err) - return + + if n.Ttl == needle.EMPTY_TTL && v.Ttl != needle.EMPTY_TTL { + n.SetHasTtl() + n.Ttl = v.Ttl } - //ensure file writing starting from aligned positions - if offset%NeedlePaddingSize != 0 { - offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize) - if offset, err = v.dataFile.Seek(offset, 0); err != nil { - glog.V(0).Infof("failed to align in datafile %s: %v", v.dataFile.Name(), err) + // check whether existing needle cookie matches + nv, ok := v.nm.Get(n.Id) + if ok { + existingNeedle, _, _, existingNeedleReadErr := needle.ReadNeedleHeader(v.DataBackend, v.Version(), nv.Offset.ToAcutalOffset()) + if existingNeedleReadErr != nil { + err = fmt.Errorf("reading existing needle: %v", existingNeedleReadErr) + return + } + if existingNeedle.Cookie != n.Cookie { + glog.V(0).Infof("write cookie mismatch: existing %x, new %x", existingNeedle.Cookie, n.Cookie) + err = fmt.Errorf("mismatching cookie %x", n.Cookie) return } } + // append to dat file n.AppendAtNs = uint64(time.Now().UnixNano()) - if size, _, err = n.Append(v.dataFile, v.Version()); err != nil { - if e := v.dataFile.Truncate(offset); e != nil { - err = fmt.Errorf("%s\ncannot truncate %s: %v", err, v.dataFile.Name(), e) - } + if offset, size, _, err = n.Append(v.DataBackend, v.Version()); err != nil { return } + v.lastAppendAtNs = n.AppendAtNs - nv, ok := v.nm.Get(n.Id) - if !ok || int64(nv.Offset)*NeedlePaddingSize < offset { - if err = v.nm.Put(n.Id, Offset(offset/NeedlePaddingSize), n.Size); err != nil { + // add to needle map + if !ok || uint64(nv.Offset.ToAcutalOffset()) < offset { + if err = v.nm.Put(n.Id, ToOffset(int64(offset)), n.Size); err != nil { glog.V(4).Infof("failed to save in needle map %d: %v", n.Id, err) } } - if v.lastModifiedTime < n.LastModified { - v.lastModifiedTime = n.LastModified + if v.lastModifiedTsSeconds < n.LastModified { + v.lastModifiedTsSeconds = n.LastModified } return } -func (v *Volume) deleteNeedle(n *Needle) (uint32, error) { - glog.V(4).Infof("delete needle %s", NewFileIdFromNeedle(v.Id, n).String()) - if v.readOnly { - return 0, fmt.Errorf("%s is read-only", 
v.dataFile.Name()) - } +func (v *Volume) deleteNeedle(n *needle.Needle) (uint32, error) { + glog.V(4).Infof("delete needle %s", needle.NewFileIdFromNeedle(v.Id, n).String()) v.dataFileAccessLock.Lock() defer v.dataFileAccessLock.Unlock() nv, ok := v.nm.Get(n.Id) //fmt.Println("key", n.Id, "volume offset", nv.Offset, "data_size", n.Size, "cached size", nv.Size) if ok && nv.Size != TombstoneFileSize { size := nv.Size - offset, err := v.dataFile.Seek(0, 2) + n.Data = nil + n.AppendAtNs = uint64(time.Now().UnixNano()) + offset, _, _, err := n.Append(v.DataBackend, v.Version()) if err != nil { return size, err } - if err := v.nm.Delete(n.Id, Offset(offset/NeedlePaddingSize)); err != nil { + v.lastAppendAtNs = n.AppendAtNs + if err = v.nm.Delete(n.Id, ToOffset(int64(offset))); err != nil { return size, err } - n.Data = nil - n.AppendAtNs = uint64(time.Now().UnixNano()) - _, _, err = n.Append(v.dataFile, v.Version()) return size, err } return 0, nil } // read fills in Needle content by looking up n.Id from NeedleMapper -func (v *Volume) readNeedle(n *Needle) (int, error) { +func (v *Volume) readNeedle(n *needle.Needle) (int, error) { + v.dataFileAccessLock.RLock() + defer v.dataFileAccessLock.RUnlock() + nv, ok := v.nm.Get(n.Id) - if !ok || nv.Offset == 0 { - return -1, errors.New("Not Found") + if !ok || nv.Offset.IsZero() { + return -1, ErrorNotFound } if nv.Size == TombstoneFileSize { - return -1, errors.New("Already Deleted") + return -1, errors.New("already deleted") + } + if nv.Size == 0 { + return 0, nil } - err := n.ReadData(v.dataFile, int64(nv.Offset)*NeedlePaddingSize, nv.Size, v.Version()) + err := n.ReadData(v.DataBackend, nv.Offset.ToAcutalOffset(), nv.Size, v.Version()) if err != nil { return 0, err } @@ -178,55 +168,70 @@ func (v *Volume) readNeedle(n *Needle) (int, error) { if uint64(time.Now().Unix()) < n.LastModified+uint64(ttlMinutes*60) { return bytesRead, nil } - return -1, errors.New("Not Found") + return -1, ErrorNotFound } -func ScanVolumeFile(dirname string, collection string, id VolumeId, +type VolumeFileScanner interface { + VisitSuperBlock(super_block.SuperBlock) error + ReadNeedleBody() bool + VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error +} + +func ScanVolumeFile(dirname string, collection string, id needle.VolumeId, needleMapKind NeedleMapType, - visitSuperBlock func(SuperBlock) error, - readNeedleBody bool, - visitNeedle func(n *Needle, offset int64) error) (err error) { + volumeFileScanner VolumeFileScanner) (err error) { var v *Volume if v, err = loadVolumeWithoutIndex(dirname, collection, id, needleMapKind); err != nil { - return fmt.Errorf("Failed to load volume %d: %v", id, err) + return fmt.Errorf("failed to load volume %d: %v", id, err) } - if err = visitSuperBlock(v.SuperBlock); err != nil { - return fmt.Errorf("Failed to process volume %d super block: %v", id, err) + if v.volumeInfo.Version == 0 { + if err = volumeFileScanner.VisitSuperBlock(v.SuperBlock); err != nil { + return fmt.Errorf("failed to process volume %d super block: %v", id, err) + } } + defer v.Close() version := v.Version() offset := int64(v.SuperBlock.BlockSize()) - n, rest, e := ReadNeedleHeader(v.dataFile, version, offset) + + return ScanVolumeFileFrom(version, v.DataBackend, offset, volumeFileScanner) +} + +func ScanVolumeFileFrom(version needle.Version, datBackend backend.BackendStorageFile, offset int64, volumeFileScanner VolumeFileScanner) (err error) { + n, nh, rest, e := needle.ReadNeedleHeader(datBackend, version, offset) if e != nil { - err = 
fmt.Errorf("cannot read needle header: %v", e) - return + if e == io.EOF { + return nil + } + return fmt.Errorf("cannot read %s at offset %d: %v", datBackend.Name(), offset, e) } for n != nil { - if readNeedleBody { - if err = n.ReadNeedleBody(v.dataFile, version, offset+NeedleEntrySize, rest); err != nil { + var needleBody []byte + if volumeFileScanner.ReadNeedleBody() { + if needleBody, err = n.ReadNeedleBody(datBackend, version, offset+NeedleHeaderSize, rest); err != nil { glog.V(0).Infof("cannot read needle body: %v", err) //err = fmt.Errorf("cannot read needle body: %v", err) //return } } - err = visitNeedle(n, offset) + err := volumeFileScanner.VisitNeedle(n, offset, nh, needleBody) if err == io.EOF { return nil } if err != nil { glog.V(0).Infof("visit needle error: %v", err) + return fmt.Errorf("visit needle error: %v", err) } - offset += NeedleEntrySize + rest + offset += NeedleHeaderSize + rest glog.V(4).Infof("==> new entry offset %d", offset) - if n, rest, err = ReadNeedleHeader(v.dataFile, version, offset); err != nil { + if n, nh, rest, err = needle.ReadNeedleHeader(datBackend, version, offset); err != nil { if err == io.EOF { return nil } - return fmt.Errorf("cannot read needle header: %v", err) + return fmt.Errorf("cannot read needle header at offset %d: %v", offset, err) } glog.V(4).Infof("new entry needle size:%d rest:%d", n.Size, rest) } - - return + return nil } diff --git a/weed/storage/volume_super_block.go b/weed/storage/volume_super_block.go index 6435a051f..5e913e062 100644 --- a/weed/storage/volume_super_block.go +++ b/weed/storage/volume_super_block.go @@ -5,83 +5,29 @@ import ( "os" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/pb/master_pb" - "github.com/chrislusf/seaweedfs/weed/util" - "github.com/golang/protobuf/proto" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" ) -const ( - _SuperBlockSize = 8 -) - -/* -* Super block currently has 8 bytes allocated for each volume. -* Byte 0: version, 1 or 2 -* Byte 1: Replica Placement strategy, 000, 001, 002, 010, etc -* Byte 2 and byte 3: Time to live. See TTL for definition -* Byte 4 and byte 5: The number of times the volume has been compacted. -* Rest bytes: Reserved - */ -type SuperBlock struct { - version Version - ReplicaPlacement *ReplicaPlacement - Ttl *TTL - CompactRevision uint16 - Extra *master_pb.SuperBlockExtra - extraSize uint16 -} - -func (s *SuperBlock) BlockSize() int { - switch s.version { - case Version2, Version3: - return _SuperBlockSize + int(s.extraSize) - } - return _SuperBlockSize -} - -func (s *SuperBlock) Version() Version { - return s.version -} -func (s *SuperBlock) Bytes() []byte { - header := make([]byte, _SuperBlockSize) - header[0] = byte(s.version) - header[1] = s.ReplicaPlacement.Byte() - s.Ttl.ToBytes(header[2:4]) - util.Uint16toBytes(header[4:6], s.CompactRevision) - - if s.Extra != nil { - extraData, err := proto.Marshal(s.Extra) - if err != nil { - glog.Fatalf("cannot marshal super block extra %+v: %v", s.Extra, err) - } - extraSize := len(extraData) - if extraSize > 256*256-2 { - // reserve a couple of bits for future extension - glog.Fatalf("super block extra size is %d bigger than %d", extraSize, 256*256-2) - } - s.extraSize = uint16(extraSize) - util.Uint16toBytes(header[6:8], s.extraSize) - - header = append(header, extraData...) 
-	}
-
-	return header
-}
-
 func (v *Volume) maybeWriteSuperBlock() error {
-	stat, e := v.dataFile.Stat()
+
+	datSize, _, e := v.DataBackend.GetStat()
 	if e != nil {
-		glog.V(0).Infof("failed to stat datafile %s: %v", v.dataFile.Name(), e)
+		glog.V(0).Infof("failed to stat datafile %s: %v", v.DataBackend.Name(), e)
 		return e
 	}
-	if stat.Size() == 0 {
-		v.SuperBlock.version = CurrentVersion
-		_, e = v.dataFile.Write(v.SuperBlock.Bytes())
+	if datSize == 0 {
+		v.SuperBlock.Version = needle.CurrentVersion
+		_, e = v.DataBackend.WriteAt(v.SuperBlock.Bytes(), 0)
 		if e != nil && os.IsPermission(e) {
 			//read-only, but zero length - recreate it!
-			if v.dataFile, e = os.Create(v.dataFile.Name()); e == nil {
-				if _, e = v.dataFile.Write(v.SuperBlock.Bytes()); e == nil {
-					v.readOnly = false
+			var dataFile *os.File
+			if dataFile, e = os.Create(v.DataBackend.Name()); e == nil {
+				v.DataBackend = backend.NewDiskFile(dataFile)
+				if _, e = v.DataBackend.WriteAt(v.SuperBlock.Bytes(), 0); e == nil {
+					v.noWriteOrDelete = false
+					v.noWriteCanDelete = false
 				}
 			}
 		}
@@ -90,40 +36,13 @@ func (v *Volume) maybeWriteSuperBlock() error {
 }
 
 func (v *Volume) readSuperBlock() (err error) {
-	v.SuperBlock, err = ReadSuperBlock(v.dataFile)
-	return err
-}
-
-// ReadSuperBlock reads from data file and load it into volume's super block
-func ReadSuperBlock(dataFile *os.File) (superBlock SuperBlock, err error) {
-	if _, err = dataFile.Seek(0, 0); err != nil {
-		err = fmt.Errorf("cannot seek to the beginning of %s: %v", dataFile.Name(), err)
-		return
-	}
-	header := make([]byte, _SuperBlockSize)
-	if _, e := dataFile.Read(header); e != nil {
-		err = fmt.Errorf("cannot read volume %s super block: %v", dataFile.Name(), e)
-		return
-	}
-	superBlock.version = Version(header[0])
-	if superBlock.ReplicaPlacement, err = NewReplicaPlacementFromByte(header[1]); err != nil {
-		err = fmt.Errorf("cannot read replica type: %s", err.Error())
-		return
-	}
-	superBlock.Ttl = LoadTTLFromBytes(header[2:4])
-	superBlock.CompactRevision = util.BytesToUint16(header[4:6])
-	superBlock.extraSize = util.BytesToUint16(header[6:8])
-
-	if superBlock.extraSize > 0 {
-		// read more
-		extraData := make([]byte, int(superBlock.extraSize))
-		superBlock.Extra = &master_pb.SuperBlockExtra{}
-		err = proto.Unmarshal(extraData, superBlock.Extra)
-		if err != nil {
-			err = fmt.Errorf("cannot read volume %s super block extra: %v", dataFile.Name(), err)
-			return
+	v.SuperBlock, err = super_block.ReadSuperBlock(v.DataBackend)
+	if v.volumeInfo != nil && v.volumeInfo.Replication != "" {
+		if replication, err := super_block.NewReplicaPlacementFromString(v.volumeInfo.Replication); err != nil {
+			return fmt.Errorf("error parsing volume %d replication %s: %v", v.Id, v.volumeInfo.Replication, err)
+		} else {
+			v.SuperBlock.ReplicaPlacement = replication
 		}
 	}
-
-	return
+	return err
 }
diff --git a/weed/storage/volume_sync.go b/weed/storage/volume_sync.go
deleted file mode 100644
index 6eb7a61a5..000000000
--- a/weed/storage/volume_sync.go
+++ /dev/null
@@ -1,211 +0,0 @@
-package storage
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"sort"
-
-	"github.com/chrislusf/seaweedfs/weed/glog"
-	"github.com/chrislusf/seaweedfs/weed/operation"
-	"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
-	"github.com/chrislusf/seaweedfs/weed/storage/needle"
-	. "github.com/chrislusf/seaweedfs/weed/storage/types"
-)
-
-// The volume sync with a master volume via 2 steps:
-// 1. The slave checks master side to find subscription checkpoint
-//    to setup the replication.
-// 2.
The slave receives the updates from master - -/* -Assume the slave volume needs to follow the master volume. - -The master volume could be compacted, and could be many files ahead of -slave volume. - -Step 1: -The slave volume will ask the master volume for a snapshot -of (existing file entries, last offset, number of compacted times). - -For each entry x in master existing file entries: - if x does not exist locally: - add x locally - -For each entry y in local slave existing file entries: - if y does not exist on master: - delete y locally - -Step 2: -After this, use the last offset and number of compacted times to request -the master volume to send a new file, and keep looping. If the number of -compacted times is changed, go back to step 1 (very likely this can be -optimized more later). - -*/ - -func (v *Volume) Synchronize(volumeServer string) (err error) { - var lastCompactRevision uint16 = 0 - var compactRevision uint16 = 0 - var masterMap *needle.CompactMap - for i := 0; i < 3; i++ { - if masterMap, _, compactRevision, err = fetchVolumeFileEntries(volumeServer, v.Id); err != nil { - return fmt.Errorf("Failed to sync volume %d entries with %s: %v", v.Id, volumeServer, err) - } - if lastCompactRevision != compactRevision && lastCompactRevision != 0 { - if err = v.Compact(0); err != nil { - return fmt.Errorf("Compact Volume before synchronizing %v", err) - } - if err = v.commitCompact(); err != nil { - return fmt.Errorf("Commit Compact before synchronizing %v", err) - } - } - lastCompactRevision = compactRevision - if err = v.trySynchronizing(volumeServer, masterMap, compactRevision); err == nil { - return - } - } - return -} - -type ByOffset []needle.NeedleValue - -func (a ByOffset) Len() int { return len(a) } -func (a ByOffset) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a ByOffset) Less(i, j int) bool { return a[i].Offset < a[j].Offset } - -// trySynchronizing sync with remote volume server incrementally by -// make up the local and remote delta. 
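// (In the implementation being removed below, the delta is the symmetric
// difference of the two needle maps: entries present only on the master are
// fetched and appended locally, while entries present only on the slave get
// Size set to 0 so the fix-up loop issues a local delete for them.)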
-func (v *Volume) trySynchronizing(volumeServer string, masterMap *needle.CompactMap, compactRevision uint16) error { - slaveIdxFile, err := os.Open(v.nm.IndexFileName()) - if err != nil { - return fmt.Errorf("Open volume %d index file: %v", v.Id, err) - } - defer slaveIdxFile.Close() - slaveMap, err := LoadBtreeNeedleMap(slaveIdxFile) - if err != nil { - return fmt.Errorf("Load volume %d index file: %v", v.Id, err) - } - var delta []needle.NeedleValue - if err := masterMap.Visit(func(needleValue needle.NeedleValue) error { - if needleValue.Key == NeedleIdEmpty { - return nil - } - if _, ok := slaveMap.Get(needleValue.Key); ok { - return nil // skip intersection - } - delta = append(delta, needleValue) - return nil - }); err != nil { - return fmt.Errorf("Add master entry: %v", err) - } - if err := slaveMap.m.Visit(func(needleValue needle.NeedleValue) error { - if needleValue.Key == NeedleIdEmpty { - return nil - } - if _, ok := masterMap.Get(needleValue.Key); ok { - return nil // skip intersection - } - needleValue.Size = 0 - delta = append(delta, needleValue) - return nil - }); err != nil { - return fmt.Errorf("Remove local entry: %v", err) - } - - // simulate to same ordering of remote .dat file needle entries - sort.Sort(ByOffset(delta)) - - // make up the delta - fetchCount := 0 - for _, needleValue := range delta { - if needleValue.Size == 0 { - // remove file entry from local - v.removeNeedle(needleValue.Key) - continue - } - // add master file entry to local data file - if err := v.fetchNeedle(volumeServer, needleValue, compactRevision); err != nil { - glog.V(0).Infof("Fetch needle %v from %s: %v", needleValue, volumeServer, err) - return err - } - fetchCount++ - } - glog.V(1).Infof("Fetched %d needles from %s", fetchCount, volumeServer) - return nil -} - -func fetchVolumeFileEntries(volumeServer string, vid VolumeId) (m *needle.CompactMap, lastOffset uint64, compactRevision uint16, err error) { - m = needle.NewCompactMap() - - syncStatus, err := operation.GetVolumeSyncStatus(volumeServer, uint32(vid)) - if err != nil { - return m, 0, 0, err - } - - total := 0 - err = operation.GetVolumeIdxEntries(volumeServer, uint32(vid), func(key NeedleId, offset Offset, size uint32) { - // println("remote key", key, "offset", offset*NeedlePaddingSize, "size", size) - if offset > 0 && size != TombstoneFileSize { - m.Set(NeedleId(key), offset, size) - } else { - m.Delete(NeedleId(key)) - } - total++ - }) - - glog.V(2).Infof("server %s volume %d, entries %d, last offset %d, revision %d", volumeServer, vid, total, syncStatus.TailOffset, syncStatus.CompactRevision) - return m, syncStatus.TailOffset, uint16(syncStatus.CompactRevision), err - -} - -func (v *Volume) GetVolumeSyncStatus() *volume_server_pb.VolumeSyncStatusResponse { - var syncStatus = &volume_server_pb.VolumeSyncStatusResponse{} - if stat, err := v.dataFile.Stat(); err == nil { - syncStatus.TailOffset = uint64(stat.Size()) - } - syncStatus.IdxFileSize = v.nm.IndexFileSize() - syncStatus.CompactRevision = uint32(v.SuperBlock.CompactRevision) - syncStatus.Ttl = v.SuperBlock.Ttl.String() - syncStatus.Replication = v.SuperBlock.ReplicaPlacement.String() - return syncStatus -} - -func (v *Volume) IndexFileContent() ([]byte, error) { - return v.nm.IndexFileContent() -} - -// removeNeedle removes one needle by needle key -func (v *Volume) removeNeedle(key NeedleId) { - n := new(Needle) - n.Id = key - v.deleteNeedle(n) -} - -// fetchNeedle fetches a remote volume needle by vid, id, offset -// The compact revision is checked first in case the 
remote volume -// is compacted and the offset is invalid any more. -func (v *Volume) fetchNeedle(volumeServer string, needleValue needle.NeedleValue, compactRevision uint16) error { - - return operation.WithVolumeServerClient(volumeServer, func(client volume_server_pb.VolumeServerClient) error { - resp, err := client.VolumeSyncData(context.Background(), &volume_server_pb.VolumeSyncDataRequest{ - VolumdId: uint32(v.Id), - Revision: uint32(compactRevision), - Offset: uint32(needleValue.Offset), - Size: uint32(needleValue.Size), - NeedleId: needleValue.Key.String(), - }) - if err != nil { - return err - } - - offset, err := v.AppendBlob(resp.FileContent) - if err != nil { - return fmt.Errorf("Appending volume %d error: %v", v.Id, err) - } - // println("add key", needleValue.Key, "offset", offset, "size", needleValue.Size) - v.nm.Put(needleValue.Key, Offset(offset/NeedlePaddingSize), needleValue.Size) - return nil - }) - -} diff --git a/weed/storage/volume_tier.go b/weed/storage/volume_tier.go new file mode 100644 index 000000000..fd7b08654 --- /dev/null +++ b/weed/storage/volume_tier.go @@ -0,0 +1,50 @@ +package storage + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb" + "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + _ "github.com/chrislusf/seaweedfs/weed/storage/backend/s3_backend" +) + +func (v *Volume) GetVolumeInfo() *volume_server_pb.VolumeInfo { + return v.volumeInfo +} + +func (v *Volume) maybeLoadVolumeInfo() (found bool) { + + v.volumeInfo, v.hasRemoteFile, _ = pb.MaybeLoadVolumeInfo(v.FileName() + ".vif") + + if v.hasRemoteFile { + glog.V(0).Infof("volume %d is tiered to %s as %s and read only", v.Id, + v.volumeInfo.Files[0].BackendName(), v.volumeInfo.Files[0].Key) + } + + return + +} + +func (v *Volume) HasRemoteFile() bool { + return v.hasRemoteFile +} + +func (v *Volume) LoadRemoteFile() error { + tierFile := v.volumeInfo.GetFiles()[0] + backendStorage := backend.BackendStorages[tierFile.BackendName()] + + if v.DataBackend != nil { + v.DataBackend.Close() + } + + v.DataBackend = backendStorage.NewStorageFile(tierFile.Key, v.volumeInfo) + return nil +} + +func (v *Volume) SaveVolumeInfo() error { + + tierFileName := v.FileName() + ".vif" + + return pb.SaveVolumeInfo(tierFileName, v.volumeInfo) + +} diff --git a/weed/storage/volume_vacuum.go b/weed/storage/volume_vacuum.go index 88df76251..669d5dd6c 100644 --- a/weed/storage/volume_vacuum.go +++ b/weed/storage/volume_vacuum.go @@ -3,9 +3,16 @@ package storage import ( "fmt" "os" + "runtime" "time" "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/stats" + "github.com/chrislusf/seaweedfs/weed/storage/backend" + idx2 "github.com/chrislusf/seaweedfs/weed/storage/idx" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/needle_map" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" . 
"github.com/chrislusf/seaweedfs/weed/storage/types" "github.com/chrislusf/seaweedfs/weed/util" ) @@ -14,41 +21,88 @@ func (v *Volume) garbageLevel() float64 { if v.ContentSize() == 0 { return 0 } - return float64(v.nm.DeletedSize()) / float64(v.ContentSize()) + deletedSize := v.DeletedSize() + fileSize := v.ContentSize() + if v.DeletedCount() > 0 && v.DeletedSize() == 0 { + // this happens for .sdx converted back to normal .idx + // where deleted entry size is missing + datFileSize, _, _ := v.FileStat() + deletedSize = datFileSize - fileSize - super_block.SuperBlockSize + fileSize = datFileSize + } + return float64(deletedSize) / float64(fileSize) } -func (v *Volume) Compact(preallocate int64) error { +// compact a volume based on deletions in .dat files +func (v *Volume) Compact(preallocate int64, compactionBytePerSecond int64) error { + + if v.MemoryMapMaxSizeMb != 0 { //it makes no sense to compact in memory + return nil + } glog.V(3).Infof("Compacting volume %d ...", v.Id) //no need to lock for copy on write //v.accessLock.Lock() //defer v.accessLock.Unlock() //glog.V(3).Infof("Got Compaction lock...") + v.isCompacting = true + defer func() { + v.isCompacting = false + }() filePath := v.FileName() - v.lastCompactIndexOffset = v.nm.IndexFileSize() - v.lastCompactRevision = v.SuperBlock.CompactRevision + v.lastCompactIndexOffset = v.IndexFileSize() + v.lastCompactRevision = v.SuperBlock.CompactionRevision glog.V(3).Infof("creating copies for volume %d ,last offset %d...", v.Id, v.lastCompactIndexOffset) - return v.copyDataAndGenerateIndexFile(filePath+".cpd", filePath+".cpx", preallocate) + return v.copyDataAndGenerateIndexFile(filePath+".cpd", filePath+".cpx", preallocate, compactionBytePerSecond) } -func (v *Volume) Compact2() error { +// compact a volume based on deletions in .idx files +func (v *Volume) Compact2(preallocate int64, compactionBytePerSecond int64) error { + + if v.MemoryMapMaxSizeMb != 0 { //it makes no sense to compact in memory + return nil + } glog.V(3).Infof("Compact2 volume %d ...", v.Id) + + v.isCompacting = true + defer func() { + v.isCompacting = false + }() + filePath := v.FileName() + v.lastCompactIndexOffset = v.IndexFileSize() + v.lastCompactRevision = v.SuperBlock.CompactionRevision glog.V(3).Infof("creating copies for volume %d ...", v.Id) - return v.copyDataBasedOnIndexFile(filePath+".cpd", filePath+".cpx") + return copyDataBasedOnIndexFile(filePath+".dat", filePath+".idx", filePath+".cpd", filePath+".cpx", v.SuperBlock, v.Version(), preallocate, compactionBytePerSecond) } -func (v *Volume) commitCompact() error { +func (v *Volume) CommitCompact() error { + if v.MemoryMapMaxSizeMb != 0 { //it makes no sense to compact in memory + return nil + } glog.V(0).Infof("Committing volume %d vacuuming...", v.Id) + + v.isCompacting = true + defer func() { + v.isCompacting = false + }() + v.dataFileAccessLock.Lock() defer v.dataFileAccessLock.Unlock() + glog.V(3).Infof("Got volume %d committing lock...", v.Id) v.nm.Close() - _ = v.dataFile.Close() + if v.DataBackend != nil { + if err := v.DataBackend.Close(); err != nil { + glog.V(0).Infof("fail to close volume %d", v.Id) + } + } + v.DataBackend = nil + stats.VolumeServerVolumeCounter.WithLabelValues(v.Collection, "volume").Dec() var e error if e = v.makeupDiff(v.FileName()+".cpd", v.FileName()+".cpx", v.FileName()+".dat", v.FileName()+".idx"); e != nil { - glog.V(0).Infof("makeupDiff in commitCompact volume %d failed %v", v.Id, e) + glog.V(0).Infof("makeupDiff in CommitCompact volume %d failed %v", v.Id, e) e = 
os.Remove(v.FileName() + ".cpd") if e != nil { return e @@ -58,12 +112,22 @@ func (v *Volume) commitCompact() error { return e } } else { + if runtime.GOOS == "windows" { + e = os.RemoveAll(v.FileName() + ".dat") + if e != nil { + return e + } + e = os.RemoveAll(v.FileName() + ".idx") + if e != nil { + return e + } + } var e error if e = os.Rename(v.FileName()+".cpd", v.FileName()+".dat"); e != nil { - return e + return fmt.Errorf("rename %s: %v", v.FileName()+".cpd", e) } if e = os.Rename(v.FileName()+".cpx", v.FileName()+".idx"); e != nil { - return e + return fmt.Errorf("rename %s: %v", v.FileName()+".cpx", e) } } @@ -71,7 +135,6 @@ func (v *Volume) commitCompact() error { //time.Sleep(20 * time.Second) os.RemoveAll(v.FileName() + ".ldb") - os.RemoveAll(v.FileName() + ".bdb") glog.V(3).Infof("Loading volume %d commit file...", v.Id) if e = v.load(true, false, v.needleMapKind, 0); e != nil { @@ -94,14 +157,15 @@ func (v *Volume) cleanupCompact() error { return nil } -func fetchCompactRevisionFromDatFile(file *os.File) (compactRevision uint16, err error) { - superBlock, err := ReadSuperBlock(file) +func fetchCompactRevisionFromDatFile(datBackend backend.BackendStorageFile) (compactRevision uint16, err error) { + superBlock, err := super_block.ReadSuperBlock(datBackend) if err != nil { return 0, err } - return superBlock.CompactRevision, nil + return superBlock.CompactionRevision, nil } +// if old .dat and .idx files are updated, this func tries to apply the same changes to new files accordingly func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldIdxFileName string) (err error) { var indexSize int64 @@ -109,8 +173,10 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI defer oldIdxFile.Close() oldDatFile, err := os.Open(oldDatFileName) - defer oldDatFile.Close() + oldDatBackend := backend.NewDiskFile(oldDatFile) + defer oldDatBackend.Close() + // skip if the old .idx file has not changed if indexSize, err = verifyIndexFileIntegrity(oldIdxFile); err != nil { return fmt.Errorf("verifyIndexFileIntegrity %s failed: %v", oldIdxFileName, err) } @@ -118,7 +184,8 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI return nil } - oldDatCompactRevision, err := fetchCompactRevisionFromDatFile(oldDatFile) + // fail if the old .dat file has changed to a new revision + oldDatCompactRevision, err := fetchCompactRevisionFromDatFile(oldDatBackend) if err != nil { return fmt.Errorf("fetchCompactRevisionFromDatFile src %s failed: %v", oldDatFile.Name(), err) } @@ -132,12 +199,12 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI } incrementedHasUpdatedIndexEntry := make(map[NeedleId]keyField) - for idx_offset := indexSize - NeedleEntrySize; uint64(idx_offset) >= v.lastCompactIndexOffset; idx_offset -= NeedleEntrySize { + for idxOffset := indexSize - NeedleMapEntrySize; uint64(idxOffset) >= v.lastCompactIndexOffset; idxOffset -= NeedleMapEntrySize { var IdxEntry []byte - if IdxEntry, err = readIndexEntryAtOffset(oldIdxFile, idx_offset); err != nil { - return fmt.Errorf("readIndexEntry %s at offset %d failed: %v", oldIdxFileName, idx_offset, err) + if IdxEntry, err = readIndexEntryAtOffset(oldIdxFile, idxOffset); err != nil { + return fmt.Errorf("readIndexEntry %s at offset %d failed: %v", oldIdxFileName, idxOffset, err) } - key, offset, size := IdxFileEntry(IdxEntry) + key, offset, size := idx2.IdxFileEntry(IdxEntry) glog.V(4).Infof("key %d offset %d size %d", key, offset, size) if _, 
found := incrementedHasUpdatedIndexEntry[key]; !found { incrementedHasUpdatedIndexEntry[key] = keyField{ @@ -159,7 +226,8 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI if dst, err = os.OpenFile(newDatFileName, os.O_RDWR, 0644); err != nil { return fmt.Errorf("open dat file %s failed: %v", newDatFileName, err) } - defer dst.Close() + dstDatBackend := backend.NewDiskFile(dst) + defer dstDatBackend.Close() if idx, err = os.OpenFile(newIdxFileName, os.O_RDWR, 0644); err != nil { return fmt.Errorf("open idx file %s failed: %v", newIdxFileName, err) @@ -167,7 +235,7 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI defer idx.Close() var newDatCompactRevision uint16 - newDatCompactRevision, err = fetchCompactRevisionFromDatFile(dst) + newDatCompactRevision, err = fetchCompactRevisionFromDatFile(dstDatBackend) if err != nil { return fmt.Errorf("fetchCompactRevisionFromDatFile dst %s failed: %v", dst.Name(), err) } @@ -175,11 +243,9 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI return fmt.Errorf("oldDatFile %s 's compact revision is %d while newDatFile %s 's compact revision is %d", oldDatFileName, oldDatCompactRevision, newDatFileName, newDatCompactRevision) } - idx_entry_bytes := make([]byte, NeedleIdSize+OffsetSize+SizeSize) - for key, incre_idx_entry := range incrementedHasUpdatedIndexEntry { - NeedleIdToBytes(idx_entry_bytes[0:NeedleIdSize], key) - OffsetToBytes(idx_entry_bytes[NeedleIdSize:NeedleIdSize+OffsetSize], incre_idx_entry.offset) - util.Uint32toBytes(idx_entry_bytes[NeedleIdSize+OffsetSize:NeedleIdSize+OffsetSize+SizeSize], incre_idx_entry.size) + for key, increIdxEntry := range incrementedHasUpdatedIndexEntry { + + idxEntryBytes := needle_map.ToBytes(key, increIdxEntry.offset, increIdxEntry.size) var offset int64 if offset, err = dst.Seek(0, 2); err != nil { @@ -189,151 +255,181 @@ func (v *Volume) makeupDiff(newDatFileName, newIdxFileName, oldDatFileName, oldI //ensure file writing starting from aligned positions if offset%NeedlePaddingSize != 0 { offset = offset + (NeedlePaddingSize - offset%NeedlePaddingSize) - if offset, err = v.dataFile.Seek(offset, 0); err != nil { - glog.V(0).Infof("failed to align in datafile %s: %v", v.dataFile.Name(), err) + if offset, err = dst.Seek(offset, 0); err != nil { + glog.V(0).Infof("failed to align in datafile %s: %v", dst.Name(), err) return } } //updated needle - if incre_idx_entry.offset != 0 && incre_idx_entry.size != 0 && incre_idx_entry.size != TombstoneFileSize { + if !increIdxEntry.offset.IsZero() && increIdxEntry.size != 0 && increIdxEntry.size != TombstoneFileSize { //even the needle cache in memory is hit, the need_bytes is correct - glog.V(4).Infof("file %d offset %d size %d", key, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size) - var needle_bytes []byte - needle_bytes, err = ReadNeedleBlob(oldDatFile, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size, v.Version()) + glog.V(4).Infof("file %d offset %d size %d", key, increIdxEntry.offset.ToAcutalOffset(), increIdxEntry.size) + var needleBytes []byte + needleBytes, err = needle.ReadNeedleBlob(oldDatBackend, increIdxEntry.offset.ToAcutalOffset(), increIdxEntry.size, v.Version()) if err != nil { - return fmt.Errorf("ReadNeedleBlob %s key %d offset %d size %d failed: %v", oldDatFile.Name(), key, int64(incre_idx_entry.offset)*NeedlePaddingSize, incre_idx_entry.size, err) + return fmt.Errorf("ReadNeedleBlob %s key %d offset %d size %d 
failed: %v", oldDatFile.Name(), key, increIdxEntry.offset.ToAcutalOffset(), increIdxEntry.size, err) } - dst.Write(needle_bytes) - util.Uint32toBytes(idx_entry_bytes[8:12], uint32(offset/NeedlePaddingSize)) + dst.Write(needleBytes) + util.Uint32toBytes(idxEntryBytes[8:12], uint32(offset/NeedlePaddingSize)) } else { //deleted needle //fakeDelNeedle 's default Data field is nil - fakeDelNeedle := new(Needle) + fakeDelNeedle := new(needle.Needle) fakeDelNeedle.Id = key fakeDelNeedle.Cookie = 0x12345678 fakeDelNeedle.AppendAtNs = uint64(time.Now().UnixNano()) - _, _, err = fakeDelNeedle.Append(dst, v.Version()) + _, _, _, err = fakeDelNeedle.Append(dstDatBackend, v.Version()) if err != nil { return fmt.Errorf("append deleted %d failed: %v", key, err) } - util.Uint32toBytes(idx_entry_bytes[8:12], uint32(0)) + util.Uint32toBytes(idxEntryBytes[8:12], uint32(0)) } if _, err := idx.Seek(0, 2); err != nil { return fmt.Errorf("cannot seek end of indexfile %s: %v", newIdxFileName, err) } - _, err = idx.Write(idx_entry_bytes) + _, err = idx.Write(idxEntryBytes) } return nil } -func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, preallocate int64) (err error) { +type VolumeFileScanner4Vacuum struct { + version needle.Version + v *Volume + dstBackend backend.BackendStorageFile + nm *needle_map.MemDb + newOffset int64 + now uint64 + writeThrottler *util.WriteThrottler +} + +func (scanner *VolumeFileScanner4Vacuum) VisitSuperBlock(superBlock super_block.SuperBlock) error { + scanner.version = superBlock.Version + superBlock.CompactionRevision++ + _, err := scanner.dstBackend.WriteAt(superBlock.Bytes(), 0) + scanner.newOffset = int64(superBlock.BlockSize()) + return err + +} +func (scanner *VolumeFileScanner4Vacuum) ReadNeedleBody() bool { + return true +} + +func (scanner *VolumeFileScanner4Vacuum) VisitNeedle(n *needle.Needle, offset int64, needleHeader, needleBody []byte) error { + if n.HasTtl() && scanner.now >= n.LastModified+uint64(scanner.v.Ttl.Minutes()*60) { + return nil + } + nv, ok := scanner.v.nm.Get(n.Id) + glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) + if ok && nv.Offset.ToAcutalOffset() == offset && nv.Size > 0 && nv.Size != TombstoneFileSize { + if err := scanner.nm.Set(n.Id, ToOffset(scanner.newOffset), n.Size); err != nil { + return fmt.Errorf("cannot put needle: %s", err) + } + if _, _, _, err := n.Append(scanner.dstBackend, scanner.v.Version()); err != nil { + return fmt.Errorf("cannot append needle: %s", err) + } + delta := n.DiskSize(scanner.version) + scanner.newOffset += delta + scanner.writeThrottler.MaybeSlowdown(delta) + glog.V(4).Infoln("saving key", n.Id, "volume offset", offset, "=>", scanner.newOffset, "data_size", n.Size) + } + return nil +} + +func (v *Volume) copyDataAndGenerateIndexFile(dstName, idxName string, preallocate int64, compactionBytePerSecond int64) (err error) { var ( - dst, idx *os.File + dst backend.BackendStorageFile ) - if dst, err = createVolumeFile(dstName, preallocate); err != nil { + if dst, err = createVolumeFile(dstName, preallocate, 0); err != nil { return } defer dst.Close() - if idx, err = os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil { - return - } - defer idx.Close() - - nm := NewBtreeNeedleMap(idx) - new_offset := int64(0) - - now := uint64(time.Now().Unix()) + nm := needle_map.NewMemDb() + defer nm.Close() - var version Version - err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind, - func(superBlock SuperBlock) error { - version = superBlock.Version() - 
superBlock.CompactRevision++ - _, err = dst.Write(superBlock.Bytes()) - new_offset = int64(superBlock.BlockSize()) - return err - }, true, func(n *Needle, offset int64) error { - if n.HasTtl() && now >= n.LastModified+uint64(v.Ttl.Minutes()*60) { - return nil - } - nv, ok := v.nm.Get(n.Id) - glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) - if ok && int64(nv.Offset)*NeedlePaddingSize == offset && nv.Size > 0 { - if err = nm.Put(n.Id, Offset(new_offset/NeedlePaddingSize), n.Size); err != nil { - return fmt.Errorf("cannot put needle: %s", err) - } - if _, _, err := n.Append(dst, v.Version()); err != nil { - return fmt.Errorf("cannot append needle: %s", err) - } - new_offset += n.DiskSize(version) - glog.V(4).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size) - } - return nil - }) + scanner := &VolumeFileScanner4Vacuum{ + v: v, + now: uint64(time.Now().Unix()), + nm: nm, + dstBackend: dst, + writeThrottler: util.NewWriteThrottler(compactionBytePerSecond), + } + err = ScanVolumeFile(v.dir, v.Collection, v.Id, v.needleMapKind, scanner) + if err != nil { + return err + } + err = nm.SaveToIdx(idxName) + return } -func (v *Volume) copyDataBasedOnIndexFile(dstName, idxName string) (err error) { +func copyDataBasedOnIndexFile(srcDatName, srcIdxName, dstDatName, datIdxName string, sb super_block.SuperBlock, version needle.Version, preallocate int64, compactionBytePerSecond int64) (err error) { var ( - dst, idx, oldIndexFile *os.File + srcDatBackend, dstDatBackend backend.BackendStorageFile + dataFile *os.File ) - if dst, err = os.OpenFile(dstName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil { + if dstDatBackend, err = createVolumeFile(dstDatName, preallocate, 0); err != nil { return } - defer dst.Close() + defer dstDatBackend.Close() - if idx, err = os.OpenFile(idxName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil { + oldNm := needle_map.NewMemDb() + defer oldNm.Close() + newNm := needle_map.NewMemDb() + defer newNm.Close() + if err = oldNm.LoadFromIdx(srcIdxName); err != nil { return } - defer idx.Close() - - if oldIndexFile, err = os.OpenFile(v.FileName()+".idx", os.O_RDONLY, 0644); err != nil { - return + if dataFile, err = os.Open(srcDatName); err != nil { + return err } - defer oldIndexFile.Close() + srcDatBackend = backend.NewDiskFile(dataFile) + defer srcDatBackend.Close() - nm := NewBtreeNeedleMap(idx) now := uint64(time.Now().Unix()) - v.SuperBlock.CompactRevision++ - dst.Write(v.SuperBlock.Bytes()) - new_offset := int64(v.SuperBlock.BlockSize()) + sb.CompactionRevision++ + dstDatBackend.WriteAt(sb.Bytes(), 0) + newOffset := int64(sb.BlockSize()) + + writeThrottler := util.NewWriteThrottler(compactionBytePerSecond) - WalkIndexFile(oldIndexFile, func(key NeedleId, offset Offset, size uint32) error { - if offset == 0 || size == TombstoneFileSize { + oldNm.AscendingVisit(func(value needle_map.NeedleValue) error { + + offset, size := value.Offset, value.Size + + if offset.IsZero() || size == TombstoneFileSize { return nil } - nv, ok := v.nm.Get(key) - if !ok { + n := new(needle.Needle) + err := n.ReadData(srcDatBackend, offset.ToAcutalOffset(), size, version) + if err != nil { return nil } - n := new(Needle) - n.ReadData(v.dataFile, int64(offset)*NeedlePaddingSize, size, v.Version()) - - if n.HasTtl() && now >= n.LastModified+uint64(v.Ttl.Minutes()*60) { + if n.HasTtl() && now >= n.LastModified+uint64(sb.Ttl.Minutes()*60) { return nil } - glog.V(4).Infoln("needle expected offset ", offset, "ok", ok, "nv", nv) - if 
nv.Offset == offset && nv.Size > 0 { - if err = nm.Put(n.Id, Offset(new_offset/NeedlePaddingSize), n.Size); err != nil { - return fmt.Errorf("cannot put needle: %s", err) - } - if _, _, err = n.Append(dst, v.Version()); err != nil { - return fmt.Errorf("cannot append needle: %s", err) - } - new_offset += n.DiskSize(v.Version()) - glog.V(3).Infoln("saving key", n.Id, "volume offset", offset, "=>", new_offset, "data_size", n.Size) + if err = newNm.Set(n.Id, ToOffset(newOffset), n.Size); err != nil { + return fmt.Errorf("cannot put needle: %s", err) + } + if _, _, _, err = n.Append(dstDatBackend, sb.Version); err != nil { + return fmt.Errorf("cannot append needle: %s", err) } + delta := n.DiskSize(version) + newOffset += delta + writeThrottler.MaybeSlowdown(delta) + glog.V(4).Infoln("saving key", n.Id, "volume offset", offset, "=>", newOffset, "data_size", n.Size) + return nil }) + newNm.SaveToIdx(datIdxName) + return } diff --git a/weed/storage/volume_vacuum_test.go b/weed/storage/volume_vacuum_test.go index 464d52618..51f04c8b1 100644 --- a/weed/storage/volume_vacuum_test.go +++ b/weed/storage/volume_vacuum_test.go @@ -1,11 +1,15 @@ package storage import ( - "github.com/chrislusf/seaweedfs/weed/storage/types" "io/ioutil" "math/rand" "os" "testing" + "time" + + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + "github.com/chrislusf/seaweedfs/weed/storage/types" ) /* @@ -43,7 +47,7 @@ func TestMakeDiff(t *testing.T) { v := new(Volume) //lastCompactIndexOffset value is the index file size before step 4 v.lastCompactIndexOffset = 96 - v.SuperBlock.version = 0x2 + v.SuperBlock.Version = 0x2 /* err := v.makeupDiff( "/yourpath/1.cpd", @@ -65,13 +69,13 @@ func TestCompaction(t *testing.T) { } defer os.RemoveAll(dir) // clean up - v, err := NewVolume(dir, "", 1, NeedleMapInMemory, &ReplicaPlacement{}, &TTL{}, 0) + v, err := NewVolume(dir, "", 1, NeedleMapInMemory, &super_block.ReplicaPlacement{}, &needle.TTL{}, 0, 0) if err != nil { t.Fatalf("volume creation: %v", err) } - beforeCommitFileCount := 1000 - afterCommitFileCount := 1000 + beforeCommitFileCount := 10000 + afterCommitFileCount := 10000 infos := make([]*needleInfo, beforeCommitFileCount+afterCommitFileCount) @@ -79,17 +83,20 @@ func TestCompaction(t *testing.T) { doSomeWritesDeletes(i, v, t, infos) } - v.Compact(0) + startTime := time.Now() + v.Compact2(0, 0) + speed := float64(v.ContentSize()) / time.Now().Sub(startTime).Seconds() + t.Logf("compaction speed: %.2f bytes/s", speed) for i := 1; i <= afterCommitFileCount; i++ { doSomeWritesDeletes(i+beforeCommitFileCount, v, t, infos) } - v.commitCompact() + v.CommitCompact() v.Close() - v, err = NewVolume(dir, "", 1, NeedleMapInMemory, nil, nil, 0) + v, err = NewVolume(dir, "", 1, NeedleMapInMemory, nil, nil, 0, 0) if err != nil { t.Fatalf("volume reloading: %v", err) } @@ -122,7 +129,7 @@ func TestCompaction(t *testing.T) { } func doSomeWritesDeletes(i int, v *Volume, t *testing.T, infos []*needleInfo) { n := newRandomNeedle(uint64(i)) - size, err := v.writeNeedle(n) + _, size, _, err := v.writeNeedle(n) if err != nil { t.Fatalf("write file %d: %v", i, err) } @@ -131,7 +138,7 @@ func doSomeWritesDeletes(i int, v *Volume, t *testing.T, infos []*needleInfo) { crc: n.Checksum, } // println("written file", i, "checksum", n.Checksum.Value(), "size", size) - if rand.Float64() < 0.5 { + if rand.Float64() < 0.03 { toBeDeleted := rand.Intn(i) + 1 oldNeedle := newEmptyNeedle(uint64(toBeDeleted)) v.deleteNeedle(oldNeedle) @@ -145,21 
+152,21 @@ func doSomeWritesDeletes(i int, v *Volume, t *testing.T, infos []*needleInfo) { type needleInfo struct { size uint32 - crc CRC + crc needle.CRC } -func newRandomNeedle(id uint64) *Needle { - n := new(Needle) +func newRandomNeedle(id uint64) *needle.Needle { + n := new(needle.Needle) n.Data = make([]byte, rand.Intn(1024)) rand.Read(n.Data) - n.Checksum = NewCRC(n.Data) + n.Checksum = needle.NewCRC(n.Data) n.Id = types.Uint64ToNeedleId(id) return n } -func newEmptyNeedle(id uint64) *Needle { - n := new(Needle) +func newEmptyNeedle(id uint64) *needle.Needle { + n := new(needle.Needle) n.Id = types.Uint64ToNeedleId(id) return n } diff --git a/weed/tools/read_index.go b/weed/tools/read_index.go deleted file mode 100644 index d53f489ea..000000000 --- a/weed/tools/read_index.go +++ /dev/null @@ -1,29 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - "os" - - "github.com/chrislusf/seaweedfs/weed/storage" - "github.com/chrislusf/seaweedfs/weed/storage/types" -) - -var ( - indexFileName = flag.String("file", "", ".idx file to analyze") -) - -func main() { - flag.Parse() - indexFile, err := os.OpenFile(*indexFileName, os.O_RDONLY, 0644) - if err != nil { - log.Fatalf("Create Volume Index [ERROR] %s\n", err) - } - defer indexFile.Close() - - storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error { - fmt.Printf("key %d, offset %d, size %d, nextOffset %d\n", key, offset*8, size, int64(offset)*types.NeedlePaddingSize+int64(size)) - return nil - }) -} diff --git a/weed/topology/allocate_volume.go b/weed/topology/allocate_volume.go index 61fc9d479..e5dc48652 100644 --- a/weed/topology/allocate_volume.go +++ b/weed/topology/allocate_volume.go @@ -5,22 +5,25 @@ import ( "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "google.golang.org/grpc" ) type AllocateVolumeResult struct { Error string } -func AllocateVolume(dn *DataNode, vid storage.VolumeId, option *VolumeGrowOption) error { +func AllocateVolume(dn *DataNode, grpcDialOption grpc.DialOption, vid needle.VolumeId, option *VolumeGrowOption) error { - return operation.WithVolumeServerClient(dn.Url(), func(client volume_server_pb.VolumeServerClient) error { - _, deleteErr := client.AssignVolume(context.Background(), &volume_server_pb.AssignVolumeRequest{ - VolumdId: uint32(vid), - Collection: option.Collection, - Replication: option.ReplicaPlacement.String(), - Ttl: option.Ttl.String(), - Preallocate: option.Prealloacte, + return operation.WithVolumeServerClient(dn.Url(), grpcDialOption, func(client volume_server_pb.VolumeServerClient) error { + + _, deleteErr := client.AllocateVolume(context.Background(), &volume_server_pb.AllocateVolumeRequest{ + VolumeId: uint32(vid), + Collection: option.Collection, + Replication: option.ReplicaPlacement.String(), + Ttl: option.Ttl.String(), + Preallocate: option.Prealloacte, + MemoryMapMaxSizeMb: option.MemoryMapMaxSizeMb, }) return deleteErr }) diff --git a/weed/topology/cluster_commands.go b/weed/topology/cluster_commands.go index bb44669b2..152691ccb 100644 --- a/weed/topology/cluster_commands.go +++ b/weed/topology/cluster_commands.go @@ -3,14 +3,14 @@ package topology import ( "github.com/chrislusf/raft" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) type MaxVolumeIdCommand struct { - 
MaxVolumeId storage.VolumeId `json:"maxVolumeId"` + MaxVolumeId needle.VolumeId `json:"maxVolumeId"` } -func NewMaxVolumeIdCommand(value storage.VolumeId) *MaxVolumeIdCommand { +func NewMaxVolumeIdCommand(value needle.VolumeId) *MaxVolumeIdCommand { return &MaxVolumeIdCommand{ MaxVolumeId: value, } @@ -25,7 +25,7 @@ func (c *MaxVolumeIdCommand) Apply(server raft.Server) (interface{}, error) { before := topo.GetMaxVolumeId() topo.UpAdjustMaxVolumeId(c.MaxVolumeId) - glog.V(0).Infoln("max volume id", before, "==>", topo.GetMaxVolumeId()) + glog.V(1).Infoln("max volume id", before, "==>", topo.GetMaxVolumeId()) return nil, nil } diff --git a/weed/topology/collection.go b/weed/topology/collection.go index a17f0c961..7a611d904 100644 --- a/weed/topology/collection.go +++ b/weed/topology/collection.go @@ -3,7 +3,8 @@ package topology import ( "fmt" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" "github.com/chrislusf/seaweedfs/weed/util" ) @@ -23,7 +24,7 @@ func (c *Collection) String() string { return fmt.Sprintf("Name:%s, volumeSizeLimit:%d, storageType2VolumeLayout:%v", c.Name, c.volumeSizeLimit, c.storageType2VolumeLayout) } -func (c *Collection) GetOrCreateVolumeLayout(rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout { +func (c *Collection) GetOrCreateVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL) *VolumeLayout { keyString := rp.String() if ttl != nil { keyString += ttl.String() @@ -34,7 +35,7 @@ func (c *Collection) GetOrCreateVolumeLayout(rp *storage.ReplicaPlacement, ttl * return vl.(*VolumeLayout) } -func (c *Collection) Lookup(vid storage.VolumeId) []*DataNode { +func (c *Collection) Lookup(vid needle.VolumeId) []*DataNode { for _, vl := range c.storageType2VolumeLayout.Items() { if vl != nil { if list := vl.(*VolumeLayout).Lookup(vid); list != nil { diff --git a/weed/topology/data_center.go b/weed/topology/data_center.go index bcf2dfd31..dc3accb71 100644 --- a/weed/topology/data_center.go +++ b/weed/topology/data_center.go @@ -1,5 +1,7 @@ package topology +import "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + type DataCenter struct { NodeImpl } @@ -38,3 +40,19 @@ func (dc *DataCenter) ToMap() interface{} { m["Racks"] = racks return m } + +func (dc *DataCenter) ToDataCenterInfo() *master_pb.DataCenterInfo { + m := &master_pb.DataCenterInfo{ + Id: string(dc.Id()), + VolumeCount: uint64(dc.GetVolumeCount()), + MaxVolumeCount: uint64(dc.GetMaxVolumeCount()), + FreeVolumeCount: uint64(dc.FreeSpace()), + ActiveVolumeCount: uint64(dc.GetActiveVolumeCount()), + RemoteVolumeCount: uint64(dc.GetRemoteVolumeCount()), + } + for _, c := range dc.Children() { + rack := c.(*Rack) + m.RackInfos = append(m.RackInfos, rack.ToRackInfo()) + } + return m +} diff --git a/weed/topology/data_node.go b/weed/topology/data_node.go index 6ee9a8a03..617341e54 100644 --- a/weed/topology/data_node.go +++ b/weed/topology/data_node.go @@ -3,6 +3,11 @@ package topology import ( "fmt" "strconv" + "sync" + + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage" @@ -10,18 +15,21 @@ import ( type DataNode struct { NodeImpl - volumes map[storage.VolumeId]storage.VolumeInfo - Ip string - Port int - PublicUrl string - LastSeen int64 // unix time in seconds + volumes 
map[needle.VolumeId]storage.VolumeInfo + Ip string + Port int + PublicUrl string + LastSeen int64 // unix time in seconds + ecShards map[needle.VolumeId]*erasure_coding.EcVolumeInfo + ecShardsLock sync.RWMutex } func NewDataNode(id string) *DataNode { s := &DataNode{} s.id = NodeId(id) s.nodeType = "DataNode" - s.volumes = make(map[storage.VolumeId]storage.VolumeInfo) + s.volumes = make(map[needle.VolumeId]storage.VolumeInfo) + s.ecShards = make(map[needle.VolumeId]*erasure_coding.EcVolumeInfo) s.NodeImpl.value = s return s } @@ -35,22 +43,33 @@ func (dn *DataNode) String() string { func (dn *DataNode) AddOrUpdateVolume(v storage.VolumeInfo) (isNew bool) { dn.Lock() defer dn.Unlock() - if _, ok := dn.volumes[v.Id]; !ok { + if oldV, ok := dn.volumes[v.Id]; !ok { dn.volumes[v.Id] = v dn.UpAdjustVolumeCountDelta(1) + if v.IsRemote() { + dn.UpAdjustRemoteVolumeCountDelta(1) + } if !v.ReadOnly { dn.UpAdjustActiveVolumeCountDelta(1) } dn.UpAdjustMaxVolumeId(v.Id) isNew = true } else { + if oldV.IsRemote() != v.IsRemote() { + if v.IsRemote() { + dn.UpAdjustRemoteVolumeCountDelta(1) + } + if oldV.IsRemote() { + dn.UpAdjustRemoteVolumeCountDelta(-1) + } + } dn.volumes[v.Id] = v } return } func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (newVolumes, deletedVolumes []storage.VolumeInfo) { - actualVolumeMap := make(map[storage.VolumeId]storage.VolumeInfo) + actualVolumeMap := make(map[needle.VolumeId]storage.VolumeInfo) for _, v := range actualVolumes { actualVolumeMap[v.Id] = v } @@ -61,7 +80,12 @@ func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (newVolume delete(dn.volumes, vid) deletedVolumes = append(deletedVolumes, v) dn.UpAdjustVolumeCountDelta(-1) - dn.UpAdjustActiveVolumeCountDelta(-1) + if v.IsRemote() { + dn.UpAdjustRemoteVolumeCountDelta(-1) + } + if !v.ReadOnly { + dn.UpAdjustActiveVolumeCountDelta(-1) + } } } dn.Unlock() @@ -74,6 +98,25 @@ func (dn *DataNode) UpdateVolumes(actualVolumes []storage.VolumeInfo) (newVolume return } +func (dn *DataNode) DeltaUpdateVolumes(newlVolumes, deletedVolumes []storage.VolumeInfo) { + dn.Lock() + for _, v := range deletedVolumes { + delete(dn.volumes, v.Id) + dn.UpAdjustVolumeCountDelta(-1) + if v.IsRemote() { + dn.UpAdjustRemoteVolumeCountDelta(-1) + } + if !v.ReadOnly { + dn.UpAdjustActiveVolumeCountDelta(-1) + } + } + dn.Unlock() + for _, v := range newlVolumes { + dn.AddOrUpdateVolume(v) + } + return +} + func (dn *DataNode) GetVolumes() (ret []storage.VolumeInfo) { dn.RLock() for _, v := range dn.volumes { @@ -83,12 +126,12 @@ func (dn *DataNode) GetVolumes() (ret []storage.VolumeInfo) { return ret } -func (dn *DataNode) GetVolumesById(id storage.VolumeId) (storage.VolumeInfo, error) { +func (dn *DataNode) GetVolumesById(id needle.VolumeId) (storage.VolumeInfo, error) { dn.RLock() defer dn.RUnlock() - v_info, ok := dn.volumes[id] + vInfo, ok := dn.volumes[id] if ok { - return v_info, nil + return vInfo, nil } else { return storage.VolumeInfo{}, fmt.Errorf("volumeInfo not found") } @@ -123,8 +166,27 @@ func (dn *DataNode) ToMap() interface{} { ret := make(map[string]interface{}) ret["Url"] = dn.Url() ret["Volumes"] = dn.GetVolumeCount() + ret["EcShards"] = dn.GetEcShardCount() ret["Max"] = dn.GetMaxVolumeCount() ret["Free"] = dn.FreeSpace() ret["PublicUrl"] = dn.PublicUrl return ret } + +func (dn *DataNode) ToDataNodeInfo() *master_pb.DataNodeInfo { + m := &master_pb.DataNodeInfo{ + Id: string(dn.Id()), + VolumeCount: uint64(dn.GetVolumeCount()), + MaxVolumeCount: uint64(dn.GetMaxVolumeCount()), + 
FreeVolumeCount: uint64(dn.FreeSpace()), + ActiveVolumeCount: uint64(dn.GetActiveVolumeCount()), + RemoteVolumeCount: uint64(dn.GetRemoteVolumeCount()), + } + for _, v := range dn.GetVolumes() { + m.VolumeInfos = append(m.VolumeInfos, v.ToVolumeInformationMessage()) + } + for _, ecv := range dn.GetEcShards() { + m.EcShardInfos = append(m.EcShardInfos, ecv.ToVolumeEcShardInformationMessage()) + } + return m +} diff --git a/weed/topology/data_node_ec.go b/weed/topology/data_node_ec.go new file mode 100644 index 000000000..75c8784fe --- /dev/null +++ b/weed/topology/data_node_ec.go @@ -0,0 +1,135 @@ +package topology + +import ( + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +func (dn *DataNode) GetEcShards() (ret []*erasure_coding.EcVolumeInfo) { + dn.RLock() + for _, ecVolumeInfo := range dn.ecShards { + ret = append(ret, ecVolumeInfo) + } + dn.RUnlock() + return ret +} + +func (dn *DataNode) UpdateEcShards(actualShards []*erasure_coding.EcVolumeInfo) (newShards, deletedShards []*erasure_coding.EcVolumeInfo) { + // prepare the new ec shard map + actualEcShardMap := make(map[needle.VolumeId]*erasure_coding.EcVolumeInfo) + for _, ecShards := range actualShards { + actualEcShardMap[ecShards.VolumeId] = ecShards + } + + // find out the newShards and deletedShards + var newShardCount, deletedShardCount int + dn.ecShardsLock.RLock() + for vid, ecShards := range dn.ecShards { + if actualEcShards, ok := actualEcShardMap[vid]; !ok { + // dn registered ec shards not found in the new set of ec shards + deletedShards = append(deletedShards, ecShards) + deletedShardCount += ecShards.ShardIdCount() + } else { + // found, but maybe the actual shard could be missing + a := actualEcShards.Minus(ecShards) + if a.ShardIdCount() > 0 { + newShards = append(newShards, a) + newShardCount += a.ShardIdCount() + } + d := ecShards.Minus(actualEcShards) + if d.ShardIdCount() > 0 { + deletedShards = append(deletedShards, d) + deletedShardCount += d.ShardIdCount() + } + } + } + for _, ecShards := range actualShards { + if _, found := dn.ecShards[ecShards.VolumeId]; !found { + newShards = append(newShards, ecShards) + newShardCount += ecShards.ShardIdCount() + } + } + dn.ecShardsLock.RUnlock() + + if len(newShards) > 0 || len(deletedShards) > 0 { + // if changed, set to the new ec shard map + dn.ecShardsLock.Lock() + dn.ecShards = actualEcShardMap + dn.UpAdjustEcShardCountDelta(int64(newShardCount - deletedShardCount)) + dn.ecShardsLock.Unlock() + } + + return +} + +func (dn *DataNode) DeltaUpdateEcShards(newShards, deletedShards []*erasure_coding.EcVolumeInfo) { + + for _, newShard := range newShards { + dn.AddOrUpdateEcShard(newShard) + } + + for _, deletedShard := range deletedShards { + dn.DeleteEcShard(deletedShard) + } + +} + +func (dn *DataNode) AddOrUpdateEcShard(s *erasure_coding.EcVolumeInfo) { + dn.ecShardsLock.Lock() + defer dn.ecShardsLock.Unlock() + + delta := 0 + if existing, ok := dn.ecShards[s.VolumeId]; !ok { + dn.ecShards[s.VolumeId] = s + delta = s.ShardBits.ShardIdCount() + } else { + oldCount := existing.ShardBits.ShardIdCount() + existing.ShardBits = existing.ShardBits.Plus(s.ShardBits) + delta = existing.ShardBits.ShardIdCount() - oldCount + } + + dn.UpAdjustEcShardCountDelta(int64(delta)) + +} + +func (dn *DataNode) DeleteEcShard(s *erasure_coding.EcVolumeInfo) { + dn.ecShardsLock.Lock() + defer dn.ecShardsLock.Unlock() + + if existing, ok := dn.ecShards[s.VolumeId]; ok { + oldCount := existing.ShardBits.ShardIdCount() + 
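// ShardBits here is a per-volume bitmap with one bit per erasure-coding shard
// id, so the Minus call just below clears the removed shards' bits; the
// resulting ShardIdCount() difference (zero or negative at this point) is then
// propagated up the topology tree through UpAdjustEcShardCountDelta.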
existing.ShardBits = existing.ShardBits.Minus(s.ShardBits) + delta := existing.ShardBits.ShardIdCount() - oldCount + dn.UpAdjustEcShardCountDelta(int64(delta)) + if existing.ShardBits.ShardIdCount() == 0 { + delete(dn.ecShards, s.VolumeId) + } + } + +} + +func (dn *DataNode) HasVolumesById(id needle.VolumeId) (hasVolumeId bool) { + + // check whether normal volumes has this volume id + dn.RLock() + _, ok := dn.volumes[id] + if ok { + hasVolumeId = true + } + dn.RUnlock() + + if hasVolumeId { + return + } + + // check whether ec shards has this volume id + dn.ecShardsLock.RLock() + _, ok = dn.ecShards[id] + if ok { + hasVolumeId = true + } + dn.ecShardsLock.RUnlock() + + return + +} diff --git a/weed/topology/node.go b/weed/topology/node.go index 48a61d867..ceeb96d60 100644 --- a/weed/topology/node.go +++ b/weed/topology/node.go @@ -5,26 +5,32 @@ import ( "math/rand" "strings" "sync" + "sync/atomic" "github.com/chrislusf/seaweedfs/weed/glog" - "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) type NodeId string type Node interface { Id() NodeId String() string - FreeSpace() int - ReserveOneVolume(r int) (*DataNode, error) - UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int) - UpAdjustVolumeCountDelta(volumeCountDelta int) - UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int) - UpAdjustMaxVolumeId(vid storage.VolumeId) + FreeSpace() int64 + ReserveOneVolume(r int64) (*DataNode, error) + UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int64) + UpAdjustVolumeCountDelta(volumeCountDelta int64) + UpAdjustRemoteVolumeCountDelta(remoteVolumeCountDelta int64) + UpAdjustEcShardCountDelta(ecShardCountDelta int64) + UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int64) + UpAdjustMaxVolumeId(vid needle.VolumeId) - GetVolumeCount() int - GetActiveVolumeCount() int - GetMaxVolumeCount() int - GetMaxVolumeId() storage.VolumeId + GetVolumeCount() int64 + GetEcShardCount() int64 + GetActiveVolumeCount() int64 + GetRemoteVolumeCount() int64 + GetMaxVolumeCount() int64 + GetMaxVolumeId() needle.VolumeId SetParent(Node) LinkChildNode(node Node) UnlinkChildNode(nodeId NodeId) @@ -39,14 +45,16 @@ type Node interface { GetValue() interface{} //get reference to the topology,dc,rack,datanode } type NodeImpl struct { + volumeCount int64 + remoteVolumeCount int64 + activeVolumeCount int64 + ecShardCount int64 + maxVolumeCount int64 id NodeId - volumeCount int - activeVolumeCount int - maxVolumeCount int parent Node sync.RWMutex // lock children children map[NodeId]Node - maxVolumeId storage.VolumeId + maxVolumeId needle.VolumeId //for rack, data center, topology nodeType string @@ -54,56 +62,64 @@ type NodeImpl struct { } // the first node must satisfy filterFirstNodeFn(), the rest nodes must have one free slot -func (n *NodeImpl) RandomlyPickNodes(numberOfNodes int, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) { - candidates := make([]Node, 0, len(n.children)) +func (n *NodeImpl) PickNodesByWeight(numberOfNodes int, filterFirstNodeFn func(dn Node) error) (firstNode Node, restNodes []Node, err error) { + var totalWeights int64 var errs []string n.RLock() + candidates := make([]Node, 0, len(n.children)) + candidatesWeights := make([]int64, 0, len(n.children)) + //pick nodes which have enough free volume slots as candidates, and use the free slot count as node weight. 
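// The loop below is the candidate-collection step of a weighted random pick:
// every child with at least one free volume slot becomes a candidate, weighted
// by its FreeSpace() (free slot count), and totalWeights accumulates the sum
// that bounds rand.Int63n in the drawing loop further down, so nodes with more
// free slots are chosen proportionally more often.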
for _, node := range n.children { - if err := filterFirstNodeFn(node); err == nil { - candidates = append(candidates, node) - } else { - errs = append(errs, string(node.Id())+":"+err.Error()) + if node.FreeSpace() <= 0 { + continue } + totalWeights += node.FreeSpace() + candidates = append(candidates, node) + candidatesWeights = append(candidatesWeights, node.FreeSpace()) } n.RUnlock() - if len(candidates) == 0 { - return nil, nil, errors.New("No matching data node found! \n" + strings.Join(errs, "\n")) + if len(candidates) < numberOfNodes { + glog.V(2).Infoln(n.Id(), "failed to pick", numberOfNodes, "from ", len(candidates), "node candidates") + return nil, nil, errors.New("Not enough data nodes found!") } - firstNode = candidates[rand.Intn(len(candidates))] - glog.V(2).Infoln(n.Id(), "picked main node:", firstNode.Id()) - restNodes = make([]Node, numberOfNodes-1) - candidates = candidates[:0] - n.RLock() - for _, node := range n.children { - if node.Id() == firstNode.Id() { - continue - } - if node.FreeSpace() <= 0 { - continue + //pick nodes randomly by weight; a node picked earlier has a higher final weight + sortedCandidates := make([]Node, 0, len(candidates)) + for i := 0; i < len(candidates); i++ { + weightsInterval := rand.Int63n(totalWeights) + lastWeights := int64(0) + for k, weights := range candidatesWeights { + if (weightsInterval >= lastWeights) && (weightsInterval < lastWeights+weights) { + sortedCandidates = append(sortedCandidates, candidates[k]) + candidatesWeights[k] = 0 + totalWeights -= weights + break + } + lastWeights += weights } - glog.V(2).Infoln("select rest node candidate:", node.Id()) - candidates = append(candidates, node) } - n.RUnlock() - glog.V(2).Infoln(n.Id(), "picking", numberOfNodes-1, "from rest", len(candidates), "node candidates") - ret := len(restNodes) == 0 - for k, node := range candidates { - if k < len(restNodes) { - restNodes[k] = node - if k == len(restNodes)-1 { - ret = true + + restNodes = make([]Node, 0, numberOfNodes-1) + ret := false + n.RLock() + for k, node := range sortedCandidates { + if err := filterFirstNodeFn(node); err == nil { + firstNode = node + if k >= numberOfNodes-1 { + restNodes = sortedCandidates[:numberOfNodes-1] + } else { + restNodes = append(restNodes, sortedCandidates[:k]...) + restNodes = append(restNodes, sortedCandidates[k+1:numberOfNodes]...) } + ret = true + break } else { - r := rand.Intn(k + 1) - if r < len(restNodes) { - restNodes[r] = node - } + errs = append(errs, string(node.Id())+":"+err.Error()) } } + n.RUnlock() if !ret { - glog.V(2).Infoln(n.Id(), "failed to pick", numberOfNodes-1, "from rest", len(candidates), "node candidates") - err = errors.New("Not enough data node found!") + return nil, nil, errors.New("No matching data node found! 
\n" + strings.Join(errs, "\n")) } return } @@ -126,8 +142,12 @@ func (n *NodeImpl) String() string { func (n *NodeImpl) Id() NodeId { return n.id } -func (n *NodeImpl) FreeSpace() int { - return n.maxVolumeCount - n.volumeCount +func (n *NodeImpl) FreeSpace() int64 { + freeVolumeSlotCount := n.maxVolumeCount + n.remoteVolumeCount - n.volumeCount + if n.ecShardCount > 0 { + freeVolumeSlotCount = freeVolumeSlotCount - n.ecShardCount/erasure_coding.DataShardsCount - 1 + } + return freeVolumeSlotCount } func (n *NodeImpl) SetParent(node Node) { n.parent = node @@ -146,7 +166,7 @@ func (n *NodeImpl) Parent() Node { func (n *NodeImpl) GetValue() interface{} { return n.value } -func (n *NodeImpl) ReserveOneVolume(r int) (assignedNode *DataNode, err error) { +func (n *NodeImpl) ReserveOneVolume(r int64) (assignedNode *DataNode, err error) { n.RLock() defer n.RUnlock() for _, node := range n.children { @@ -171,25 +191,37 @@ func (n *NodeImpl) ReserveOneVolume(r int) (assignedNode *DataNode, err error) { return nil, errors.New("No free volume slot found!") } -func (n *NodeImpl) UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int) { //can be negative - n.maxVolumeCount += maxVolumeCountDelta +func (n *NodeImpl) UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta int64) { //can be negative + atomic.AddInt64(&n.maxVolumeCount, maxVolumeCountDelta) if n.parent != nil { n.parent.UpAdjustMaxVolumeCountDelta(maxVolumeCountDelta) } } -func (n *NodeImpl) UpAdjustVolumeCountDelta(volumeCountDelta int) { //can be negative - n.volumeCount += volumeCountDelta +func (n *NodeImpl) UpAdjustVolumeCountDelta(volumeCountDelta int64) { //can be negative + atomic.AddInt64(&n.volumeCount, volumeCountDelta) if n.parent != nil { n.parent.UpAdjustVolumeCountDelta(volumeCountDelta) } } -func (n *NodeImpl) UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int) { //can be negative - n.activeVolumeCount += activeVolumeCountDelta +func (n *NodeImpl) UpAdjustRemoteVolumeCountDelta(remoteVolumeCountDelta int64) { //can be negative + atomic.AddInt64(&n.remoteVolumeCount, remoteVolumeCountDelta) + if n.parent != nil { + n.parent.UpAdjustRemoteVolumeCountDelta(remoteVolumeCountDelta) + } +} +func (n *NodeImpl) UpAdjustEcShardCountDelta(ecShardCountDelta int64) { //can be negative + atomic.AddInt64(&n.ecShardCount, ecShardCountDelta) + if n.parent != nil { + n.parent.UpAdjustEcShardCountDelta(ecShardCountDelta) + } +} +func (n *NodeImpl) UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta int64) { //can be negative + atomic.AddInt64(&n.activeVolumeCount, activeVolumeCountDelta) if n.parent != nil { n.parent.UpAdjustActiveVolumeCountDelta(activeVolumeCountDelta) } } -func (n *NodeImpl) UpAdjustMaxVolumeId(vid storage.VolumeId) { //can be negative +func (n *NodeImpl) UpAdjustMaxVolumeId(vid needle.VolumeId) { //can be negative if n.maxVolumeId < vid { n.maxVolumeId = vid if n.parent != nil { @@ -197,16 +229,22 @@ func (n *NodeImpl) UpAdjustMaxVolumeId(vid storage.VolumeId) { //can be negative } } } -func (n *NodeImpl) GetMaxVolumeId() storage.VolumeId { +func (n *NodeImpl) GetMaxVolumeId() needle.VolumeId { return n.maxVolumeId } -func (n *NodeImpl) GetVolumeCount() int { +func (n *NodeImpl) GetVolumeCount() int64 { return n.volumeCount } -func (n *NodeImpl) GetActiveVolumeCount() int { +func (n *NodeImpl) GetEcShardCount() int64 { + return n.ecShardCount +} +func (n *NodeImpl) GetRemoteVolumeCount() int64 { + return n.remoteVolumeCount +} +func (n *NodeImpl) GetActiveVolumeCount() int64 { return n.activeVolumeCount } -func (n 
*NodeImpl) GetMaxVolumeCount() int { +func (n *NodeImpl) GetMaxVolumeCount() int64 { return n.maxVolumeCount } @@ -218,6 +256,8 @@ func (n *NodeImpl) LinkChildNode(node Node) { n.UpAdjustMaxVolumeCountDelta(node.GetMaxVolumeCount()) n.UpAdjustMaxVolumeId(node.GetMaxVolumeId()) n.UpAdjustVolumeCountDelta(node.GetVolumeCount()) + n.UpAdjustRemoteVolumeCountDelta(node.GetRemoteVolumeCount()) + n.UpAdjustEcShardCountDelta(node.GetEcShardCount()) n.UpAdjustActiveVolumeCountDelta(node.GetActiveVolumeCount()) node.SetParent(n) glog.V(0).Infoln(n, "adds child", node.Id()) @@ -232,6 +272,8 @@ func (n *NodeImpl) UnlinkChildNode(nodeId NodeId) { node.SetParent(nil) delete(n.children, node.Id()) n.UpAdjustVolumeCountDelta(-node.GetVolumeCount()) + n.UpAdjustRemoteVolumeCountDelta(-node.GetRemoteVolumeCount()) + n.UpAdjustEcShardCountDelta(-node.GetEcShardCount()) n.UpAdjustActiveVolumeCountDelta(-node.GetActiveVolumeCount()) n.UpAdjustMaxVolumeCountDelta(-node.GetMaxVolumeCount()) glog.V(0).Infoln(n, "removes", node.Id()) diff --git a/weed/topology/rack.go b/weed/topology/rack.go index a48d64323..1921c0c05 100644 --- a/weed/topology/rack.go +++ b/weed/topology/rack.go @@ -1,6 +1,7 @@ package topology import ( + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" "strconv" "time" ) @@ -27,7 +28,7 @@ func (r *Rack) FindDataNode(ip string, port int) *DataNode { } return nil } -func (r *Rack) GetOrCreateDataNode(ip string, port int, publicUrl string, maxVolumeCount int) *DataNode { +func (r *Rack) GetOrCreateDataNode(ip string, port int, publicUrl string, maxVolumeCount int64) *DataNode { for _, c := range r.Children() { dn := c.(*DataNode) if dn.MatchLocation(ip, port) { @@ -58,3 +59,19 @@ func (r *Rack) ToMap() interface{} { m["DataNodes"] = dns return m } + +func (r *Rack) ToRackInfo() *master_pb.RackInfo { + m := &master_pb.RackInfo{ + Id: string(r.Id()), + VolumeCount: uint64(r.GetVolumeCount()), + MaxVolumeCount: uint64(r.GetMaxVolumeCount()), + FreeVolumeCount: uint64(r.FreeSpace()), + ActiveVolumeCount: uint64(r.GetActiveVolumeCount()), + RemoteVolumeCount: uint64(r.GetRemoteVolumeCount()), + } + for _, c := range r.Children() { + dn := c.(*DataNode) + m.DataNodeInfos = append(m.DataNodeInfos, dn.ToDataNodeInfo()) + } + return m +} diff --git a/weed/topology/store_replicate.go b/weed/topology/store_replicate.go index 42ad5bf10..8c4996d45 100644 --- a/weed/topology/store_replicate.go +++ b/weed/topology/store_replicate.go @@ -1,7 +1,6 @@ package topology import ( - "bytes" "encoding/json" "errors" "fmt" @@ -14,99 +13,106 @@ import ( "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/security" "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" "github.com/chrislusf/seaweedfs/weed/util" ) func ReplicatedWrite(masterNode string, s *storage.Store, - volumeId storage.VolumeId, needle *storage.Needle, - r *http.Request) (size uint32, errorStatus string) { + volumeId needle.VolumeId, n *needle.Needle, + r *http.Request) (size uint32, isUnchanged bool, err error) { //check JWT jwt := security.GetJwt(r) - ret, err := s.Write(volumeId, needle) - needToReplicate := !s.HasVolume(volumeId) - if err != nil { - errorStatus = "Failed to write to local disk (" + err.Error() + ")" - } else { - needToReplicate = needToReplicate || s.GetVolume(volumeId).NeedToReplicate() + var remoteLocations []operation.Location + if r.FormValue("type") != "replicate" { + remoteLocations, err = getWritableRemoteReplications(s, volumeId, masterNode) + 
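// Note the gating just above: remote replica locations are looked up only for
// the original client request; a request that already carries type=replicate
// came from a peer volume server, so it is written locally and never fanned
// out again, which prevents replication loops.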
if err != nil { + glog.V(0).Infoln(err) + return + } } - if !needToReplicate { - needToReplicate = s.GetVolume(volumeId).NeedToReplicate() + + size, isUnchanged, err = s.WriteVolumeNeedle(volumeId, n) + if err != nil { + err = fmt.Errorf("failed to write to local disk: %v", err) + glog.V(0).Infoln(err) + return } - if needToReplicate { //send to other replica locations - if r.FormValue("type") != "replicate" { - - if err = distributedOperation(masterNode, s, volumeId, func(location operation.Location) error { - u := url.URL{ - Scheme: "http", - Host: location.Url, - Path: r.URL.Path, - } - q := url.Values{ - "type": {"replicate"}, - } - if needle.LastModified > 0 { - q.Set("ts", strconv.FormatUint(needle.LastModified, 10)) - } - if needle.IsChunkedManifest() { - q.Set("cm", "true") + + if len(remoteLocations) > 0 { //send to other replica locations + if err = distributedOperation(remoteLocations, s, func(location operation.Location) error { + u := url.URL{ + Scheme: "http", + Host: location.Url, + Path: r.URL.Path, + } + q := url.Values{ + "type": {"replicate"}, + "ttl": {n.Ttl.String()}, + } + if n.LastModified > 0 { + q.Set("ts", strconv.FormatUint(n.LastModified, 10)) + } + if n.IsChunkedManifest() { + q.Set("cm", "true") + } + u.RawQuery = q.Encode() + + pairMap := make(map[string]string) + if n.HasPairs() { + tmpMap := make(map[string]string) + err := json.Unmarshal(n.Pairs, &tmpMap) + if err != nil { + glog.V(0).Infoln("Unmarshal pairs error:", err) } - u.RawQuery = q.Encode() - - pairMap := make(map[string]string) - if needle.HasPairs() { - tmpMap := make(map[string]string) - err := json.Unmarshal(needle.Pairs, &tmpMap) - if err != nil { - glog.V(0).Infoln("Unmarshal pairs error:", err) - } - for k, v := range tmpMap { - pairMap[storage.PairNamePrefix+k] = v - } + for k, v := range tmpMap { + pairMap[needle.PairNamePrefix+k] = v } - - _, err := operation.Upload(u.String(), - string(needle.Name), bytes.NewReader(needle.Data), needle.IsGzipped(), string(needle.Mime), - pairMap, jwt) - return err - }); err != nil { - ret = 0 - errorStatus = fmt.Sprintf("Failed to write to replicas for volume %d: %v", volumeId, err) } + + // volume server do not know about encryption + _, err := operation.UploadData(u.String(), string(n.Name), false, n.Data, n.IsGzipped(), string(n.Mime), pairMap, jwt) + return err + }); err != nil { + size = 0 + err = fmt.Errorf("failed to write to replicas for volume %d: %v", volumeId, err) + glog.V(0).Infoln(err) } } - size = ret return } func ReplicatedDelete(masterNode string, store *storage.Store, - volumeId storage.VolumeId, n *storage.Needle, - r *http.Request) (uint32, error) { + volumeId needle.VolumeId, n *needle.Needle, + r *http.Request) (size uint32, err error) { //check JWT jwt := security.GetJwt(r) - ret, err := store.Delete(volumeId, n) + var remoteLocations []operation.Location + if r.FormValue("type") != "replicate" { + remoteLocations, err = getWritableRemoteReplications(store, volumeId, masterNode) + if err != nil { + glog.V(0).Infoln(err) + return + } + } + + size, err = store.DeleteVolumeNeedle(volumeId, n) if err != nil { glog.V(0).Infoln("delete error:", err) - return ret, err + return } - needToReplicate := !store.HasVolume(volumeId) - if !needToReplicate && ret > 0 { - needToReplicate = store.GetVolume(volumeId).NeedToReplicate() - } - if needToReplicate { //send to other replica locations - if r.FormValue("type") != "replicate" { - if err = distributedOperation(masterNode, store, volumeId, func(location operation.Location) error { - return 
util.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", jwt) - }); err != nil { - ret = 0 - } + if len(remoteLocations) > 0 { //send to other replica locations + if err = distributedOperation(remoteLocations, store, func(location operation.Location) error { + return util.Delete("http://"+location.Url+r.URL.Path+"?type=replicate", string(jwt)) + }); err != nil { + size = 0 } } - return ret, err + return } type DistributedOperationResult map[string]error @@ -129,32 +135,44 @@ type RemoteResult struct { Error error } -func distributedOperation(masterNode string, store *storage.Store, volumeId storage.VolumeId, op func(location operation.Location) error) error { - if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil { - length := 0 - selfUrl := (store.Ip + ":" + strconv.Itoa(store.Port)) - results := make(chan RemoteResult) - for _, location := range lookupResult.Locations { - if location.Url != selfUrl { - length++ - go func(location operation.Location, results chan RemoteResult) { - results <- RemoteResult{location.Url, op(location)} - }(location, results) +func distributedOperation(locations []operation.Location, store *storage.Store, op func(location operation.Location) error) error { + length := len(locations) + results := make(chan RemoteResult) + for _, location := range locations { + go func(location operation.Location, results chan RemoteResult) { + results <- RemoteResult{location.Url, op(location)} + }(location, results) + } + ret := DistributedOperationResult(make(map[string]error)) + for i := 0; i < length; i++ { + result := <-results + ret[result.Host] = result.Error + } + + return ret.Error() +} + +func getWritableRemoteReplications(s *storage.Store, volumeId needle.VolumeId, masterNode string) ( + remoteLocations []operation.Location, err error) { + copyCount := s.GetVolume(volumeId).ReplicaPlacement.GetCopyCount() + if copyCount > 1 { + if lookupResult, lookupErr := operation.Lookup(masterNode, volumeId.String()); lookupErr == nil { + if len(lookupResult.Locations) < copyCount { + err = fmt.Errorf("replicating opetations [%d] is less than volume %d replication copy count [%d]", + len(lookupResult.Locations), volumeId, copyCount) + return } - } - ret := DistributedOperationResult(make(map[string]error)) - for i := 0; i < length; i++ { - result := <-results - ret[result.Host] = result.Error - } - if volume := store.GetVolume(volumeId); volume != nil { - if length+1 < volume.ReplicaPlacement.GetCopyCount() { - return fmt.Errorf("replicating opetations [%d] is less than volume's replication copy count [%d]", length+1, volume.ReplicaPlacement.GetCopyCount()) + selfUrl := s.Ip + ":" + strconv.Itoa(s.Port) + for _, location := range lookupResult.Locations { + if location.Url != selfUrl { + remoteLocations = append(remoteLocations, location) + } } + } else { + err = fmt.Errorf("failed to lookup for %d: %v", volumeId, lookupErr) + return } - return ret.Error() - } else { - glog.V(0).Infoln() - return fmt.Errorf("Failed to lookup for %d: %v", volumeId, lookupErr) } + + return } diff --git a/weed/topology/topology.go b/weed/topology/topology.go index 4242bfa05..fbf998707 100644 --- a/weed/topology/topology.go +++ b/weed/topology/topology.go @@ -2,20 +2,28 @@ package topology import ( "errors" + "fmt" "math/rand" + "sync" "github.com/chrislusf/raft" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/pb/master_pb" "github.com/chrislusf/seaweedfs/weed/sequence" "github.com/chrislusf/seaweedfs/weed/storage" + 
"github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" "github.com/chrislusf/seaweedfs/weed/util" ) type Topology struct { + vacuumLockCounter int64 NodeImpl - collectionMap *util.ConcurrentReadMap + collectionMap *util.ConcurrentReadMap + ecShardMap map[needle.VolumeId]*EcShardLocations + ecShardMapLock sync.RWMutex pulse int64 @@ -37,6 +45,7 @@ func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, puls t.NodeImpl.value = t t.children = make(map[NodeId]Node) t.collectionMap = util.NewConcurrentReadMap() + t.ecShardMap = make(map[needle.VolumeId]*EcShardLocations) t.pulse = int64(pulse) t.volumeSizeLimit = volumeSizeLimit @@ -50,8 +59,13 @@ func NewTopology(id string, seq sequence.Sequencer, volumeSizeLimit uint64, puls } func (t *Topology) IsLeader() bool { - if leader, e := t.Leader(); e == nil { - return leader == t.RaftServer.Name() + if t.RaftServer != nil { + if t.RaftServer.State() == raft.Leader { + return true + } + if t.RaftServer.Leader() == "" { + return true + } } return false } @@ -66,13 +80,13 @@ func (t *Topology) Leader() (string, error) { if l == "" { // We are a single node cluster, we are the leader - return t.RaftServer.Name(), errors.New("Raft Server not initialized!") + return t.RaftServer.Name(), nil } return l, nil } -func (t *Topology) Lookup(collection string, vid storage.VolumeId) []*DataNode { +func (t *Topology) Lookup(collection string, vid needle.VolumeId) (dataNodes []*DataNode) { //maybe an issue if lots of collections? if collection == "" { for _, c := range t.collectionMap.Items() { @@ -85,14 +99,24 @@ func (t *Topology) Lookup(collection string, vid storage.VolumeId) []*DataNode { return c.(*Collection).Lookup(vid) } } + + if locations, found := t.LookupEcShards(vid); found { + for _, loc := range locations.Locations { + dataNodes = append(dataNodes, loc...) 
+ } + return dataNodes + } + return nil } -func (t *Topology) NextVolumeId() storage.VolumeId { +func (t *Topology) NextVolumeId() (needle.VolumeId, error) { vid := t.GetMaxVolumeId() next := vid.Next() - go t.RaftServer.Do(NewMaxVolumeIdCommand(next)) - return next + if _, err := t.RaftServer.Do(NewMaxVolumeIdCommand(next)); err != nil { + return 0, err + } + return next, nil } func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool { @@ -102,19 +126,43 @@ func (t *Topology) HasWritableVolume(option *VolumeGrowOption) bool { func (t *Topology) PickForWrite(count uint64, option *VolumeGrowOption) (string, uint64, *DataNode, error) { vid, count, datanodes, err := t.GetVolumeLayout(option.Collection, option.ReplicaPlacement, option.Ttl).PickForWrite(count, option) - if err != nil || datanodes.Length() == 0 { - return "", 0, nil, errors.New("No writable volumes available!") + if err != nil { + return "", 0, nil, fmt.Errorf("failed to find writable volumes for collection:%s replication:%s ttl:%s error: %v", option.Collection, option.ReplicaPlacement.String(), option.Ttl.String(), err) + } + if datanodes.Length() == 0 { + return "", 0, nil, fmt.Errorf("no writable volumes available for collection:%s replication:%s ttl:%s", option.Collection, option.ReplicaPlacement.String(), option.Ttl.String()) } - fileId, count := t.Sequence.NextFileId(count) - return storage.NewFileId(*vid, fileId, rand.Uint32()).String(), count, datanodes.Head(), nil + fileId := t.Sequence.NextFileId(count) + return needle.NewFileId(*vid, fileId, rand.Uint32()).String(), count, datanodes.Head(), nil } -func (t *Topology) GetVolumeLayout(collectionName string, rp *storage.ReplicaPlacement, ttl *storage.TTL) *VolumeLayout { +func (t *Topology) GetVolumeLayout(collectionName string, rp *super_block.ReplicaPlacement, ttl *needle.TTL) *VolumeLayout { return t.collectionMap.Get(collectionName, func() interface{} { return NewCollection(collectionName, t.volumeSizeLimit) }).(*Collection).GetOrCreateVolumeLayout(rp, ttl) } +func (t *Topology) ListCollections(includeNormalVolumes, includeEcVolumes bool) (ret []string) { + + mapOfCollections := make(map[string]bool) + for _, c := range t.collectionMap.Items() { + mapOfCollections[c.(*Collection).Name] = true + } + + if includeEcVolumes { + t.ecShardMapLock.RLock() + for _, ecVolumeLocation := range t.ecShardMap { + mapOfCollections[ecVolumeLocation.Collection] = true + } + t.ecShardMapLock.RUnlock() + } + + for k := range mapOfCollections { + ret = append(ret, k) + } + return ret +} + func (t *Topology) FindCollection(collectionName string) (*Collection, bool) { c, hasCollection := t.collectionMap.Find(collectionName) if !hasCollection { @@ -152,6 +200,7 @@ func (t *Topology) GetOrCreateDataCenter(dcName string) *DataCenter { } func (t *Topology) SyncDataNodeRegistration(volumes []*master_pb.VolumeInformationMessage, dn *DataNode) (newVolumes, deletedVolumes []storage.VolumeInfo) { + // convert into in memory struct storage.VolumeInfo var volumeInfos []storage.VolumeInfo for _, v := range volumes { if vi, err := storage.NewVolumeInfo(v); err == nil { @@ -160,8 +209,9 @@ func (t *Topology) SyncDataNodeRegistration(volumes []*master_pb.VolumeInformati glog.V(0).Infof("Fail to convert joined volume information: %v", err) } } + // find out the delta volumes newVolumes, deletedVolumes = dn.UpdateVolumes(volumeInfos) - for _, v := range volumeInfos { + for _, v := range newVolumes { t.RegisterVolumeLayout(v, dn) } for _, v := range deletedVolumes { @@ -169,3 +219,33 @@ func (t 
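Lookup above now falls back to the erasure-coded shard map when a volume id is missing from every regular collection layout, so EC-only volumes stay addressable. A reduced sketch of the two-step lookup (simplified types; the real maps hold *DataNode lists):

```go
// lookupLocations checks regular volume layouts first, then the EC shard
// map, and returns nil when the id is unknown to both.
func lookupLocations(vid uint32, volumes, ecVolumes map[uint32][]string) []string {
	if locs, ok := volumes[vid]; ok {
		return locs
	}
	if locs, ok := ecVolumes[vid]; ok {
		return locs // every server holding any shard of the EC volume
	}
	return nil
}
```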
*Topology) SyncDataNodeRegistration(volumes []*master_pb.VolumeInformati } return } + +func (t *Topology) IncrementalSyncDataNodeRegistration(newVolumes, deletedVolumes []*master_pb.VolumeShortInformationMessage, dn *DataNode) { + var newVis, oldVis []storage.VolumeInfo + for _, v := range newVolumes { + vi, err := storage.NewVolumeInfoFromShort(v) + if err != nil { + glog.V(0).Infof("NewVolumeInfoFromShort %v: %v", v, err) + continue + } + newVis = append(newVis, vi) + } + for _, v := range deletedVolumes { + vi, err := storage.NewVolumeInfoFromShort(v) + if err != nil { + glog.V(0).Infof("NewVolumeInfoFromShort %v: %v", v, err) + continue + } + oldVis = append(oldVis, vi) + } + dn.DeltaUpdateVolumes(newVis, oldVis) + + for _, vi := range newVis { + t.RegisterVolumeLayout(vi, dn) + } + for _, vi := range oldVis { + t.UnRegisterVolumeLayout(vi, dn) + } + + return +} diff --git a/weed/topology/topology_ec.go b/weed/topology/topology_ec.go new file mode 100644 index 000000000..93b39bb5d --- /dev/null +++ b/weed/topology/topology_ec.go @@ -0,0 +1,173 @@ +package topology + +import ( + "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/chrislusf/seaweedfs/weed/pb/master_pb" + "github.com/chrislusf/seaweedfs/weed/storage/erasure_coding" + "github.com/chrislusf/seaweedfs/weed/storage/needle" +) + +type EcShardLocations struct { + Collection string + Locations [erasure_coding.TotalShardsCount][]*DataNode +} + +func (t *Topology) SyncDataNodeEcShards(shardInfos []*master_pb.VolumeEcShardInformationMessage, dn *DataNode) (newShards, deletedShards []*erasure_coding.EcVolumeInfo) { + // convert into in memory struct storage.VolumeInfo + var shards []*erasure_coding.EcVolumeInfo + for _, shardInfo := range shardInfos { + shards = append(shards, + erasure_coding.NewEcVolumeInfo( + shardInfo.Collection, + needle.VolumeId(shardInfo.Id), + erasure_coding.ShardBits(shardInfo.EcIndexBits))) + } + // find out the delta volumes + newShards, deletedShards = dn.UpdateEcShards(shards) + for _, v := range newShards { + t.RegisterEcShards(v, dn) + } + for _, v := range deletedShards { + t.UnRegisterEcShards(v, dn) + } + return +} + +func (t *Topology) IncrementalSyncDataNodeEcShards(newEcShards, deletedEcShards []*master_pb.VolumeEcShardInformationMessage, dn *DataNode) { + // convert into in memory struct storage.VolumeInfo + var newShards, deletedShards []*erasure_coding.EcVolumeInfo + for _, shardInfo := range newEcShards { + newShards = append(newShards, + erasure_coding.NewEcVolumeInfo( + shardInfo.Collection, + needle.VolumeId(shardInfo.Id), + erasure_coding.ShardBits(shardInfo.EcIndexBits))) + } + for _, shardInfo := range deletedEcShards { + deletedShards = append(deletedShards, + erasure_coding.NewEcVolumeInfo( + shardInfo.Collection, + needle.VolumeId(shardInfo.Id), + erasure_coding.ShardBits(shardInfo.EcIndexBits))) + } + + dn.DeltaUpdateEcShards(newShards, deletedShards) + + for _, v := range newShards { + t.RegisterEcShards(v, dn) + } + for _, v := range deletedShards { + t.UnRegisterEcShards(v, dn) + } + return +} + +func NewEcShardLocations(collection string) *EcShardLocations { + return &EcShardLocations{ + Collection: collection, + } +} + +func (loc *EcShardLocations) AddShard(shardId erasure_coding.ShardId, dn *DataNode) (added bool) { + dataNodes := loc.Locations[shardId] + for _, n := range dataNodes { + if n.Id() == dn.Id() { + return false + } + } + loc.Locations[shardId] = append(dataNodes, dn) + return true +} + +func (loc *EcShardLocations) DeleteShard(shardId erasure_coding.ShardId, 
dn *DataNode) (deleted bool) { + dataNodes := loc.Locations[shardId] + foundIndex := -1 + for index, n := range dataNodes { + if n.Id() == dn.Id() { + foundIndex = index + } + } + if foundIndex < 0 { + return false + } + loc.Locations[shardId] = append(dataNodes[:foundIndex], dataNodes[foundIndex+1:]...) + return true +} + +func (t *Topology) RegisterEcShards(ecShardInfos *erasure_coding.EcVolumeInfo, dn *DataNode) { + + t.ecShardMapLock.Lock() + defer t.ecShardMapLock.Unlock() + + locations, found := t.ecShardMap[ecShardInfos.VolumeId] + if !found { + locations = NewEcShardLocations(ecShardInfos.Collection) + t.ecShardMap[ecShardInfos.VolumeId] = locations + } + for _, shardId := range ecShardInfos.ShardIds() { + locations.AddShard(shardId, dn) + } +} + +func (t *Topology) UnRegisterEcShards(ecShardInfos *erasure_coding.EcVolumeInfo, dn *DataNode) { + glog.Infof("removing ec shard info:%+v", ecShardInfos) + t.ecShardMapLock.Lock() + defer t.ecShardMapLock.Unlock() + + locations, found := t.ecShardMap[ecShardInfos.VolumeId] + if !found { + return + } + for _, shardId := range ecShardInfos.ShardIds() { + locations.DeleteShard(shardId, dn) + } +} + +func (t *Topology) LookupEcShards(vid needle.VolumeId) (locations *EcShardLocations, found bool) { + t.ecShardMapLock.RLock() + defer t.ecShardMapLock.RUnlock() + + locations, found = t.ecShardMap[vid] + + return +} + +func (t *Topology) ListEcServersByCollection(collection string) (dataNodes []string) { + t.ecShardMapLock.RLock() + defer t.ecShardMapLock.RUnlock() + + dataNodeMap := make(map[string]bool) + for _, ecVolumeLocation := range t.ecShardMap { + if ecVolumeLocation.Collection == collection { + for _, locations := range ecVolumeLocation.Locations { + for _, loc := range locations { + dataNodeMap[string(loc.Id())] = true + } + } + } + } + + for k := range dataNodeMap { + dataNodes = append(dataNodes, k) + } + + return +} + +func (t *Topology) DeleteEcCollection(collection string) { + t.ecShardMapLock.Lock() + defer t.ecShardMapLock.Unlock() + + var vids []needle.VolumeId + for vid, ecVolumeLocation := range t.ecShardMap { + if ecVolumeLocation.Collection == collection { + vids = append(vids, vid) + } + } + + for _, vid := range vids { + delete(t.ecShardMap, vid) + } + + return +} diff --git a/weed/topology/topology_event_handling.go b/weed/topology/topology_event_handling.go index a301103eb..068bd401e 100644 --- a/weed/topology/topology_event_handling.go +++ b/weed/topology/topology_event_handling.go @@ -1,6 +1,7 @@ package topology import ( + "google.golang.org/grpc" "math/rand" "time" @@ -8,7 +9,7 @@ import ( "github.com/chrislusf/seaweedfs/weed/storage" ) -func (t *Topology) StartRefreshWritableVolumes(garbageThreshold float64, preallocate int64) { +func (t *Topology) StartRefreshWritableVolumes(grpcDialOption grpc.DialOption, garbageThreshold float64, preallocate int64) { go func() { for { if t.IsLeader() { @@ -22,7 +23,7 @@ func (t *Topology) StartRefreshWritableVolumes(garbageThreshold float64, preallo c := time.Tick(15 * time.Minute) for _ = range c { if t.IsLeader() { - t.Vacuum(garbageThreshold, preallocate) + t.Vacuum(grpcDialOption, garbageThreshold, preallocate) } } }(garbageThreshold) @@ -58,6 +59,7 @@ func (t *Topology) UnRegisterDataNode(dn *DataNode) { vl.SetVolumeUnavailable(dn, v.Id) } dn.UpAdjustVolumeCountDelta(-dn.GetVolumeCount()) + dn.UpAdjustRemoteVolumeCountDelta(-dn.GetRemoteVolumeCount()) dn.UpAdjustActiveVolumeCountDelta(-dn.GetActiveVolumeCount()) dn.UpAdjustMaxVolumeCountDelta(-dn.GetMaxVolumeCount()) if 
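EcShardLocations above keeps, per volume, a fixed-size array indexed by shard id (SeaweedFS erasure coding uses 10 data plus 4 parity shards, hence the TotalShardsCount slots), with a slice of servers per shard. A trimmed sketch of the add-shard bookkeeping (string server ids instead of *DataNode):

```go
const totalShards = 14 // 10 data + 4 parity shards

type ecShardLocations struct {
	collection string
	locations  [totalShards][]string // shard id -> servers holding that shard
}

// addShard registers a server for a shard; re-registering the same server
// is a no-op, matching AddShard above.
func (l *ecShardLocations) addShard(shardId int, server string) bool {
	for _, s := range l.locations[shardId] {
		if s == server {
			return false
		}
	}
	l.locations[shardId] = append(l.locations[shardId], server)
	return true
}
```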
dn.Parent() != nil { diff --git a/weed/topology/topology_map.go b/weed/topology/topology_map.go index 769ba0e2a..73c55d77d 100644 --- a/weed/topology/topology_map.go +++ b/weed/topology/topology_map.go @@ -23,7 +23,7 @@ func (t *Topology) ToMap() interface{} { } } } - m["layouts"] = layouts + m["Layouts"] = layouts return m } @@ -68,9 +68,28 @@ func (t *Topology) ToVolumeLocations() (volumeLocations []*master_pb.VolumeLocat for _, v := range dn.GetVolumes() { volumeLocation.NewVids = append(volumeLocation.NewVids, uint32(v.Id)) } + for _, s := range dn.GetEcShards() { + volumeLocation.NewVids = append(volumeLocation.NewVids, uint32(s.VolumeId)) + } volumeLocations = append(volumeLocations, volumeLocation) } } } return } + +func (t *Topology) ToTopologyInfo() *master_pb.TopologyInfo { + m := &master_pb.TopologyInfo{ + Id: string(t.Id()), + VolumeCount: uint64(t.GetVolumeCount()), + MaxVolumeCount: uint64(t.GetMaxVolumeCount()), + FreeVolumeCount: uint64(t.FreeSpace()), + ActiveVolumeCount: uint64(t.GetActiveVolumeCount()), + RemoteVolumeCount: uint64(t.GetRemoteVolumeCount()), + } + for _, c := range t.Children() { + dc := c.(*DataCenter) + m.DataCenterInfos = append(m.DataCenterInfos, dc.ToDataCenterInfo()) + } + return m +} diff --git a/weed/topology/topology_test.go b/weed/topology/topology_test.go index 07dc9c67b..e7676ccf7 100644 --- a/weed/topology/topology_test.go +++ b/weed/topology/topology_test.go @@ -4,6 +4,9 @@ import ( "github.com/chrislusf/seaweedfs/weed/pb/master_pb" "github.com/chrislusf/seaweedfs/weed/sequence" "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + "testing" ) @@ -39,7 +42,7 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) { DeletedByteCount: 34524, ReadOnly: false, ReplicaPlacement: uint32(0), - Version: uint32(1), + Version: uint32(needle.CurrentVersion), Ttl: 0, } volumeMessages = append(volumeMessages, volumeMessage) @@ -47,8 +50,8 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) { topo.SyncDataNodeRegistration(volumeMessages, dn) - assert(t, "activeVolumeCount1", topo.activeVolumeCount, volumeCount) - assert(t, "volumeCount", topo.volumeCount, volumeCount) + assert(t, "activeVolumeCount1", int(topo.activeVolumeCount), volumeCount) + assert(t, "volumeCount", int(topo.volumeCount), volumeCount) } { @@ -64,20 +67,68 @@ func TestHandlingVolumeServerHeartbeat(t *testing.T) { DeletedByteCount: 345240, ReadOnly: false, ReplicaPlacement: uint32(0), - Version: uint32(1), + Version: uint32(needle.CurrentVersion), Ttl: 0, } volumeMessages = append(volumeMessages, volumeMessage) } topo.SyncDataNodeRegistration(volumeMessages, dn) - assert(t, "activeVolumeCount1", topo.activeVolumeCount, volumeCount) - assert(t, "volumeCount", topo.volumeCount, volumeCount) + //rp, _ := storage.NewReplicaPlacementFromString("000") + //layout := topo.GetVolumeLayout("", rp, needle.EMPTY_TTL) + //assert(t, "writables", len(layout.writables), volumeCount) + + assert(t, "activeVolumeCount1", int(topo.activeVolumeCount), volumeCount) + assert(t, "volumeCount", int(topo.volumeCount), volumeCount) + } + + { + volumeCount := 6 + newVolumeShortMessage := &master_pb.VolumeShortInformationMessage{ + Id: uint32(3), + Collection: "", + ReplicaPlacement: uint32(0), + Version: uint32(needle.CurrentVersion), + Ttl: 0, + } + topo.IncrementalSyncDataNodeRegistration( + []*master_pb.VolumeShortInformationMessage{newVolumeShortMessage}, + nil, + dn) + rp, _ := 
super_block.NewReplicaPlacementFromString("000") + layout := topo.GetVolumeLayout("", rp, needle.EMPTY_TTL) + assert(t, "writables after repeated add", len(layout.writables), volumeCount) + + assert(t, "activeVolumeCount1", int(topo.activeVolumeCount), volumeCount) + assert(t, "volumeCount", int(topo.volumeCount), volumeCount) + + topo.IncrementalSyncDataNodeRegistration( + nil, + []*master_pb.VolumeShortInformationMessage{newVolumeShortMessage}, + dn) + assert(t, "writables after deletion", len(layout.writables), volumeCount-1) + assert(t, "activeVolumeCount1", int(topo.activeVolumeCount), volumeCount-1) + assert(t, "volumeCount", int(topo.volumeCount), volumeCount-1) + + topo.IncrementalSyncDataNodeRegistration( + []*master_pb.VolumeShortInformationMessage{newVolumeShortMessage}, + nil, + dn) + + for vid, _ := range layout.vid2location { + println("after add volume id", vid) + } + for _, vid := range layout.writables { + println("after add writable volume id", vid) + } + + assert(t, "writables after add back", len(layout.writables), volumeCount) + } topo.UnRegisterDataNode(dn) - assert(t, "activeVolumeCount2", topo.activeVolumeCount, 0) + assert(t, "activeVolumeCount2", int(topo.activeVolumeCount), 0) } @@ -96,20 +147,21 @@ func TestAddRemoveVolume(t *testing.T) { dn := rack.GetOrCreateDataNode("127.0.0.1", 34534, "127.0.0.1", 25) v := storage.VolumeInfo{ - Id: storage.VolumeId(1), + Id: needle.VolumeId(1), Size: 100, Collection: "xcollection", FileCount: 123, DeleteCount: 23, DeletedByteCount: 45, ReadOnly: false, - Version: storage.CurrentVersion, - ReplicaPlacement: &storage.ReplicaPlacement{}, - Ttl: storage.EMPTY_TTL, + Version: needle.CurrentVersion, + ReplicaPlacement: &super_block.ReplicaPlacement{}, + Ttl: needle.EMPTY_TTL, } dn.UpdateVolumes([]storage.VolumeInfo{v}) topo.RegisterVolumeLayout(v, dn) + topo.RegisterVolumeLayout(v, dn) if _, hasCollection := topo.FindCollection(v.Collection); !hasCollection { t.Errorf("collection %v should exist", v.Collection) diff --git a/weed/topology/topology_vacuum.go b/weed/topology/topology_vacuum.go index c6ec4375a..ca626e973 100644 --- a/weed/topology/topology_vacuum.go +++ b/weed/topology/topology_vacuum.go @@ -2,28 +2,37 @@ package topology import ( "context" + "sync/atomic" "time" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "google.golang.org/grpc" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/operation" "github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb" - "github.com/chrislusf/seaweedfs/weed/storage" ) -func batchVacuumVolumeCheck(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList, garbageThreshold float64) bool { - ch := make(chan bool, locationlist.Length()) +func batchVacuumVolumeCheck(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, + locationlist *VolumeLocationList, garbageThreshold float64) (*VolumeLocationList, bool) { + ch := make(chan int, locationlist.Length()) + errCount := int32(0) for index, dn := range locationlist.list { - go func(index int, url string, vid storage.VolumeId) { - err := operation.WithVolumeServerClient(url, func(volumeServerClient volume_server_pb.VolumeServerClient) error { + go func(index int, url string, vid needle.VolumeId) { + err := operation.WithVolumeServerClient(url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { resp, err := volumeServerClient.VacuumVolumeCheck(context.Background(), &volume_server_pb.VacuumVolumeCheckRequest{ - VolumdId: uint32(vid), + 
VolumeId: uint32(vid), }) if err != nil { - ch <- false + atomic.AddInt32(&errCount, 1) + ch <- -1 return err } - isNeeded := resp.GarbageRatio > garbageThreshold - ch <- isNeeded + if resp.GarbageRatio >= garbageThreshold { + ch <- index + } else { + ch <- -1 + } return nil }) if err != nil { @@ -31,27 +40,32 @@ func batchVacuumVolumeCheck(vl *VolumeLayout, vid storage.VolumeId, locationlist } }(index, dn.Url(), vid) } - isCheckSuccess := true - for _ = range locationlist.list { + vacuumLocationList := NewVolumeLocationList() + for range locationlist.list { select { - case canVacuum := <-ch: - isCheckSuccess = isCheckSuccess && canVacuum + case index := <-ch: + if index != -1 { + vacuumLocationList.list = append(vacuumLocationList.list, locationlist.list[index]) + } case <-time.After(30 * time.Minute): - isCheckSuccess = false - break + return vacuumLocationList, false } } - return isCheckSuccess + return vacuumLocationList, errCount == 0 && len(vacuumLocationList.list) > 0 } -func batchVacuumVolumeCompact(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList, preallocate int64) bool { +func batchVacuumVolumeCompact(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, + locationlist *VolumeLocationList, preallocate int64) bool { + vl.accessLock.Lock() vl.removeFromWritable(vid) + vl.accessLock.Unlock() + ch := make(chan bool, locationlist.Length()) for index, dn := range locationlist.list { - go func(index int, url string, vid storage.VolumeId) { + go func(index int, url string, vid needle.VolumeId) { glog.V(0).Infoln(index, "Start vacuuming", vid, "on", url) - err := operation.WithVolumeServerClient(url, func(volumeServerClient volume_server_pb.VolumeServerClient) error { + err := operation.WithVolumeServerClient(url, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { _, err := volumeServerClient.VacuumVolumeCompact(context.Background(), &volume_server_pb.VacuumVolumeCompactRequest{ - VolumdId: uint32(vid), + VolumeId: uint32(vid), }) return err }) @@ -65,24 +79,23 @@ func batchVacuumVolumeCompact(vl *VolumeLayout, vid storage.VolumeId, locationli }(index, dn.Url(), vid) } isVacuumSuccess := true - for _ = range locationlist.list { + for range locationlist.list { select { case canCommit := <-ch: isVacuumSuccess = isVacuumSuccess && canCommit case <-time.After(30 * time.Minute): - isVacuumSuccess = false - break + return false } } return isVacuumSuccess } -func batchVacuumVolumeCommit(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList) bool { +func batchVacuumVolumeCommit(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) bool { isCommitSuccess := true for _, dn := range locationlist.list { - glog.V(0).Infoln("Start Commiting vacuum", vid, "on", dn.Url()) - err := operation.WithVolumeServerClient(dn.Url(), func(volumeServerClient volume_server_pb.VolumeServerClient) error { + glog.V(0).Infoln("Start Committing vacuum", vid, "on", dn.Url()) + err := operation.WithVolumeServerClient(dn.Url(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { _, err := volumeServerClient.VacuumVolumeCommit(context.Background(), &volume_server_pb.VacuumVolumeCommitRequest{ - VolumdId: uint32(vid), + VolumeId: uint32(vid), }) return err }) @@ -90,7 +103,7 @@ func batchVacuumVolumeCommit(vl *VolumeLayout, vid storage.VolumeId, locationlis glog.Errorf("Error when committing vacuum %d on %s: %v", vid, dn.Url(), err) isCommitSuccess = 
false } else { - glog.V(0).Infof("Complete Commiting vacuum %d on %s", vid, dn.Url()) + glog.V(0).Infof("Complete Committing vacuum %d on %s", vid, dn.Url()) } if isCommitSuccess { vl.SetVolumeAvailable(dn, vid) @@ -98,12 +111,12 @@ func batchVacuumVolumeCommit(vl *VolumeLayout, vid storage.VolumeId, locationlis } return isCommitSuccess } -func batchVacuumVolumeCleanup(vl *VolumeLayout, vid storage.VolumeId, locationlist *VolumeLocationList) { +func batchVacuumVolumeCleanup(grpcDialOption grpc.DialOption, vl *VolumeLayout, vid needle.VolumeId, locationlist *VolumeLocationList) { for _, dn := range locationlist.list { glog.V(0).Infoln("Start cleaning up", vid, "on", dn.Url()) - err := operation.WithVolumeServerClient(dn.Url(), func(volumeServerClient volume_server_pb.VolumeServerClient) error { + err := operation.WithVolumeServerClient(dn.Url(), grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error { _, err := volumeServerClient.VacuumVolumeCleanup(context.Background(), &volume_server_pb.VacuumVolumeCleanupRequest{ - VolumdId: uint32(vid), + VolumeId: uint32(vid), }) return err }) @@ -115,30 +128,40 @@ func batchVacuumVolumeCleanup(vl *VolumeLayout, vid storage.VolumeId, locationli } } -func (t *Topology) Vacuum(garbageThreshold float64, preallocate int64) int { - glog.V(0).Infof("Start vacuum on demand with threshold: %f", garbageThreshold) +func (t *Topology) Vacuum(grpcDialOption grpc.DialOption, garbageThreshold float64, preallocate int64) int { + + // if there is vacuum going on, return immediately + swapped := atomic.CompareAndSwapInt64(&t.vacuumLockCounter, 0, 1) + if !swapped { + return 0 + } + defer atomic.StoreInt64(&t.vacuumLockCounter, 0) + + // now only one vacuum process going on + + glog.V(1).Infof("Start vacuum on demand with threshold: %f", garbageThreshold) for _, col := range t.collectionMap.Items() { c := col.(*Collection) for _, vl := range c.storageType2VolumeLayout.Items() { if vl != nil { volumeLayout := vl.(*VolumeLayout) - vacuumOneVolumeLayout(volumeLayout, c, garbageThreshold, preallocate) + vacuumOneVolumeLayout(grpcDialOption, volumeLayout, c, garbageThreshold, preallocate) } } } return 0 } -func vacuumOneVolumeLayout(volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, preallocate int64) { +func vacuumOneVolumeLayout(grpcDialOption grpc.DialOption, volumeLayout *VolumeLayout, c *Collection, garbageThreshold float64, preallocate int64) { volumeLayout.accessLock.RLock() - tmpMap := make(map[storage.VolumeId]*VolumeLocationList) - for vid, locationlist := range volumeLayout.vid2location { - tmpMap[vid] = locationlist + tmpMap := make(map[needle.VolumeId]*VolumeLocationList) + for vid, locationList := range volumeLayout.vid2location { + tmpMap[vid] = locationList } volumeLayout.accessLock.RUnlock() - for vid, locationlist := range tmpMap { + for vid, locationList := range tmpMap { volumeLayout.accessLock.RLock() isReadOnly, hasValue := volumeLayout.readonlyVolumes[vid] @@ -148,12 +171,13 @@ func vacuumOneVolumeLayout(volumeLayout *VolumeLayout, c *Collection, garbageThr continue } - glog.V(0).Infof("check vacuum on collection:%s volume:%d", c.Name, vid) - if batchVacuumVolumeCheck(volumeLayout, vid, locationlist, garbageThreshold) { - if batchVacuumVolumeCompact(volumeLayout, vid, locationlist, preallocate) { - batchVacuumVolumeCommit(volumeLayout, vid, locationlist) + glog.V(2).Infof("check vacuum on collection:%s volume:%d", c.Name, vid) + if vacuumLocationList, needVacuum := batchVacuumVolumeCheck( + grpcDialOption, 
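Vacuum above now guards itself with an atomic compare-and-swap on vacuumLockCounter, so the 15-minute ticker and any on-demand trigger cannot start two vacuums at once, and no mutex is held across the long-running work. The guard in isolation (a minimal sketch, not the actual method):

```go
package main

import (
	"fmt"
	"sync/atomic"
)

var vacuumRunning int64

// tryRunVacuum runs fn only when no other vacuum is in flight; concurrent
// callers return false immediately instead of queueing up.
func tryRunVacuum(fn func()) bool {
	if !atomic.CompareAndSwapInt64(&vacuumRunning, 0, 1) {
		return false
	}
	defer atomic.StoreInt64(&vacuumRunning, 0)
	fn()
	return true
}

func main() {
	fmt.Println(tryRunVacuum(func() { /* check, compact, commit */ })) // true
}
```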
volumeLayout, vid, locationList, garbageThreshold); needVacuum { + if batchVacuumVolumeCompact(grpcDialOption, volumeLayout, vid, vacuumLocationList, preallocate) { + batchVacuumVolumeCommit(grpcDialOption, volumeLayout, vid, vacuumLocationList) } else { - batchVacuumVolumeCleanup(volumeLayout, vid, locationlist) + batchVacuumVolumeCleanup(grpcDialOption, volumeLayout, vid, vacuumLocationList) } } } diff --git a/weed/topology/volume_growth.go b/weed/topology/volume_growth.go index ddf687419..446c88f60 100644 --- a/weed/topology/volume_growth.go +++ b/weed/topology/volume_growth.go @@ -5,6 +5,12 @@ import ( "math/rand" "sync" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" + "github.com/chrislusf/seaweedfs/weed/util" + + "google.golang.org/grpc" + "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage" ) @@ -18,13 +24,14 @@ This package is created to resolve these replica placement issues: */ type VolumeGrowOption struct { - Collection string - ReplicaPlacement *storage.ReplicaPlacement - Ttl *storage.TTL - Prealloacte int64 - DataCenter string - Rack string - DataNode string + Collection string + ReplicaPlacement *super_block.ReplicaPlacement + Ttl *needle.TTL + Prealloacte int64 + DataCenter string + Rack string + DataNode string + MemoryMapMaxSizeMb uint32 } type VolumeGrowth struct { @@ -42,32 +49,40 @@ func NewDefaultVolumeGrowth() *VolumeGrowth { // one replication type may need rp.GetCopyCount() actual volumes // given copyCount, how many logical volumes to create func (vg *VolumeGrowth) findVolumeCount(copyCount int) (count int) { + v := util.GetViper() + v.SetDefault("master.volume_growth.copy_1", 7) + v.SetDefault("master.volume_growth.copy_2", 6) + v.SetDefault("master.volume_growth.copy_3", 3) + v.SetDefault("master.volume_growth.copy_other", 1) switch copyCount { case 1: - count = 7 + count = v.GetInt("master.volume_growth.copy_1") case 2: - count = 6 + count = v.GetInt("master.volume_growth.copy_2") case 3: - count = 3 + count = v.GetInt("master.volume_growth.copy_3") default: - count = 1 + count = v.GetInt("master.volume_growth.copy_other") } return } -func (vg *VolumeGrowth) AutomaticGrowByType(option *VolumeGrowOption, topo *Topology) (count int, err error) { - count, err = vg.GrowByCountAndType(vg.findVolumeCount(option.ReplicaPlacement.GetCopyCount()), option, topo) +func (vg *VolumeGrowth) AutomaticGrowByType(option *VolumeGrowOption, grpcDialOption grpc.DialOption, topo *Topology, targetCount int) (count int, err error) { + if targetCount == 0 { + targetCount = vg.findVolumeCount(option.ReplicaPlacement.GetCopyCount()) + } + count, err = vg.GrowByCountAndType(grpcDialOption, targetCount, option, topo) if count > 0 && count%option.ReplicaPlacement.GetCopyCount() == 0 { return count, nil } return count, err } -func (vg *VolumeGrowth) GrowByCountAndType(targetCount int, option *VolumeGrowOption, topo *Topology) (counter int, err error) { +func (vg *VolumeGrowth) GrowByCountAndType(grpcDialOption grpc.DialOption, targetCount int, option *VolumeGrowOption, topo *Topology) (counter int, err error) { vg.accessLock.Lock() defer vg.accessLock.Unlock() for i := 0; i < targetCount; i++ { - if c, e := vg.findAndGrow(topo, option); e == nil { + if c, e := vg.findAndGrow(grpcDialOption, topo, option); e == nil { counter += c } else { return counter, e @@ -76,13 +91,16 @@ func (vg *VolumeGrowth) GrowByCountAndType(targetCount int, option *VolumeGrowOp return } -func (vg *VolumeGrowth) 
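findVolumeCount above swaps the hard-coded growth counts for viper lookups with the old values as defaults, so master.toml can tune how many volumes are grown per copy count; per GetViper in this change, WEED_-prefixed environment variables (dots replaced by underscores) can override as well. A minimal sketch using the same keys:

```go
package main

import (
	"fmt"

	"github.com/spf13/viper"
)

func main() {
	v := viper.GetViper()
	// Defaults mirror the previously hard-coded values; a loaded
	// master.toml (or WEED_MASTER_VOLUME_GROWTH_COPY_1) overrides them.
	v.SetDefault("master.volume_growth.copy_1", 7)
	v.SetDefault("master.volume_growth.copy_3", 3)
	fmt.Println(v.GetInt("master.volume_growth.copy_1")) // 7 unless configured
}
```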
findAndGrow(topo *Topology, option *VolumeGrowOption) (int, error) { +func (vg *VolumeGrowth) findAndGrow(grpcDialOption grpc.DialOption, topo *Topology, option *VolumeGrowOption) (int, error) { servers, e := vg.findEmptySlotsForOneVolume(topo, option) if e != nil { return 0, e } - vid := topo.NextVolumeId() - err := vg.grow(topo, vid, option, servers...) + vid, raftErr := topo.NextVolumeId() + if raftErr != nil { + return 0, raftErr + } + err := vg.grow(grpcDialOption, topo, vid, option, servers...) return len(servers), err } @@ -94,14 +112,14 @@ func (vg *VolumeGrowth) findAndGrow(topo *Topology, option *VolumeGrowOption) (i func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *VolumeGrowOption) (servers []*DataNode, err error) { //find main datacenter and other data centers rp := option.ReplicaPlacement - mainDataCenter, otherDataCenters, dc_err := topo.RandomlyPickNodes(rp.DiffDataCenterCount+1, func(node Node) error { + mainDataCenter, otherDataCenters, dc_err := topo.PickNodesByWeight(rp.DiffDataCenterCount+1, func(node Node) error { if option.DataCenter != "" && node.IsDataCenter() && node.Id() != NodeId(option.DataCenter) { return fmt.Errorf("Not matching preferred data center:%s", option.DataCenter) } if len(node.Children()) < rp.DiffRackCount+1 { return fmt.Errorf("Only has %d racks, not enough for %d.", len(node.Children()), rp.DiffRackCount+1) } - if node.FreeSpace() < rp.DiffRackCount+rp.SameRackCount+1 { + if node.FreeSpace() < int64(rp.DiffRackCount+rp.SameRackCount+1) { return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.DiffRackCount+rp.SameRackCount+1) } possibleRacksCount := 0 @@ -126,11 +144,11 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum } //find main rack and other racks - mainRack, otherRacks, rack_err := mainDataCenter.(*DataCenter).RandomlyPickNodes(rp.DiffRackCount+1, func(node Node) error { + mainRack, otherRacks, rackErr := mainDataCenter.(*DataCenter).PickNodesByWeight(rp.DiffRackCount+1, func(node Node) error { if option.Rack != "" && node.IsRack() && node.Id() != NodeId(option.Rack) { return fmt.Errorf("Not matching preferred rack:%s", option.Rack) } - if node.FreeSpace() < rp.SameRackCount+1 { + if node.FreeSpace() < int64(rp.SameRackCount+1) { return fmt.Errorf("Free:%d < Expected:%d", node.FreeSpace(), rp.SameRackCount+1) } if len(node.Children()) < rp.SameRackCount+1 { @@ -148,12 +166,12 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum } return nil }) - if rack_err != nil { - return nil, rack_err + if rackErr != nil { + return nil, rackErr } //find main rack and other racks - mainServer, otherServers, server_err := mainRack.(*Rack).RandomlyPickNodes(rp.SameRackCount+1, func(node Node) error { + mainServer, otherServers, serverErr := mainRack.(*Rack).PickNodesByWeight(rp.SameRackCount+1, func(node Node) error { if option.DataNode != "" && node.IsDataNode() && node.Id() != NodeId(option.DataNode) { return fmt.Errorf("Not matching preferred data node:%s", option.DataNode) } @@ -162,8 +180,8 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum } return nil }) - if server_err != nil { - return nil, server_err + if serverErr != nil { + return nil, serverErr } servers = append(servers, mainServer.(*DataNode)) @@ -171,7 +189,7 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum servers = append(servers, server.(*DataNode)) } for _, rack := range otherRacks { - r := rand.Intn(rack.FreeSpace()) + r 
:= rand.Int63n(rack.FreeSpace()) if server, e := rack.ReserveOneVolume(r); e == nil { servers = append(servers, server) } else { @@ -179,7 +197,7 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum } } for _, datacenter := range otherDataCenters { - r := rand.Intn(datacenter.FreeSpace()) + r := rand.Int63n(datacenter.FreeSpace()) if server, e := datacenter.ReserveOneVolume(r); e == nil { servers = append(servers, server) } else { @@ -189,16 +207,16 @@ func (vg *VolumeGrowth) findEmptySlotsForOneVolume(topo *Topology, option *Volum return } -func (vg *VolumeGrowth) grow(topo *Topology, vid storage.VolumeId, option *VolumeGrowOption, servers ...*DataNode) error { +func (vg *VolumeGrowth) grow(grpcDialOption grpc.DialOption, topo *Topology, vid needle.VolumeId, option *VolumeGrowOption, servers ...*DataNode) error { for _, server := range servers { - if err := AllocateVolume(server, vid, option); err == nil { + if err := AllocateVolume(server, grpcDialOption, vid, option); err == nil { vi := storage.VolumeInfo{ Id: vid, Size: 0, Collection: option.Collection, ReplicaPlacement: option.ReplicaPlacement, Ttl: option.Ttl, - Version: storage.CurrentVersion, + Version: needle.CurrentVersion, } server.AddOrUpdateVolume(vi) topo.RegisterVolumeLayout(vi, server) diff --git a/weed/topology/volume_growth_test.go b/weed/topology/volume_growth_test.go index f983df1ec..6ff5be0eb 100644 --- a/weed/topology/volume_growth_test.go +++ b/weed/topology/volume_growth_test.go @@ -7,6 +7,8 @@ import ( "github.com/chrislusf/seaweedfs/weed/sequence" "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" ) var topologyLayout = ` @@ -96,12 +98,12 @@ func setup(topologyLayout string) *Topology { for _, v := range serverMap["volumes"].([]interface{}) { m := v.(map[string]interface{}) vi := storage.VolumeInfo{ - Id: storage.VolumeId(int64(m["id"].(float64))), + Id: needle.VolumeId(int64(m["id"].(float64))), Size: uint64(m["size"].(float64)), - Version: storage.CurrentVersion} + Version: needle.CurrentVersion} server.AddOrUpdateVolume(vi) } - server.UpAdjustMaxVolumeCountDelta(int(serverMap["limit"].(float64))) + server.UpAdjustMaxVolumeCountDelta(int64(serverMap["limit"].(float64))) } } } @@ -112,7 +114,7 @@ func setup(topologyLayout string) *Topology { func TestFindEmptySlotsForOneVolume(t *testing.T) { topo := setup(topologyLayout) vg := NewDefaultVolumeGrowth() - rp, _ := storage.NewReplicaPlacementFromString("002") + rp, _ := super_block.NewReplicaPlacementFromString("002") volumeGrowOption := &VolumeGrowOption{ Collection: "", ReplicaPlacement: rp, @@ -129,3 +131,212 @@ func TestFindEmptySlotsForOneVolume(t *testing.T) { fmt.Println("assigned node :", server.Id()) } } + +var topologyLayout2 = ` +{ + "dc1":{ + "rack1":{ + "server111":{ + "volumes":[ + {"id":1, "size":12312}, + {"id":2, "size":12312}, + {"id":3, "size":12312} + ], + "limit":300 + }, + "server112":{ + "volumes":[ + {"id":4, "size":12312}, + {"id":5, "size":12312}, + {"id":6, "size":12312} + ], + "limit":300 + }, + "server113":{ + "volumes":[], + "limit":300 + }, + "server114":{ + "volumes":[], + "limit":300 + }, + "server115":{ + "volumes":[], + "limit":300 + }, + "server116":{ + "volumes":[], + "limit":300 + } + }, + "rack2":{ + "server121":{ + "volumes":[ + {"id":4, "size":12312}, + {"id":5, "size":12312}, + {"id":6, "size":12312} + ], + "limit":300 + }, + "server122":{ + "volumes":[], + "limit":300 + }, + 
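The switch from RandomlyPickNodes/rand.Intn to PickNodesByWeight/rand.Int63n, together with the TestFindEmptySlotsForOneVolumeScheduleByWeight test below, points at free-space-weighted placement: nodes with more free volume slots are chosen proportionally more often. A sketch of that selection idea (my reading of the intent, not the actual PickNodesByWeight code; assumes a positive total):

```go
package main

import (
	"fmt"
	"math/rand"
)

// pickWeighted draws r in [0, totalFree) and walks the entries until r
// falls inside one entry's share, so each index is picked with
// probability proportional to its free-slot count.
func pickWeighted(freeSlots []int64) int {
	var total int64
	for _, f := range freeSlots {
		total += f // caller must ensure total > 0
	}
	r := rand.Int63n(total)
	for i, f := range freeSlots {
		if r < f {
			return i
		}
		r -= f
	}
	return len(freeSlots) - 1
}

func main() {
	counts := make([]int, 3)
	for i := 0; i < 10000; i++ {
		counts[pickWeighted([]int64{2000, 1000, 500})]++
	}
	fmt.Println(counts) // roughly a 4:2:1 split
}
```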
"server123":{ + "volumes":[ + {"id":2, "size":12312}, + {"id":3, "size":12312}, + {"id":4, "size":12312} + ], + "limit":300 + }, + "server124":{ + "volumes":[], + "limit":300 + }, + "server125":{ + "volumes":[], + "limit":300 + }, + "server126":{ + "volumes":[], + "limit":300 + } + }, + "rack3":{ + "server131":{ + "volumes":[], + "limit":300 + }, + "server132":{ + "volumes":[], + "limit":300 + }, + "server133":{ + "volumes":[], + "limit":300 + }, + "server134":{ + "volumes":[], + "limit":300 + }, + "server135":{ + "volumes":[], + "limit":300 + }, + "server136":{ + "volumes":[], + "limit":300 + } + } + } +} +` + +func TestReplication011(t *testing.T) { + topo := setup(topologyLayout2) + vg := NewDefaultVolumeGrowth() + rp, _ := super_block.NewReplicaPlacementFromString("011") + volumeGrowOption := &VolumeGrowOption{ + Collection: "MAIL", + ReplicaPlacement: rp, + DataCenter: "dc1", + Rack: "", + DataNode: "", + } + servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption) + if err != nil { + fmt.Println("finding empty slots error :", err) + t.Fail() + } + for _, server := range servers { + fmt.Println("assigned node :", server.Id()) + } +} + +var topologyLayout3 = ` +{ + "dc1":{ + "rack1":{ + "server111":{ + "volumes":[], + "limit":2000 + } + } + }, + "dc2":{ + "rack2":{ + "server222":{ + "volumes":[], + "limit":2000 + } + } + }, + "dc3":{ + "rack3":{ + "server333":{ + "volumes":[], + "limit":1000 + } + } + }, + "dc4":{ + "rack4":{ + "server444":{ + "volumes":[], + "limit":1000 + } + } + }, + "dc5":{ + "rack5":{ + "server555":{ + "volumes":[], + "limit":500 + } + } + }, + "dc6":{ + "rack6":{ + "server666":{ + "volumes":[], + "limit":500 + } + } + } +} +` + +func TestFindEmptySlotsForOneVolumeScheduleByWeight(t *testing.T) { + topo := setup(topologyLayout3) + vg := NewDefaultVolumeGrowth() + rp, _ := super_block.NewReplicaPlacementFromString("100") + volumeGrowOption := &VolumeGrowOption{ + Collection: "Weight", + ReplicaPlacement: rp, + DataCenter: "", + Rack: "", + DataNode: "", + } + + distribution := map[NodeId]int{} + // assign 1000 volumes + for i := 0; i < 1000; i++ { + servers, err := vg.findEmptySlotsForOneVolume(topo, volumeGrowOption) + if err != nil { + fmt.Println("finding empty slots error :", err) + t.Fail() + } + for _, server := range servers { + // fmt.Println("assigned node :", server.Id()) + if _, ok := distribution[server.id]; !ok { + distribution[server.id] = 0 + } + distribution[server.id] += 1 + } + } + + for k, v := range distribution { + fmt.Printf("%s : %d\n", k, v) + } +} diff --git a/weed/topology/volume_layout.go b/weed/topology/volume_layout.go index 44a25565e..7633b28be 100644 --- a/weed/topology/volume_layout.go +++ b/weed/topology/volume_layout.go @@ -5,31 +5,40 @@ import ( "fmt" "math/rand" "sync" + "time" "github.com/chrislusf/seaweedfs/weed/glog" "github.com/chrislusf/seaweedfs/weed/storage" + "github.com/chrislusf/seaweedfs/weed/storage/needle" + "github.com/chrislusf/seaweedfs/weed/storage/super_block" ) // mapping from volume to its locations, inverted from server to volume type VolumeLayout struct { - rp *storage.ReplicaPlacement - ttl *storage.TTL - vid2location map[storage.VolumeId]*VolumeLocationList - writables []storage.VolumeId // transient array of writable volume id - readonlyVolumes map[storage.VolumeId]bool // transient set of readonly volumes - oversizedVolumes map[storage.VolumeId]bool // set of oversized volumes + rp *super_block.ReplicaPlacement + ttl *needle.TTL + vid2location map[needle.VolumeId]*VolumeLocationList + 
writables []needle.VolumeId // transient array of writable volume id + readonlyVolumes map[needle.VolumeId]bool // transient set of readonly volumes + oversizedVolumes map[needle.VolumeId]bool // set of oversized volumes volumeSizeLimit uint64 accessLock sync.RWMutex } -func NewVolumeLayout(rp *storage.ReplicaPlacement, ttl *storage.TTL, volumeSizeLimit uint64) *VolumeLayout { +type VolumeLayoutStats struct { + TotalSize uint64 + UsedSize uint64 + FileCount uint64 +} + +func NewVolumeLayout(rp *super_block.ReplicaPlacement, ttl *needle.TTL, volumeSizeLimit uint64) *VolumeLayout { return &VolumeLayout{ rp: rp, ttl: ttl, - vid2location: make(map[storage.VolumeId]*VolumeLocationList), - writables: *new([]storage.VolumeId), - readonlyVolumes: make(map[storage.VolumeId]bool), - oversizedVolumes: make(map[storage.VolumeId]bool), + vid2location: make(map[needle.VolumeId]*VolumeLocationList), + writables: *new([]needle.VolumeId), + readonlyVolumes: make(map[needle.VolumeId]bool), + oversizedVolumes: make(map[needle.VolumeId]bool), volumeSizeLimit: volumeSizeLimit, } } @@ -46,11 +55,11 @@ func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) { vl.vid2location[v.Id] = NewVolumeLocationList() } vl.vid2location[v.Id].Set(dn) - glog.V(4).Infoln("volume", v.Id, "added to dn", dn.Id(), "len", vl.vid2location[v.Id].Length(), "copy", v.ReplicaPlacement.GetCopyCount()) + // glog.V(4).Infof("volume %d added to %s len %d copy %d", v.Id, dn.Id(), vl.vid2location[v.Id].Length(), v.ReplicaPlacement.GetCopyCount()) for _, dn := range vl.vid2location[v.Id].list { - if v_info, err := dn.GetVolumesById(v.Id); err == nil { - if v_info.ReadOnly { - glog.V(3).Infof("vid %d removed from writable", v.Id) + if vInfo, err := dn.GetVolumesById(v.Id); err == nil { + if vInfo.ReadOnly { + glog.V(1).Infof("vid %d removed from writable", v.Id) vl.removeFromWritable(v.Id) vl.readonlyVolumes[v.Id] = true return @@ -58,23 +67,19 @@ func (vl *VolumeLayout) RegisterVolume(v *storage.VolumeInfo, dn *DataNode) { delete(vl.readonlyVolumes, v.Id) } } else { - glog.V(3).Infof("vid %d removed from writable", v.Id) + glog.V(1).Infof("vid %d removed from writable", v.Id) vl.removeFromWritable(v.Id) delete(vl.readonlyVolumes, v.Id) return } } - if vl.vid2location[v.Id].Length() == vl.rp.GetCopyCount() && vl.isWritable(v) { - if _, ok := vl.oversizedVolumes[v.Id]; !ok { - vl.addToWritable(v.Id) - } - } else { - vl.rememberOversizedVolumne(v) - vl.removeFromWritable(v.Id) - } + + vl.rememberOversizedVolume(v) + vl.ensureCorrectWritables(v) + } -func (vl *VolumeLayout) rememberOversizedVolumne(v *storage.VolumeInfo) { +func (vl *VolumeLayout) rememberOversizedVolume(v *storage.VolumeInfo) { if vl.isOversized(v) { vl.oversizedVolumes[v.Id] = true } @@ -84,11 +89,34 @@ func (vl *VolumeLayout) UnRegisterVolume(v *storage.VolumeInfo, dn *DataNode) { vl.accessLock.Lock() defer vl.accessLock.Unlock() - vl.removeFromWritable(v.Id) - delete(vl.vid2location, v.Id) + // remove from vid2location map + location, ok := vl.vid2location[v.Id] + if !ok { + return + } + + if location.Remove(dn) { + + vl.ensureCorrectWritables(v) + + if location.Length() == 0 { + delete(vl.vid2location, v.Id) + } + + } +} + +func (vl *VolumeLayout) ensureCorrectWritables(v *storage.VolumeInfo) { + if vl.vid2location[v.Id].Length() == vl.rp.GetCopyCount() && vl.isWritable(v) { + if _, ok := vl.oversizedVolumes[v.Id]; !ok { + vl.addToWritable(v.Id) + } + } else { + vl.removeFromWritable(v.Id) + } } -func (vl *VolumeLayout) addToWritable(vid storage.VolumeId) 
{ +func (vl *VolumeLayout) addToWritable(vid needle.VolumeId) { for _, id := range vl.writables { if vid == id { return @@ -103,7 +131,7 @@ func (vl *VolumeLayout) isOversized(v *storage.VolumeInfo) bool { func (vl *VolumeLayout) isWritable(v *storage.VolumeInfo) bool { return !vl.isOversized(v) && - v.Version == storage.CurrentVersion && + v.Version == needle.CurrentVersion && !v.ReadOnly } @@ -114,7 +142,7 @@ func (vl *VolumeLayout) isEmpty() bool { return len(vl.vid2location) == 0 } -func (vl *VolumeLayout) Lookup(vid storage.VolumeId) []*DataNode { +func (vl *VolumeLayout) Lookup(vid needle.VolumeId) []*DataNode { vl.accessLock.RLock() defer vl.accessLock.RUnlock() @@ -134,24 +162,24 @@ func (vl *VolumeLayout) ListVolumeServers() (nodes []*DataNode) { return } -func (vl *VolumeLayout) PickForWrite(count uint64, option *VolumeGrowOption) (*storage.VolumeId, uint64, *VolumeLocationList, error) { +func (vl *VolumeLayout) PickForWrite(count uint64, option *VolumeGrowOption) (*needle.VolumeId, uint64, *VolumeLocationList, error) { vl.accessLock.RLock() defer vl.accessLock.RUnlock() - len_writers := len(vl.writables) - if len_writers <= 0 { + lenWriters := len(vl.writables) + if lenWriters <= 0 { glog.V(0).Infoln("No more writable volumes!") return nil, 0, nil, errors.New("No more writable volumes!") } if option.DataCenter == "" { - vid := vl.writables[rand.Intn(len_writers)] + vid := vl.writables[rand.Intn(lenWriters)] locationList := vl.vid2location[vid] if locationList != nil { return &vid, count, locationList, nil } return nil, 0, nil, errors.New("Strangely vid " + vid.String() + " is on no machine!") } - var vid storage.VolumeId + var vid needle.VolumeId var locationList *VolumeLocationList counter := 0 for _, v := range vl.writables { @@ -198,7 +226,7 @@ func (vl *VolumeLayout) GetActiveVolumeCount(option *VolumeGrowOption) int { return counter } -func (vl *VolumeLayout) removeFromWritable(vid storage.VolumeId) bool { +func (vl *VolumeLayout) removeFromWritable(vid needle.VolumeId) bool { toDeleteIndex := -1 for k, id := range vl.writables { if id == vid { @@ -213,7 +241,7 @@ func (vl *VolumeLayout) removeFromWritable(vid storage.VolumeId) bool { } return false } -func (vl *VolumeLayout) setVolumeWritable(vid storage.VolumeId) bool { +func (vl *VolumeLayout) setVolumeWritable(vid needle.VolumeId) bool { for _, v := range vl.writables { if v == vid { return false @@ -224,7 +252,7 @@ func (vl *VolumeLayout) setVolumeWritable(vid storage.VolumeId) bool { return true } -func (vl *VolumeLayout) SetVolumeUnavailable(dn *DataNode, vid storage.VolumeId) bool { +func (vl *VolumeLayout) SetVolumeUnavailable(dn *DataNode, vid needle.VolumeId) bool { vl.accessLock.Lock() defer vl.accessLock.Unlock() @@ -238,18 +266,18 @@ func (vl *VolumeLayout) SetVolumeUnavailable(dn *DataNode, vid storage.VolumeId) } return false } -func (vl *VolumeLayout) SetVolumeAvailable(dn *DataNode, vid storage.VolumeId) bool { +func (vl *VolumeLayout) SetVolumeAvailable(dn *DataNode, vid needle.VolumeId) bool { vl.accessLock.Lock() defer vl.accessLock.Unlock() vl.vid2location[vid].Set(dn) - if vl.vid2location[vid].Length() >= vl.rp.GetCopyCount() { + if vl.vid2location[vid].Length() == vl.rp.GetCopyCount() { return vl.setVolumeWritable(vid) } return false } -func (vl *VolumeLayout) SetVolumeCapacityFull(vid storage.VolumeId) bool { +func (vl *VolumeLayout) SetVolumeCapacityFull(vid needle.VolumeId) bool { vl.accessLock.Lock() defer vl.accessLock.Unlock() @@ -265,3 +293,25 @@ func (vl *VolumeLayout) ToMap() 
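The ensureCorrectWritables helper factored out above centralizes when a volume id may sit in the writables list: all expected replicas present, not oversized, and the volume itself writable. The invariant as a predicate (a simplified restatement, not the actual method):

```go
// shouldBeWritable condenses the checks behind ensureCorrectWritables:
// full replication, not oversized, and isWritable (current version and
// not read-only) folded into one flag here.
func shouldBeWritable(replicas, copyCount int, oversized, volumeWritable bool) bool {
	return replicas == copyCount && volumeWritable && !oversized
}
```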
map[string]interface{} { //m["locations"] = vl.vid2location return m } + +func (vl *VolumeLayout) Stats() *VolumeLayoutStats { + vl.accessLock.RLock() + defer vl.accessLock.RUnlock() + + ret := &VolumeLayoutStats{} + + freshThreshold := time.Now().Unix() - 60 + + for vid, vll := range vl.vid2location { + size, fileCount := vll.Stats(vid, freshThreshold) + ret.FileCount += uint64(fileCount) + ret.UsedSize += size + if vl.readonlyVolumes[vid] { + ret.TotalSize += size + } else { + ret.TotalSize += vl.volumeSizeLimit + } + } + + return ret +} diff --git a/weed/topology/volume_location_list.go b/weed/topology/volume_location_list.go index d5eaf5e92..8905c54b5 100644 --- a/weed/topology/volume_location_list.go +++ b/weed/topology/volume_location_list.go @@ -2,6 +2,8 @@ package topology import ( "fmt" + + "github.com/chrislusf/seaweedfs/weed/storage/needle" ) type VolumeLocationList struct { @@ -63,3 +65,15 @@ func (dnll *VolumeLocationList) Refresh(freshThreshHold int64) { dnll.list = l } } + +func (dnll *VolumeLocationList) Stats(vid needle.VolumeId, freshThreshHold int64) (size uint64, fileCount int) { + for _, dnl := range dnll.list { + if dnl.LastSeen < freshThreshHold { + vinfo, err := dnl.GetVolumesById(vid) + if err == nil { + return vinfo.Size - vinfo.DeletedByteCount, vinfo.FileCount - vinfo.DeleteCount + } + } + } + return 0, 0 +} diff --git a/weed/util/bytes.go b/weed/util/bytes.go index dfa4ae665..9c7e5e2cb 100644 --- a/weed/util/bytes.go +++ b/weed/util/bytes.go @@ -1,5 +1,10 @@ package util +import ( + "crypto/md5" + "io" +) + // big endian func BytesToUint64(b []byte) (v uint64) { @@ -43,3 +48,29 @@ func Uint16toBytes(b []byte, v uint16) { func Uint8toBytes(b []byte, v uint8) { b[0] = byte(v) } + +// returns a 64 bit big int +func HashStringToLong(dir string) (v int64) { + h := md5.New() + io.WriteString(h, dir) + + b := h.Sum(nil) + + v += int64(b[0]) + v <<= 8 + v += int64(b[1]) + v <<= 8 + v += int64(b[2]) + v <<= 8 + v += int64(b[3]) + v <<= 8 + v += int64(b[4]) + v <<= 8 + v += int64(b[5]) + v <<= 8 + v += int64(b[6]) + v <<= 8 + v += int64(b[7]) + + return +} diff --git a/weed/util/cipher.go b/weed/util/cipher.go new file mode 100644 index 000000000..7bcb6559a --- /dev/null +++ b/weed/util/cipher.go @@ -0,0 +1,81 @@ +package util + +import ( + "bytes" + "crypto/aes" + "crypto/cipher" + "crypto/rand" + "errors" + "fmt" + "io" + "io/ioutil" + + "github.com/chrislusf/seaweedfs/weed/glog" +) + +type CipherKey []byte + +func GenCipherKey() CipherKey { + key := make([]byte, 32) + if _, err := io.ReadFull(rand.Reader, key); err != nil { + glog.Fatalf("random key gen: %v", err) + } + return CipherKey(key) +} + +func Encrypt(plaintext []byte, key CipherKey) ([]byte, error) { + c, err := aes.NewCipher(key) + if err != nil { + return nil, err + } + + gcm, err := cipher.NewGCM(c) + if err != nil { + return nil, err + } + + nonce := make([]byte, gcm.NonceSize()) + if _, err = io.ReadFull(rand.Reader, nonce); err != nil { + return nil, err + } + + return gcm.Seal(nonce, nonce, plaintext, nil), nil +} + +func Decrypt(ciphertext []byte, key CipherKey) ([]byte, error) { + c, err := aes.NewCipher(key) + if err != nil { + return nil, err + } + + gcm, err := cipher.NewGCM(c) + if err != nil { + return nil, err + } + + nonceSize := gcm.NonceSize() + if len(ciphertext) < nonceSize { + return nil, errors.New("ciphertext too short") + } + + nonce, ciphertext := ciphertext[:nonceSize], ciphertext[nonceSize:] + return gcm.Open(nil, nonce, ciphertext, nil) +} + +func EncryptReader(clearReader 
io.Reader) (cipherKey CipherKey, encryptedReader io.ReadCloser, clearDataLen, encryptedDataLen int, err error) { + clearData, err := ioutil.ReadAll(clearReader) + if err != nil { + err = fmt.Errorf("read raw input: %v", err) + return + } + clearDataLen = len(clearData) + cipherKey = GenCipherKey() + encryptedData, err := Encrypt(clearData, cipherKey) + if err != nil { + err = fmt.Errorf("encrypt input: %v", err) + return + } + encryptedDataLen = len(encryptedData) + encryptedReader = ioutil.NopCloser(bytes.NewReader(encryptedData)) + return +}
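The cipher.go helpers above seal with AES-256-GCM and prepend the random nonce to the ciphertext, so a sealed blob round-trips given only the key. A usage sketch calling the helpers added in this change:

```go
package main

import (
	"fmt"

	"github.com/chrislusf/seaweedfs/weed/util"
)

func main() {
	key := util.GenCipherKey() // 32 random bytes, so AES-256
	sealed, err := util.Encrypt([]byte("hello"), key)
	if err != nil {
		panic(err)
	}
	// Decrypt slices the nonce back off the front of the sealed blob.
	plain, err := util.Decrypt(sealed, key)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", plain) // hello
}
```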
+
+/*
+* By default, prefer not to gzip, since gzip can be done on the client side.
+ */
+func IsGzippableFileType(ext, mtype string) (shouldBeZipped, iAmSure bool) {
+
+	// text
+	if strings.HasPrefix(mtype, "text/") {
+		return true, true
+	}
+
+	// images
+	switch ext {
+	case ".svg", ".bmp", ".wav":
+		return true, true
+	}
+	if strings.HasPrefix(mtype, "image/") {
+		return false, true
+	}
+
+	// by file name extension
+	switch ext {
+	case ".zip", ".rar", ".gz", ".bz2", ".xz":
+		return false, true
+	case ".pdf", ".txt", ".html", ".htm", ".css", ".js", ".json":
+		return true, true
+	case ".php", ".java", ".go", ".rb", ".c", ".cpp", ".h", ".hpp":
+		return true, true
+	case ".png", ".jpg", ".jpeg":
+		return false, true
+	}
+
+	// by mime type
+	if strings.HasPrefix(mtype, "application/") {
+		if strings.HasSuffix(mtype, "xml") {
+			return true, true
+		}
+		if strings.HasSuffix(mtype, "script") {
+			return true, true
+		}
+
+	}
+
+	if strings.HasPrefix(mtype, "audio/") {
+		switch strings.TrimPrefix(mtype, "audio/") {
+		case "wave", "wav", "x-wav", "x-pn-wav":
+			return true, true
+		}
+	}
+
+	return false, false
+}
diff --git a/weed/util/config.go b/weed/util/config.go
index 77cab3019..dfbfdbd82 100644
--- a/weed/util/config.go
+++ b/weed/util/config.go
@@ -1,10 +1,50 @@
 package util
 
+import (
+	"strings"
+
+	"github.com/spf13/viper"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+)
+
 type Configuration interface {
 	GetString(key string) string
 	GetBool(key string) bool
 	GetInt(key string) int
-	GetInt64(key string) int64
-	GetFloat64(key string) float64
 	GetStringSlice(key string) []string
+	SetDefault(key string, value interface{})
+}
+
+func LoadConfiguration(configFileName string, required bool) (loaded bool) {
+
+	// locate the named .toml file in the usual search paths
+	viper.SetConfigName(configFileName)     // name of config file (without extension)
+	viper.AddConfigPath(".")                // optionally look for config in the working directory
+	viper.AddConfigPath("$HOME/.seaweedfs") // call multiple times to add many search paths
+	viper.AddConfigPath("/etc/seaweedfs/")  // path to look for the config file in
+
+	glog.V(1).Infof("Reading %s.toml from %s", configFileName, viper.ConfigFileUsed())
+
+	if err := viper.MergeInConfig(); err != nil { // Handle errors reading the config file
+		glog.V(0).Infof("Reading %s: %v", viper.ConfigFileUsed(), err)
+		if required {
+			glog.Fatalf("Failed to load %s.toml file from current directory, or $HOME/.seaweedfs/, or /etc/seaweedfs/"+
+				"\n\nPlease use this command to generate the default %s.toml file\n"+
+				"    weed scaffold -config=%s -output=.\n\n\n",
+				configFileName, configFileName, configFileName)
+		} else {
+			return false
+		}
+	}
+
+	return true
+}
+
+func GetViper() *viper.Viper {
+	v := viper.GetViper()
+	v.AutomaticEnv()
+	v.SetEnvPrefix("weed")
+	v.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
+	return v
+}
diff --git a/weed/util/constants.go b/weed/util/constants.go
index a9077297f..c23bc11f6 100644
--- a/weed/util/constants.go
+++ b/weed/util/constants.go
@@ -1,5 +1,9 @@
 package util
 
-const (
-	VERSION = "0.99"
+import (
+	"fmt"
+)
+
+var (
+	VERSION = fmt.Sprintf("%s %d.%d", sizeLimit, 1, 61)
 )
diff --git a/weed/util/constants_4bytes.go b/weed/util/constants_4bytes.go
new file mode 100644
index 000000000..a29d9d3b0
--- /dev/null
+++ b/weed/util/constants_4bytes.go
@@ -0,0 +1,8 @@
+// +build !5BytesOffset
+
+package util
+
+const (
+	sizeLimit         = "30GB"
+	VolumeSizeLimitGB = 30
+)
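// [Editor's note — annotation, not part of the patch] constants_4bytes.go
// (above) and constants_5bytes.go (below) are mutually exclusive via build
// tags: a default build compiles the 4-byte-offset limits, while adding the
// tag swaps in the 5-byte-offset limits. A sketch of how the two builds are
// selected (assuming a checkout of this repository):
//
//	go build ./weed                       // 30GB volume size limit
//	go build -tags 5BytesOffset ./weed    // 8000GB volume size limit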
diff --git a/weed/util/constants_5bytes.go b/weed/util/constants_5bytes.go
new file mode 100644
index 000000000..91ce4066f
--- /dev/null
+++ b/weed/util/constants_5bytes.go
@@ -0,0 +1,8 @@
+// +build 5BytesOffset
+
+package util
+
+const (
+	sizeLimit         = "8000GB"
+	VolumeSizeLimitGB = 8000
+)
diff --git a/weed/util/file_util.go b/weed/util/file_util.go
index 8ff2978ba..bef9f7cd6 100644
--- a/weed/util/file_util.go
+++ b/weed/util/file_util.go
@@ -3,6 +3,7 @@ package util
 import (
 	"errors"
 	"os"
+	"time"
 
 	"github.com/chrislusf/seaweedfs/weed/glog"
 )
@@ -30,3 +31,31 @@ func GetFileSize(file *os.File) (size int64, err error) {
 	}
 	return
 }
+
+func FileExists(filename string) bool {
+
+	_, err := os.Stat(filename)
+	if os.IsNotExist(err) {
+		return false
+	}
+	return true
+
+}
+
+func CheckFile(filename string) (exists, canRead, canWrite bool, modTime time.Time, fileSize int64) {
+	exists = true
+	fi, err := os.Stat(filename)
+	if os.IsNotExist(err) {
+		exists = false
+		return
+	}
+	if err != nil {
+		// stat failed for another reason (e.g. permission denied); report no access
+		return
+	}
+	if fi.Mode()&0400 != 0 {
+		canRead = true
+	}
+	if fi.Mode()&0200 != 0 {
+		canWrite = true
+	}
+	modTime = fi.ModTime()
+	fileSize = fi.Size()
+	return
+}
diff --git a/weed/util/file_util_non_posix.go b/weed/util/file_util_non_posix.go
new file mode 100644
index 000000000..ffcfef6d5
--- /dev/null
+++ b/weed/util/file_util_non_posix.go
@@ -0,0 +1,12 @@
+// +build linux darwin freebsd netbsd openbsd plan9 solaris zos
+
+package util
+
+import (
+	"os"
+	"syscall"
+)
+
+func GetFileUidGid(fi os.FileInfo) (uid, gid uint32) {
+	return fi.Sys().(*syscall.Stat_t).Uid, fi.Sys().(*syscall.Stat_t).Gid
+}
diff --git a/weed/util/file_util_posix.go b/weed/util/file_util_posix.go
new file mode 100644
index 000000000..22ca60b3b
--- /dev/null
+++ b/weed/util/file_util_posix.go
@@ -0,0 +1,11 @@
+// +build windows
+
+package util
+
+import (
+	"os"
+)
+
+func GetFileUidGid(fi os.FileInfo) (uid, gid uint32) {
+	return 0, 0
+}
diff --git a/weed/util/grpc_client_server.go b/weed/util/grpc_client_server.go
deleted file mode 100644
index 8dbb4c0cd..000000000
--- a/weed/util/grpc_client_server.go
+++ /dev/null
@@ -1,28 +0,0 @@
-package util
-
-import (
-	"time"
-
-	"google.golang.org/grpc"
-	"google.golang.org/grpc/keepalive"
-)
-
-func NewGrpcServer() *grpc.Server {
-	return grpc.NewServer(grpc.KeepaliveParams(keepalive.ServerParameters{
-		Time:    10 * time.Second, // wait time before ping if no activity
-		Timeout: 20 * time.Second, // ping timeout
-	}), grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
-		MinTime: 60 * time.Second, // min time a client should wait before sending a ping
-	}))
-}
-
-func GrpcDial(address string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
-
-	opts = append(opts, grpc.WithInsecure())
-	opts = append(opts, grpc.WithKeepaliveParams(keepalive.ClientParameters{
-		Time:    30 * time.Second, // client ping server if no activity for this long
-		Timeout: 20 * time.Second,
-	}))
-
-	return grpc.Dial(address, opts...)
-} diff --git a/weed/util/http_util.go b/weed/util/http_util.go index 7ae5713bb..750516b92 100644 --- a/weed/util/http_util.go +++ b/weed/util/http_util.go @@ -2,6 +2,7 @@ package util import ( "bytes" + "compress/gzip" "encoding/json" "errors" "fmt" @@ -11,7 +12,7 @@ import ( "net/url" "strings" - "github.com/chrislusf/seaweedfs/weed/security" + "github.com/chrislusf/seaweedfs/weed/glog" ) var ( @@ -23,22 +24,24 @@ func init() { Transport = &http.Transport{ MaxIdleConnsPerHost: 1024, } - client = &http.Client{Transport: Transport} + client = &http.Client{ + Transport: Transport, + } } func PostBytes(url string, body []byte) ([]byte, error) { - r, err := client.Post(url, "application/octet-stream", bytes.NewReader(body)) + r, err := client.Post(url, "", bytes.NewReader(body)) if err != nil { return nil, fmt.Errorf("Post to %s: %v", url, err) } defer r.Body.Close() - if r.StatusCode >= 400 { - return nil, fmt.Errorf("%s: %s", url, r.Status) - } b, err := ioutil.ReadAll(r.Body) if err != nil { return nil, fmt.Errorf("Read response body: %v", err) } + if r.StatusCode >= 400 { + return nil, fmt.Errorf("%s: %s", url, r.Status) + } return b, nil } @@ -62,6 +65,8 @@ func Post(url string, values url.Values) ([]byte, error) { return b, nil } +// github.com/chrislusf/seaweedfs/unmaintained/repeated_vacuum/repeated_vacuum.go +// may need increasing http.Client.Timeout func Get(url string) ([]byte, error) { r, err := client.Get(url) if err != nil { @@ -83,14 +88,14 @@ func Head(url string) (http.Header, error) { if err != nil { return nil, err } - defer r.Body.Close() + defer CloseResponse(r) if r.StatusCode >= 400 { return nil, fmt.Errorf("%s: %s", url, r.Status) } return r.Header, nil } -func Delete(url string, jwt security.EncodedJwt) error { +func Delete(url string, jwt string) error { req, err := http.NewRequest("DELETE", url, nil) if jwt != "" { req.Header.Set("Authorization", "BEARER "+string(jwt)) @@ -125,7 +130,7 @@ func GetBufferStream(url string, values url.Values, allocatedBytes []byte, eachB if err != nil { return err } - defer r.Body.Close() + defer CloseResponse(r) if r.StatusCode != 200 { return fmt.Errorf("%s: %s", url, r.Status) } @@ -148,7 +153,7 @@ func GetUrlStream(url string, values url.Values, readFn func(io.Reader) error) e if err != nil { return err } - defer r.Body.Close() + defer CloseResponse(r) if r.StatusCode != 200 { return fmt.Errorf("%s: %s", url, r.Status) } @@ -184,69 +189,161 @@ func NormalizeUrl(url string) string { return "http://" + url } -func ReadUrl(fileUrl string, offset int64, size int, buf []byte) (n int64, e error) { +func ReadUrl(fileUrl string, cipherKey []byte, isGzipped bool, isFullChunk bool, offset int64, size int, buf []byte) (int64, error) { + + if cipherKey != nil { + var n int + err := readEncryptedUrl(fileUrl, cipherKey, isGzipped, offset, size, func(data []byte) { + n = copy(buf, data) + }) + return int64(n), err + } - req, _ := http.NewRequest("GET", fileUrl, nil) - req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+int64(size))) + req, err := http.NewRequest("GET", fileUrl, nil) + if err != nil { + return 0, err + } + if !isFullChunk { + req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+int64(size)-1)) + } else { + req.Header.Set("Accept-Encoding", "gzip") + } r, err := client.Do(req) if err != nil { return 0, err } + defer r.Body.Close() if r.StatusCode >= 400 { return 0, fmt.Errorf("%s: %s", fileUrl, r.Status) } - var i, m int + var reader io.ReadCloser + contentEncoding := r.Header.Get("Content-Encoding") + 
switch contentEncoding {
+	case "gzip":
+		reader, err = gzip.NewReader(r.Body)
+		if err != nil {
+			return 0, err
+		}
+		defer reader.Close()
+	default:
+		reader = r.Body
+	}
+
+	var (
+		i, m int
+		n    int64
+	)
+	// refers to https://github.com/golang/go/blob/master/src/bytes/buffer.go#L199
+	// commit id c170b14c2c1cfb2fd853a37add92a82fd6eb4318
+	for {
+		m, err = reader.Read(buf[i:])
+		i += m
+		n += int64(m)
+		if err == io.EOF {
+			return n, nil
+		}
+		if err != nil {
+			return n, err
+		}
+		if n == int64(len(buf)) {
+			break
+		}
+	}
+	// drains the response body to avoid memory leak
+	data, _ := ioutil.ReadAll(reader)
+	if len(data) != 0 {
+		glog.V(1).Infof("%s reader has remaining %d bytes", contentEncoding, len(data))
+	}
+	return n, err
+}
 
-func ReadUrlAsStream(fileUrl string, offset int64, size int, fn func(data []byte)) (n int64, e error) {
+func ReadUrlAsStream(fileUrl string, cipherKey []byte, isContentGzipped bool, isFullChunk bool, offset int64, size int, fn func(data []byte)) error {
 
-	req, _ := http.NewRequest("GET", fileUrl, nil)
-	req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+int64(size)))
+	if cipherKey != nil {
+		return readEncryptedUrl(fileUrl, cipherKey, isContentGzipped, offset, size, fn)
+	}
+
+	req, err := http.NewRequest("GET", fileUrl, nil)
+	if err != nil {
+		return err
+	}
+
+	if !isFullChunk {
+		req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+int64(size)-1))
+	}
 
 	r, err := client.Do(req)
 	if err != nil {
-		return 0, err
+		return err
 	}
-	defer r.Body.Close()
+	defer CloseResponse(r)
 	if r.StatusCode >= 400 {
-		return 0, fmt.Errorf("%s: %s", fileUrl, r.Status)
+		return fmt.Errorf("%s: %s", fileUrl, r.Status)
 	}
 
-	var m int
+	var (
+		m int
+	)
 	buf := make([]byte, 64*1024)
 	for {
 		m, err = r.Body.Read(buf)
-		if m == 0 {
-			return
-		}
 		fn(buf[:m])
-		n += int64(m)
 		if err == io.EOF {
-			return n, nil
+			return nil
 		}
-		if e != nil {
-			return n, e
+		if err != nil {
+			return err
 		}
 	}
 }
+
+func readEncryptedUrl(fileUrl string, cipherKey []byte, isContentGzipped bool, offset int64, size int, fn func(data []byte)) error {
+	encryptedData, err := Get(fileUrl)
+	if err != nil {
+		return fmt.Errorf("fetch %s: %v", fileUrl, err)
+	}
+	decryptedData, err := Decrypt(encryptedData, CipherKey(cipherKey))
+	if err != nil {
+		return fmt.Errorf("decrypt %s: %v", fileUrl, err)
+	}
+	if isContentGzipped {
+		decryptedData, err = UnGzipData(decryptedData)
+		if err != nil {
+			return fmt.Errorf("unzip decrypt %s: %v", fileUrl, err)
+		}
+	}
+	if len(decryptedData) < int(offset)+size {
+		return fmt.Errorf("read decrypted %s size %d [%d, %d)", fileUrl, len(decryptedData), offset, int(offset)+size)
+	}
+	fn(decryptedData[int(offset) : int(offset)+size])
+	return nil
+}
+
+func ReadUrlAsReaderCloser(fileUrl string, rangeHeader string) (io.ReadCloser, error) {
+
+	req, err := http.NewRequest("GET", fileUrl, nil)
+	if err != nil {
+		return nil, err
+	}
+	if rangeHeader != "" {
+		req.Header.Add("Range", rangeHeader)
+	}
+
+	r, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	if r.StatusCode >= 400 {
+		CloseResponse(r)
+		return nil, fmt.Errorf("%s: %s", fileUrl, r.Status)
+	}
+
+	return r.Body, nil
+}
+
+func CloseResponse(resp *http.Response) {
+	io.Copy(ioutil.Discard, resp.Body)
+	resp.Body.Close()
+}
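// [Editor's sketch — annotation, not part of the patch] The reworked read
// helpers above now thread the cipher key and gzip flags through every call.
// A typical streaming read of part of a chunk, assuming a plain (unencrypted,
// non-gzipped) volume-server URL and a hypothetical file id:
//
//	err := ReadUrlAsStream("http://127.0.0.1:8080/3,01637037d6", nil, false, false, 0, 1024,
//		func(data []byte) {
//			// invoked repeatedly with up to 64KB at a time
//		})
//
// Passing a non-nil cipherKey instead routes through readEncryptedUrl, which
// must fetch the whole blob before it can authenticate and decrypt it, so
// range reads of encrypted chunks are served from the decrypted buffer.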
diff --git a/weed/util/httpdown/http_down.go b/weed/util/httpdown/http_down.go
new file mode 100644
index 000000000..5cbd9611c
--- /dev/null
+++ b/weed/util/httpdown/http_down.go
@@ -0,0 +1,395 @@
+// Package httpdown provides http.ConnState enabled graceful termination of
+// http.Server.
+// Based on github.com/facebookarchive/httpdown, whose license is the MIT
+// license; we add support for serving HTTP over TLS.
+package httpdown
+
+import (
+	"crypto/tls"
+	"fmt"
+	"net"
+	"net/http"
+	"os"
+	"os/signal"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/facebookgo/clock"
+	"github.com/facebookgo/stats"
+)
+
+const (
+	defaultStopTimeout = time.Minute
+	defaultKillTimeout = time.Minute
+)
+
+// A Server encapsulates the process of accepting new connections and
+// serving them, and gracefully shutting down the listener without dropping
+// active connections.
+type Server interface {
+	// Wait waits for the serving loop to finish. This will happen when Stop is
+	// called, at which point it returns no error, or if there is an error in the
+	// serving loop. You must call Wait after calling Serve or ListenAndServe.
+	Wait() error
+
+	// Stop stops the listener. It will block until all connections have been
+	// closed.
+	Stop() error
+}
+
+// HTTP defines the configuration for serving a http.Server. Multiple calls to
+// Serve or ListenAndServe can be made on the same HTTP instance. The default
+// timeouts of 1 minute each result in a maximum of 2 minutes before a Stop()
+// returns.
+type HTTP struct {
+	// StopTimeout is the duration before we begin force closing connections.
+	// Defaults to 1 minute.
+	StopTimeout time.Duration
+
+	// KillTimeout is the duration before which we completely give up and abort
+	// even though we still have connected clients. This is useful when a large
+	// number of client connections exist and closing them can take a long time.
+	// Note, this is in addition to the StopTimeout. Defaults to 1 minute.
+	KillTimeout time.Duration
+
+	// Stats is optional. If provided, it will be used to record various metrics.
+	Stats stats.Client
+
+	// Clock allows for testing timing related functionality. Do not specify this
+	// in production code.
+	Clock clock.Clock
+
+	// When CertFile and KeyFile are set, httpdown serves HTTP over TLS.
+	// Files containing a certificate and matching private key for the
+	// server must be provided if neither the Server's
+	// TLSConfig.Certificates nor TLSConfig.GetCertificate are populated.
+	// If the certificate is signed by a certificate authority, the
+	// certFile should be the concatenation of the server's certificate,
+	// any intermediates, and the CA's certificate.
+	CertFile, KeyFile string
+}
+
+// Serve provides the low-level API which is useful if you're creating your own
+// net.Listener.
+func (h HTTP) Serve(s *http.Server, l net.Listener) Server {
+	stopTimeout := h.StopTimeout
+	if stopTimeout == 0 {
+		stopTimeout = defaultStopTimeout
+	}
+	killTimeout := h.KillTimeout
+	if killTimeout == 0 {
+		killTimeout = defaultKillTimeout
+	}
+	klock := h.Clock
+	if klock == nil {
+		klock = clock.New()
+	}
+
+	ss := &server{
+		stopTimeout:  stopTimeout,
+		killTimeout:  killTimeout,
+		stats:        h.Stats,
+		clock:        klock,
+		oldConnState: s.ConnState,
+		listener:     l,
+		server:       s,
+		serveDone:    make(chan struct{}),
+		serveErr:     make(chan error, 1),
+		new:          make(chan net.Conn),
+		active:       make(chan net.Conn),
+		idle:         make(chan net.Conn),
+		closed:       make(chan net.Conn),
+		stop:         make(chan chan struct{}),
+		kill:         make(chan chan struct{}),
+		certFile:     h.CertFile,
+		keyFile:      h.KeyFile,
+	}
+	s.ConnState = ss.connState
+	go ss.manage()
+	go ss.serve()
+	return ss
+}
+
+// ListenAndServe returns a Server for the given http.Server.
It is equivalent +// to ListenAndServe from the standard library, but returns immediately. +// Requests will be accepted in a background goroutine. If the http.Server has +// a non-nil TLSConfig, a TLS enabled listener will be setup. +func (h HTTP) ListenAndServe(s *http.Server) (Server, error) { + addr := s.Addr + if addr == "" { + if s.TLSConfig == nil { + addr = ":http" + } else { + addr = ":https" + } + } + l, err := net.Listen("tcp", addr) + if err != nil { + stats.BumpSum(h.Stats, "listen.error", 1) + return nil, err + } + if s.TLSConfig != nil { + l = tls.NewListener(l, s.TLSConfig) + } + return h.Serve(s, l), nil +} + +// server manages the serving process and allows for gracefully stopping it. +type server struct { + stopTimeout time.Duration + killTimeout time.Duration + stats stats.Client + clock clock.Clock + + oldConnState func(net.Conn, http.ConnState) + server *http.Server + serveDone chan struct{} + serveErr chan error + listener net.Listener + + new chan net.Conn + active chan net.Conn + idle chan net.Conn + closed chan net.Conn + stop chan chan struct{} + kill chan chan struct{} + + stopOnce sync.Once + stopErr error + + certFile, keyFile string +} + +func (s *server) connState(c net.Conn, cs http.ConnState) { + if s.oldConnState != nil { + s.oldConnState(c, cs) + } + + switch cs { + case http.StateNew: + s.new <- c + case http.StateActive: + s.active <- c + case http.StateIdle: + s.idle <- c + case http.StateHijacked, http.StateClosed: + s.closed <- c + } +} + +func (s *server) manage() { + defer func() { + close(s.new) + close(s.active) + close(s.idle) + close(s.closed) + close(s.stop) + close(s.kill) + }() + + var stopDone chan struct{} + + conns := map[net.Conn]http.ConnState{} + var countNew, countActive, countIdle float64 + + // decConn decrements the count associated with the current state of the + // given connection. + decConn := func(c net.Conn) { + switch conns[c] { + default: + panic(fmt.Errorf("unknown existing connection: %s", c)) + case http.StateNew: + countNew-- + case http.StateActive: + countActive-- + case http.StateIdle: + countIdle-- + } + } + + // setup a ticker to report various values every minute. if we don't have a + // Stats implementation provided, we Stop it so it never ticks. + statsTicker := s.clock.Ticker(time.Minute) + if s.stats == nil { + statsTicker.Stop() + } + + for { + select { + case <-statsTicker.C: + // we'll only get here when s.stats is not nil + s.stats.BumpAvg("http-state.new", countNew) + s.stats.BumpAvg("http-state.active", countActive) + s.stats.BumpAvg("http-state.idle", countIdle) + s.stats.BumpAvg("http-state.total", countNew+countActive+countIdle) + case c := <-s.new: + conns[c] = http.StateNew + countNew++ + case c := <-s.active: + decConn(c) + countActive++ + + conns[c] = http.StateActive + case c := <-s.idle: + decConn(c) + countIdle++ + + conns[c] = http.StateIdle + + // if we're already stopping, close it + if stopDone != nil { + c.Close() + } + case c := <-s.closed: + stats.BumpSum(s.stats, "conn.closed", 1) + decConn(c) + delete(conns, c) + + // if we're waiting to stop and are all empty, we just closed the last + // connection and we're done. 
+			if stopDone != nil && len(conns) == 0 {
+				close(stopDone)
+				return
+			}
+		case stopDone = <-s.stop:
+			// if we're already all empty, we're already done
+			if len(conns) == 0 {
+				close(stopDone)
+				return
+			}
+
+			// close current idle connections right away
+			for c, cs := range conns {
+				if cs == http.StateIdle {
+					c.Close()
+				}
+			}
+
+			// continue the loop and wait for all the ConnState updates which will
+			// eventually close(stopDone) and return from this goroutine.
+
+		case killDone := <-s.kill:
+			// force close all connections
+			stats.BumpSum(s.stats, "kill.conn.count", float64(len(conns)))
+			for c := range conns {
+				c.Close()
+			}
+
+			// don't block the kill.
+			close(killDone)
+
+			// continue the loop and we wait for all the ConnState updates and will
+			// return from this goroutine when we're all done. otherwise we'll try to
+			// send those ConnState updates on closed channels.
+
+		}
+	}
+}
+
+func (s *server) serve() {
+	stats.BumpSum(s.stats, "serve", 1)
+	if s.certFile == "" && s.keyFile == "" {
+		s.serveErr <- s.server.Serve(s.listener)
+	} else {
+		s.serveErr <- s.server.ServeTLS(s.listener, s.certFile, s.keyFile)
+	}
+	close(s.serveDone)
+	close(s.serveErr)
+}
+
+func (s *server) Wait() error {
+	if err := <-s.serveErr; !isUseOfClosedError(err) {
+		return err
+	}
+	return nil
+}
+
+func (s *server) Stop() error {
+	s.stopOnce.Do(func() {
+		defer stats.BumpTime(s.stats, "stop.time").End()
+		stats.BumpSum(s.stats, "stop", 1)
+
+		// first disable keep-alive for new connections
+		s.server.SetKeepAlivesEnabled(false)
+
+		// then close the listener so new connections can't come through
+		closeErr := s.listener.Close()
+		<-s.serveDone
+
+		// then trigger the background goroutine to stop and wait for it
+		stopDone := make(chan struct{})
+		s.stop <- stopDone
+
+		// wait for stop
+		select {
+		case <-stopDone:
+		case <-s.clock.After(s.stopTimeout):
+			defer stats.BumpTime(s.stats, "kill.time").End()
+			stats.BumpSum(s.stats, "kill", 1)
+
+			// stop timed out, wait for kill
+			killDone := make(chan struct{})
+			s.kill <- killDone
+			select {
+			case <-killDone:
+			case <-s.clock.After(s.killTimeout):
+				// kill timed out, give up
+				stats.BumpSum(s.stats, "kill.timeout", 1)
+			}
+		}
+
+		if closeErr != nil && !isUseOfClosedError(closeErr) {
+			stats.BumpSum(s.stats, "listener.close.error", 1)
+			s.stopErr = closeErr
+		}
+	})
+	return s.stopErr
+}
+
+func isUseOfClosedError(err error) bool {
+	if err == nil {
+		return false
+	}
+	if opErr, ok := err.(*net.OpError); ok {
+		err = opErr.Err
+	}
+	return err.Error() == "use of closed network connection"
+}
+
+// ListenAndServe is a convenience function to serve and wait for a SIGTERM
+// or SIGINT before shutting down.
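// A typical use (editor's sketch; mux is a hypothetical handler, and this
// example is not part of the patch):
//
//	srv := &http.Server{Addr: ":8080", Handler: mux}
//	if err := httpdown.ListenAndServe(srv, &httpdown.HTTP{StopTimeout: 30 * time.Second}); err != nil {
//		log.Fatal(err)
//	}
//
// The process then serves until SIGTERM or SIGINT arrives, drains open
// connections for up to StopTimeout, and force-closes stragglers after
// KillTimeout.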
+func ListenAndServe(s *http.Server, hd *HTTP) error { + if hd == nil { + hd = &HTTP{} + } + hs, err := hd.ListenAndServe(s) + if err != nil { + return err + } + + waiterr := make(chan error, 1) + go func() { + defer close(waiterr) + waiterr <- hs.Wait() + }() + + signals := make(chan os.Signal, 10) + signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT) + + select { + case err := <-waiterr: + if err != nil { + return err + } + case <-signals: + signal.Stop(signals) + if err := hs.Stop(); err != nil { + return err + } + if err := <-waiterr; err != nil { + return err + } + } + return nil +} diff --git a/weed/util/net_timeout.go b/weed/util/net_timeout.go index b8068e67f..8acd50d42 100644 --- a/weed/util/net_timeout.go +++ b/weed/util/net_timeout.go @@ -66,11 +66,8 @@ func (c *Conn) Write(b []byte) (count int, e error) { } func (c *Conn) Close() error { - err := c.Conn.Close() - if err == nil { - stats.ConnectionClose() - } - return err + stats.ConnectionClose() + return c.Conn.Close() } func NewListener(addr string, timeout time.Duration) (net.Listener, error) { diff --git a/weed/util/parse.go b/weed/util/parse.go index 0a8317c19..6593d43b6 100644 --- a/weed/util/parse.go +++ b/weed/util/parse.go @@ -1,7 +1,9 @@ package util import ( + "net/url" "strconv" + "strings" ) func ParseInt(text string, defaultValue int) int { @@ -24,3 +26,22 @@ func ParseUint64(text string, defaultValue uint64) uint64 { } return count } + +func ParseFilerUrl(entryPath string) (filerServer string, filerPort int64, path string, err error) { + if !strings.HasPrefix(entryPath, "http://") && !strings.HasPrefix(entryPath, "https://") { + entryPath = "http://" + entryPath + } + + var u *url.URL + u, err = url.Parse(entryPath) + if err != nil { + return + } + filerServer = u.Hostname() + portString := u.Port() + if portString != "" { + filerPort, err = strconv.ParseInt(portString, 10, 32) + } + path = u.Path + return +} diff --git a/weed/util/pprof.go b/weed/util/pprof.go index 363017555..a2621ceee 100644 --- a/weed/util/pprof.go +++ b/weed/util/pprof.go @@ -2,6 +2,7 @@ package util import ( "os" + "runtime" "runtime/pprof" "github.com/chrislusf/seaweedfs/weed/glog" @@ -19,6 +20,7 @@ func SetupProfiling(cpuProfile, memProfile string) { }) } if memProfile != "" { + runtime.MemProfileRate = 1 f, err := os.Create(memProfile) if err != nil { glog.Fatal(err) diff --git a/weed/util/queue.go b/weed/util/queue.go new file mode 100644 index 000000000..1e6211e0d --- /dev/null +++ b/weed/util/queue.go @@ -0,0 +1,61 @@ +package util + +import "sync" + +type node struct { + data interface{} + next *node +} + +type Queue struct { + head *node + tail *node + count int + sync.RWMutex +} + +func NewQueue() *Queue { + q := &Queue{} + return q +} + +func (q *Queue) Len() int { + q.RLock() + defer q.RUnlock() + return q.count +} + +func (q *Queue) Enqueue(item interface{}) { + q.Lock() + defer q.Unlock() + + n := &node{data: item} + + if q.tail == nil { + q.tail = n + q.head = n + } else { + q.tail.next = n + q.tail = n + } + q.count++ +} + +func (q *Queue) Dequeue() interface{} { + q.Lock() + defer q.Unlock() + + if q.head == nil { + return nil + } + + n := q.head + q.head = n.next + + if q.head == nil { + q.tail = nil + } + q.count-- + + return n.data +} diff --git a/weed/util/queue_unbounded.go b/weed/util/queue_unbounded.go new file mode 100644 index 000000000..664cd965e --- /dev/null +++ b/weed/util/queue_unbounded.go @@ -0,0 +1,45 @@ +package util + +import "sync" + +type UnboundedQueue struct { + outbound []string + outboundLock 
sync.RWMutex
+	inbound      []string
+	inboundLock  sync.RWMutex
+}
+
+func NewUnboundedQueue() *UnboundedQueue {
+	q := &UnboundedQueue{}
+	return q
+}
+
+func (q *UnboundedQueue) EnQueue(items ...string) {
+	q.inboundLock.Lock()
+	defer q.inboundLock.Unlock()
+
+	q.inbound = append(q.inbound, items...)
+
+}
+
+func (q *UnboundedQueue) Consume(fn func([]string)) {
+	q.outboundLock.Lock()
+	defer q.outboundLock.Unlock()
+
+	if len(q.outbound) == 0 {
+		q.inboundLock.Lock()
+		inboundLen := len(q.inbound)
+		if inboundLen > 0 {
+			t := q.outbound
+			q.outbound = q.inbound
+			q.inbound = t
+		}
+		q.inboundLock.Unlock()
+	}
+
+	if len(q.outbound) > 0 {
+		fn(q.outbound)
+		q.outbound = q.outbound[:0]
+	}
+
+}
diff --git a/weed/util/queue_unbounded_test.go b/weed/util/queue_unbounded_test.go
new file mode 100644
index 000000000..2d02032cb
--- /dev/null
+++ b/weed/util/queue_unbounded_test.go
@@ -0,0 +1,25 @@
+package util
+
+import "testing"
+
+func TestEnqueueAndConsume(t *testing.T) {
+
+	q := NewUnboundedQueue()
+
+	q.EnQueue("1", "2", "3")
+
+	f := func(items []string) {
+		for _, t := range items {
+			println(t)
+		}
+		println("-----------------------")
+	}
+	q.Consume(f)
+
+	q.Consume(f)
+
+	q.EnQueue("4", "5")
+	q.EnQueue("6", "7")
+	q.Consume(f)
+
+}
diff --git a/weed/util/throttler.go b/weed/util/throttler.go
new file mode 100644
index 000000000..873161e37
--- /dev/null
+++ b/weed/util/throttler.go
@@ -0,0 +1,34 @@
+package util
+
+import "time"
+
+type WriteThrottler struct {
+	compactionBytePerSecond int64
+	lastSizeCounter         int64
+	lastSizeCheckTime       time.Time
+}
+
+func NewWriteThrottler(bytesPerSecond int64) *WriteThrottler {
+	return &WriteThrottler{
+		compactionBytePerSecond: bytesPerSecond,
+		lastSizeCheckTime:       time.Now(),
+	}
+}
+
+func (wt *WriteThrottler) MaybeSlowdown(delta int64) {
+	if wt.compactionBytePerSecond > 0 {
+		wt.lastSizeCounter += delta
+		now := time.Now()
+		elapsedDuration := now.Sub(wt.lastSizeCheckTime)
+		if elapsedDuration > 100*time.Millisecond {
+			overLimitBytes := wt.lastSizeCounter - wt.compactionBytePerSecond/10
+			if overLimitBytes > 0 {
+				overRatio := float64(overLimitBytes) / float64(wt.compactionBytePerSecond)
+				sleepTime := time.Duration(overRatio*1000) * time.Millisecond
+				// glog.V(0).Infof("currently %d bytes, limit to %d bytes, over by %d bytes, sleeping %v over %.4f", wt.lastSizeCounter, wt.compactionBytePerSecond/10, overLimitBytes, sleepTime, overRatio)
+				time.Sleep(sleepTime)
+			}
+			wt.lastSizeCounter, wt.lastSizeCheckTime = 0, time.Now()
+		}
+	}
+}
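// [Editor's sketch — annotation, not part of the patch] WriteThrottler above
// checks roughly every 100ms whether the caller has written more than a tenth
// of the per-second budget, and sleeps in proportion to the overshoot. A
// compaction-style write loop would use it like this (chunks and writeChunk
// are hypothetical):
//
//	wt := NewWriteThrottler(10 * 1024 * 1024) // cap writes at ~10 MB/s
//	for _, chunk := range chunks {
//		writeChunk(chunk)
//		wt.MaybeSlowdown(int64(len(chunk)))
//	}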
diff --git a/weed/wdclient/masterclient.go b/weed/wdclient/masterclient.go
index df0adbd18..301f20615 100644
--- a/weed/wdclient/masterclient.go
+++ b/weed/wdclient/masterclient.go
@@ -2,30 +2,33 @@ package wdclient
 
 import (
 	"context"
-	"fmt"
+	"math/rand"
 	"time"
 
+	"google.golang.org/grpc"
+
 	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/pb"
 	"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
-	"github.com/chrislusf/seaweedfs/weed/util"
-	"math/rand"
 )
 
 type MasterClient struct {
-	ctx           context.Context
-	name          string
-	currentMaster string
-	masters       []string
+	name           string
+	grpcPort       uint32
+	currentMaster  string
+	masters        []string
+	grpcDialOption grpc.DialOption
 
 	vidMap
 }
 
-func NewMasterClient(ctx context.Context, clientName string, masters []string) *MasterClient {
+func NewMasterClient(grpcDialOption grpc.DialOption, clientName string, clientGrpcPort uint32, masters []string) *MasterClient {
 	return &MasterClient{
-		ctx:     ctx,
-		name:    clientName,
-		masters: masters,
-		vidMap:  newVidMap(),
+		name:           clientName,
+		grpcPort:       clientGrpcPort,
+		masters:        masters,
+		grpcDialOption: grpcDialOption,
+		vidMap:         newVidMap(),
 	}
 }
 
@@ -40,7 +43,7 @@ func (mc *MasterClient) WaitUntilConnected() {
 }
 
 func (mc *MasterClient) KeepConnectedToMaster() {
-	glog.V(0).Infof("%s bootstraps with masters %v", mc.name, mc.masters)
+	glog.V(1).Infof("%s bootstraps with masters %v", mc.name, mc.masters)
 	for {
 		mc.tryAllMasters()
 		time.Sleep(time.Second)
@@ -48,59 +51,75 @@ func (mc *MasterClient) KeepConnectedToMaster() {
 }
 
 func (mc *MasterClient) tryAllMasters() {
+	nextHintedLeader := ""
 	for _, master := range mc.masters {
-		glog.V(0).Infof("Connecting to %v", master)
-		withMasterClient(master, func(client master_pb.SeaweedClient) error {
-			stream, err := client.KeepConnected(context.Background())
+
+		nextHintedLeader = mc.tryConnectToMaster(master)
+		for nextHintedLeader != "" {
+			nextHintedLeader = mc.tryConnectToMaster(nextHintedLeader)
+		}
+
+		mc.currentMaster = ""
+		mc.vidMap = newVidMap()
+	}
+}
+
+func (mc *MasterClient) tryConnectToMaster(master string) (nextHintedLeader string) {
+	glog.V(1).Infof("%s Connecting to master %v", mc.name, master)
+	grpcErr := pb.WithMasterClient(master, mc.grpcDialOption, func(client master_pb.SeaweedClient) error {
+
+		stream, err := client.KeepConnected(context.Background())
+		if err != nil {
+			glog.V(0).Infof("%s failed to keep connected to %s: %v", mc.name, master, err)
+			return err
+		}
+
+		if err = stream.Send(&master_pb.KeepConnectedRequest{Name: mc.name, GrpcPort: mc.grpcPort}); err != nil {
+			glog.V(0).Infof("%s failed to send to %s: %v", mc.name, master, err)
+			return err
+		}
+
+		glog.V(1).Infof("%s Connected to %v", mc.name, master)
+		mc.currentMaster = master
+
+		for {
+			volumeLocation, err := stream.Recv()
 			if err != nil {
-				glog.V(0).Infof("failed to keep connected to %s: %v", master, err)
+				glog.V(0).Infof("%s failed to receive from %s: %v", mc.name, master, err)
 				return err
 			}
 
-			if err = stream.Send(&master_pb.ClientListenRequest{Name: mc.name}); err != nil {
-				glog.V(0).Infof("failed to send to %s: %v", master, err)
-				return err
+			// maybe the leader is changed
+			if volumeLocation.Leader != "" {
+				glog.V(0).Infof("redirected to leader %v", volumeLocation.Leader)
+				nextHintedLeader = volumeLocation.Leader
+				return nil
 			}
 
-			for {
-				if volumeLocation, err := stream.Recv(); err != nil {
-					glog.V(0).Infof("failed to receive from %s: %v", master, err)
-					return err
-				} else {
-					loc := Location{
-						Url:       volumeLocation.Url,
-						PublicUrl: volumeLocation.PublicUrl,
-					}
-					for _, newVid := range volumeLocation.NewVids {
-						mc.addLocation(newVid, loc)
-					}
-					for _, deletedVid := range volumeLocation.DeletedVids {
-						mc.deleteLocation(deletedVid, loc)
-					}
-
-					if mc.currentMaster == "" {
-						glog.V(0).Infof("Connected to %v", master)
-						mc.currentMaster = master
-					}
-
-				}
+			// process new volume location
+			loc := Location{
+				Url:       volumeLocation.Url,
+				PublicUrl: volumeLocation.PublicUrl,
 			}
+			for _, newVid := range volumeLocation.NewVids {
+				glog.V(1).Infof("%s: %s adds volume %d", mc.name, loc.Url, newVid)
+				mc.addLocation(newVid, loc)
+			}
+			for _, deletedVid := range volumeLocation.DeletedVids {
+				glog.V(1).Infof("%s: %s removes volume %d", mc.name, loc.Url, deletedVid)
+				mc.deleteLocation(deletedVid, loc)
+			}
+		}
 
-		})
-
-		mc.currentMaster = ""
+	})
+	if grpcErr != nil {
+		glog.V(0).Infof("%s failed to connect with master %v: %v", mc.name, master, grpcErr)
 	}
+	return
 }
 
-func withMasterClient(master string, fn func(client master_pb.SeaweedClient) error) error {
-
-	
grpcConnection, err := util.GrpcDial(master) - if err != nil { - return fmt.Errorf("fail to dial %s: %v", master, err) - } - defer grpcConnection.Close() - - client := master_pb.NewSeaweedClient(grpcConnection) - - return fn(client) +func (mc *MasterClient) WithClient(fn func(client master_pb.SeaweedClient) error) error { + return pb.WithMasterClient(mc.currentMaster, mc.grpcDialOption, func(client master_pb.SeaweedClient) error { + return fn(client) + }) } diff --git a/weed/wdclient/vid_map.go b/weed/wdclient/vid_map.go index 02c3efd17..97df49cb6 100644 --- a/weed/wdclient/vid_map.go +++ b/weed/wdclient/vid_map.go @@ -3,14 +3,18 @@ package wdclient import ( "errors" "fmt" - "math/rand" "strconv" "strings" "sync" + "sync/atomic" "github.com/chrislusf/seaweedfs/weed/glog" ) +const ( + maxCursorIndex = 4096 +) + type Location struct { Url string `json:"url,omitempty"` PublicUrl string `json:"publicUrl,omitempty"` @@ -19,12 +23,25 @@ type Location struct { type vidMap struct { sync.RWMutex vid2Locations map[uint32][]Location + + cursor int32 } func newVidMap() vidMap { return vidMap{ vid2Locations: make(map[uint32][]Location), + cursor: -1, + } +} + +func (vc *vidMap) getLocationIndex(length int) (int, error) { + if length <= 0 { + return 0, fmt.Errorf("invalid length: %d", length) + } + if atomic.LoadInt32(&vc.cursor) == maxCursorIndex { + atomic.CompareAndSwapInt32(&vc.cursor, maxCursorIndex, -1) } + return int(atomic.AddInt32(&vc.cursor, 1)) % length, nil } func (vc *vidMap) LookupVolumeServerUrl(vid string) (serverUrl string, err error) { @@ -34,12 +51,7 @@ func (vc *vidMap) LookupVolumeServerUrl(vid string) (serverUrl string, err error return "", err } - locations := vc.GetLocations(uint32(id)) - if len(locations) == 0 { - return "", fmt.Errorf("volume %d not found", id) - } - - return locations[rand.Intn(len(locations))].Url, nil + return vc.GetRandomLocation(uint32(id)) } func (vc *vidMap) LookupFileId(fileId string) (fullUrl string, err error) { @@ -66,11 +78,42 @@ func (vc *vidMap) LookupVolumeServer(fileId string) (volumeServer string, err er return serverUrl, nil } -func (vc *vidMap) GetLocations(vid uint32) (locations []Location) { +func (vc *vidMap) GetVidLocations(vid string) (locations []Location, err error) { + id, err := strconv.Atoi(vid) + if err != nil { + glog.V(1).Infof("Unknown volume id %s", vid) + return nil, fmt.Errorf("Unknown volume id %s", vid) + } + foundLocations, found := vc.GetLocations(uint32(id)) + if found { + return foundLocations, nil + } + return nil, fmt.Errorf("volume id %s not found", vid) +} + +func (vc *vidMap) GetLocations(vid uint32) (locations []Location, found bool) { + vc.RLock() + defer vc.RUnlock() + + locations, found = vc.vid2Locations[vid] + return +} + +func (vc *vidMap) GetRandomLocation(vid uint32) (serverUrl string, err error) { vc.RLock() defer vc.RUnlock() - return vc.vid2Locations[vid] + locations := vc.vid2Locations[vid] + if len(locations) == 0 { + return "", fmt.Errorf("volume %d not found", vid) + } + + index, err := vc.getLocationIndex(len(locations)) + if err != nil { + return "", fmt.Errorf("volume %d: %v", vid, err) + } + + return locations[index].Url, nil } func (vc *vidMap) addLocation(vid uint32, location Location) { @@ -105,6 +148,7 @@ func (vc *vidMap) deleteLocation(vid uint32, location Location) { for i, loc := range locations { if loc.Url == location.Url { vc.vid2Locations[vid] = append(locations[0:i], locations[i+1:]...) 
+			break
 		}
 	}
diff --git a/weed/wdclient/vid_map_test.go b/weed/wdclient/vid_map_test.go
new file mode 100644
index 000000000..87be2fc25
--- /dev/null
+++ b/weed/wdclient/vid_map_test.go
@@ -0,0 +1,76 @@
+package wdclient
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestLocationIndex(t *testing.T) {
+	vm := vidMap{}
+	// these lengths must fail
+	mustFailed := func(length int) {
+		_, err := vm.getLocationIndex(length)
+		if err == nil {
+			t.Errorf("length %d must fail", length)
+		}
+		if err.Error() != fmt.Sprintf("invalid length: %d", length) {
+			t.Errorf("length %d must fail. error: %v", length, err)
+		}
+	}
+
+	mustFailed(-1)
+	mustFailed(0)
+
+	mustOk := func(length, cursor, expect int) {
+		if length <= 0 {
+			t.Fatal("please don't do this")
+		}
+		vm.cursor = int32(cursor)
+		got, err := vm.getLocationIndex(length)
+		if err != nil {
+			t.Errorf("length: %d, why? %v\n", length, err)
+			return
+		}
+		if got != expect {
+			t.Errorf("cursor: %d, length: %d, expect: %d, got: %d\n", cursor, length, expect, got)
+			return
+		}
+	}
+
+	for i := -1; i < 100; i++ {
+		mustOk(7, i, (i+1)%7)
+	}
+
+	// when the cursor reaches maxCursorIndex, it wraps back to index 0
+	mustOk(7, maxCursorIndex, 0)
+
+	// test with constructor
+	vm = newVidMap()
+	length := 7
+	for i := 0; i < 100; i++ {
+		got, err := vm.getLocationIndex(length)
+		if err != nil {
+			t.Errorf("length: %d, why? %v\n", length, err)
+			return
+		}
+		if got != i%length {
+			t.Errorf("length: %d, i: %d, got: %d\n", length, i, got)
+		}
+	}
+}
+
+func BenchmarkLocationIndex(b *testing.B) {
+	b.SetParallelism(8)
+	vm := vidMap{
+		cursor: maxCursorIndex - 4000,
+	}
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			_, err := vm.getLocationIndex(3)
+			if err != nil {
+				b.Error(err)
+			}
+		}
+	})
+}
diff --git a/weed/wdclient/wdclient.go b/weed/wdclient/wdclient.go
deleted file mode 100644
index 722f4d061..000000000
--- a/weed/wdclient/wdclient.go
+++ /dev/null
@@ -1,15 +0,0 @@
-package wdclient
-
-import (
-	"context"
-)
-
-type SeaweedClient struct {
-	*MasterClient
-}
-
-func NewSeaweedClient(ctx context.Context, clientName string, masters []string) *SeaweedClient {
-	return &SeaweedClient{
-		MasterClient: NewMasterClient(ctx, clientName, masters),
-	}
-}
diff --git a/weed/weed.go b/weed/weed.go
index 340da6625..ecb0ba2a4 100644
--- a/weed/weed.go
+++ b/weed/weed.go
@@ -21,7 +21,6 @@ import (
 )
 
 var IsDebug *bool
-var server *string
 
 var commands = command.Commands
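// [Editor's note — annotation, not part of the patch] getLocationIndex above
// replaces the old rand.Intn replica pick with a lock-free round robin: an
// atomic increment taken modulo the replica count, with the shared cursor
// wrapped back to -1 once it reaches maxCursorIndex (4096) so it cannot
// overflow. A sketch of the effect, assuming two hypothetical replicas:
//
//	vm := newVidMap()
//	vm.addLocation(3, Location{Url: "127.0.0.1:8080"})
//	vm.addLocation(3, Location{Url: "127.0.0.1:8081"})
//	url, _ := vm.GetRandomLocation(3) // alternates between the two urls
//
// Concurrent callers may occasionally land on the same index, which is
// harmless for load spreading.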
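// [Editor's sketch — annotation, not part of the patch] With the context
// parameter gone from NewMasterClient, a client now wires up the master
// connection roughly like this (the address, client name, and dial option
// are hypothetical):
//
//	mc := wdclient.NewMasterClient(grpc.WithInsecure(), "filer", 0, []string{"localhost:9333"})
//	go mc.KeepConnectedToMaster() // follows leader hints, refreshes the vidMap
//	mc.WaitUntilConnected()
//	url, err := mc.LookupFileId("3,01637037d6")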
