75 files changed, 5633 insertions, 4599 deletions
diff --git a/.github/workflows/java_integration_tests.yml b/.github/workflows/java_integration_tests.yml index ea7322229..ec90c9df5 100644 --- a/.github/workflows/java_integration_tests.yml +++ b/.github/workflows/java_integration_tests.yml @@ -128,11 +128,6 @@ jobs: run: | mvn test -Dtest=*IntegrationTest - - name: Run HDFS2 Configuration Tests - working-directory: other/java/hdfs2 - run: | - mvn test -Dtest=SeaweedFileSystemConfigTest -Dmaven.javadoc.skip=true -Dgpg.skip=true - - name: Run HDFS3 Configuration Tests working-directory: other/java/hdfs3 run: | diff --git a/.github/workflows/java_unit_tests.yml b/.github/workflows/java_unit_tests.yml index 4e88abc5d..41c39c40c 100644 --- a/.github/workflows/java_unit_tests.yml +++ b/.github/workflows/java_unit_tests.yml @@ -42,11 +42,6 @@ jobs: run: | mvn test -Dtest=SeaweedReadTest,SeaweedCipherTest - - name: Run HDFS2 Configuration Tests - working-directory: other/java/hdfs2 - run: | - mvn test -Dtest=SeaweedFileSystemConfigTest -Dmaven.javadoc.skip=true -Dgpg.skip=true - - name: Run HDFS3 Configuration Tests working-directory: other/java/hdfs3 run: | @@ -59,6 +54,5 @@ jobs: name: test-reports-java-${{ matrix.java }} path: | other/java/client/target/surefire-reports/ - other/java/hdfs2/target/surefire-reports/ other/java/hdfs3/target/surefire-reports/ diff --git a/.github/workflows/spark-integration-tests.yml b/.github/workflows/spark-integration-tests.yml new file mode 100644 index 000000000..4983a8f59 --- /dev/null +++ b/.github/workflows/spark-integration-tests.yml @@ -0,0 +1,263 @@ +name: Spark Integration Tests + +on: + push: + paths: + - 'test/java/spark/**' + - 'other/java/hdfs3/**' + - 'other/java/client/**' + - '.github/workflows/spark-integration-tests.yml' + pull_request: + paths: + - 'test/java/spark/**' + - 'other/java/hdfs3/**' + - 'other/java/client/**' + - '.github/workflows/spark-integration-tests.yml' + workflow_dispatch: + +permissions: + contents: read + checks: write + pull-requests: write + +jobs: + spark-integration-tests: + name: Spark Integration Tests + runs-on: ubuntu-latest + timeout-minutes: 45 + + steps: + # ======================================== + # SETUP & BUILD + # ======================================== + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + java-version: '11' + distribution: 'temurin' + cache: maven + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24' + + - name: Build SeaweedFS binary + run: | + echo "Building SeaweedFS binary (statically linked for Alpine)..." + cd weed + CGO_ENABLED=0 go build -o ../docker/weed + cd ../docker + ls -la weed filer.toml entrypoint.sh + file weed + echo "OK SeaweedFS binary built" + + - name: Build SeaweedFS Java dependencies + run: | + echo "Building Java client..." + cd other/java/client + mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true + echo "OK Java client built" + cd ../../.. + + echo "Building HDFS3 client..." 
+ cd other/java/hdfs3 + mvn clean install -U -DskipTests -Dgpg.skip=true -Dcentral.publishing.skip=true + echo "OK HDFS3 client built" + echo "" + echo "All Java dependencies installed to ~/.m2/repository" + + # ======================================== + # SPARK INTEGRATION TESTS (DOCKER) + # ======================================== + - name: Start SeaweedFS services for tests + working-directory: test/java/spark + run: | + echo "=== Starting SeaweedFS Services for Tests ===" + docker compose down -v || true + docker compose build --no-cache + docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer + + echo "Waiting for services..." + for i in {1..30}; do + if curl -f http://localhost:8888/ > /dev/null 2>&1; then + echo "OK SeaweedFS filer is ready!" + break + fi + if [ $i -eq 30 ]; then + echo "FAILED Services failed to start" + docker compose ps -a + docker compose logs + exit 1 + fi + echo "Waiting... ($i/30)" + sleep 2 + done + + curl -f http://localhost:9333/cluster/status || exit 1 + echo "OK All services healthy" + + - name: Prepare Maven repository for Docker + working-directory: test/java/spark + run: | + echo "Copying Maven artifacts for Docker container..." + mkdir -p .m2/repository/com + cp -r ~/.m2/repository/com/seaweedfs .m2/repository/com/ + echo "OK Maven artifacts copied" + + - name: Run Spark integration tests + working-directory: test/java/spark + run: | + echo "=== Running Spark Integration Tests ===" + docker compose up --abort-on-container-exit spark-tests + + echo "" + echo "=== Test Logs ===" + docker compose logs spark-tests | tail -100 + + - name: Stop test services + if: always() + working-directory: test/java/spark + run: docker compose down -v + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: spark-test-results + path: test/java/spark/target/surefire-reports/ + retention-days: 30 + + - name: Publish test report + if: always() + uses: dorny/test-reporter@v1 + with: + name: Spark Test Results + path: test/java/spark/target/surefire-reports/*.xml + reporter: java-junit + fail-on-error: true + + # ======================================== + # SPARK EXAMPLE (HOST-BASED) - DISABLED + # Note: Host-based example doesn't work with Docker networking + # because master returns Docker hostnames (seaweedfs-volume) + # which are not resolvable from the host. + # The Docker-based tests above are sufficient. + # ======================================== + - name: Cache Apache Spark + if: false && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + id: cache-spark + uses: actions/cache@v4 + with: + path: spark-3.5.0-bin-hadoop3 + key: spark-3.5.0-hadoop3 + + - name: Download Apache Spark + if: false && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && steps.cache-spark.outputs.cache-hit != 'true' + run: | + echo "Downloading Apache Spark 3.5.0..." + wget -q https://archive.apache.org/dist/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz + tar xzf spark-3.5.0-bin-hadoop3.tgz + echo "OK Spark downloaded" + + - name: Start SeaweedFS services for example + if: false && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + working-directory: test/java/spark + run: | + echo "=== Starting SeaweedFS Services for Example ===" + docker compose down -v || true + docker compose build --no-cache + docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer + + echo "Waiting for services..." 
+ for i in {1..30}; do + if curl -f http://localhost:8888/ > /dev/null 2>&1; then + echo "OK SeaweedFS filer is ready!" + break + fi + if [ $i -eq 30 ]; then + echo "FAILED Services failed to start" + docker compose ps -a + docker compose logs + exit 1 + fi + echo "Waiting... ($i/30)" + sleep 2 + done + + curl -f http://localhost:9333/cluster/status || exit 1 + echo "OK All services healthy" + + - name: Clean target directory + if: false && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + working-directory: test/java/spark + run: | + # Force remove target directory to avoid permission issues + sudo rm -rf target || rm -rf target || true + echo "OK Target directory cleaned" + + - name: Build project for example + if: false && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + working-directory: test/java/spark + run: mvn package -DskipTests + + - name: Run Spark example application + if: false && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + working-directory: test/java/spark + run: | + echo "=== Running Spark Example Application ===" + export SPARK_HOME=$(pwd)/../../../spark-3.5.0-bin-hadoop3 + $SPARK_HOME/bin/spark-submit \ + --class seaweed.spark.SparkSeaweedFSExample \ + --master local[2] \ + --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \ + --conf spark.hadoop.fs.seaweed.filer.host=localhost \ + --conf spark.hadoop.fs.seaweed.filer.port=8888 \ + --conf spark.hadoop.fs.seaweed.filer.port.grpc=18888 \ + --conf spark.hadoop.fs.seaweed.replication="" \ + target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \ + seaweedfs://localhost:8888/ci-spark-output + echo "OK Example completed" + + - name: Verify example output + if: false && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + run: | + echo "Verifying output..." + curl -s http://localhost:8888/ci-spark-output/ || echo "Output listing unavailable" + + - name: Stop example services + if: false && always() && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') + working-directory: test/java/spark + run: docker compose down -v + + # ======================================== + # DIAGNOSTICS + # ======================================== + - name: Display diagnostics on failure + if: failure() + working-directory: test/java/spark + run: | + echo "=== Container Status ===" + docker compose ps -a + echo "" + echo "=== Master Logs ===" + docker compose logs seaweedfs-master + echo "" + echo "=== Volume Logs ===" + docker compose logs seaweedfs-volume + echo "" + echo "=== Filer Logs ===" + docker compose logs seaweedfs-filer + echo "" + echo "=== Volume List ===" + docker compose exec -T seaweedfs-master weed shell <<EOF || echo "Failed" + volume.list + exit + EOF + echo "" + echo "=== Cluster Status ===" + curl -s http://localhost:9333/dir/status | jq '.' 
|| curl -s http://localhost:9333/dir/status + diff --git a/docker/Dockerfile.local b/docker/Dockerfile.local index 062db4d84..9ea378401 100644 --- a/docker/Dockerfile.local +++ b/docker/Dockerfile.local @@ -1,8 +1,7 @@ FROM alpine AS final LABEL author="Chris Lu" -COPY ./weed /usr/bin/ -COPY ./weed_pub* /usr/bin/ -COPY ./weed_sub* /usr/bin/ +COPY ./weed /usr/bin/weed +RUN chmod +x /usr/bin/weed && ls -la /usr/bin/weed RUN mkdir -p /etc/seaweedfs COPY ./filer.toml /etc/seaweedfs/filer.toml COPY ./entrypoint.sh /entrypoint.sh @@ -100,15 +100,15 @@ require ( golang.org/x/exp v0.0.0-20250811191247-51f88131bc50 golang.org/x/image v0.33.0 golang.org/x/net v0.47.0 - golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/oauth2 v0.32.0 // indirect golang.org/x/sys v0.38.0 golang.org/x/text v0.31.0 // indirect golang.org/x/tools v0.38.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect google.golang.org/api v0.247.0 google.golang.org/genproto v0.0.0-20250715232539-7130f93afb79 // indirect - google.golang.org/grpc v1.75.1 - google.golang.org/protobuf v1.36.9 + google.golang.org/grpc v1.77.0 + google.golang.org/protobuf v1.36.10 gopkg.in/inf.v0 v0.9.1 // indirect modernc.org/b v1.0.0 // indirect modernc.org/mathutil v1.7.1 @@ -229,7 +229,7 @@ require ( cel.dev/expr v0.24.0 // indirect cloud.google.com/go/auth v0.16.5 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect - cloud.google.com/go/compute/metadata v0.8.0 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect cloud.google.com/go/iam v1.5.2 // indirect cloud.google.com/go/monitoring v1.24.2 // indirect filippo.io/edwards25519 v1.1.0 // indirect @@ -241,7 +241,7 @@ require ( github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect github.com/Files-com/files-sdk-go/v3 v3.2.218 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 // indirect github.com/IBM/go-sdk-core/v5 v5.21.0 // indirect @@ -290,7 +290,7 @@ require ( github.com/cloudsoda/go-smb2 v0.0.0-20250228001242-d4c70e6251cc // indirect github.com/cloudsoda/sddl v0.0.0-20250224235906-926454e91efc // indirect github.com/cloudwego/base64x v0.1.6 // indirect - github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect + github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f // indirect github.com/colinmarc/hdfs/v2 v2.4.0 // indirect github.com/creasty/defaults v1.8.0 // indirect github.com/cronokirby/saferith v0.33.0 // indirect @@ -302,7 +302,7 @@ require ( github.com/elastic/gosigar v0.14.3 // indirect github.com/emersion/go-message v0.18.2 // indirect github.com/emersion/go-vcard v0.0.0-20241024213814-c9703dde27ff // indirect - github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.35.0 // indirect github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/fatih/color v1.18.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -312,7 +312,7 @@ require ( github.com/gin-contrib/sse v1.1.0 // indirect github.com/go-chi/chi/v5 v5.2.2 // indirect github.com/go-darwin/apfs v0.0.0-20211011131704-f84b94dbf348 // 
indirect - github.com/go-jose/go-jose/v4 v4.1.1 // indirect + github.com/go-jose/go-jose/v4 v4.1.3 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect @@ -408,7 +408,7 @@ require ( github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 // indirect github.com/spacemonkeygo/monkit/v3 v3.0.24 // indirect github.com/spf13/pflag v1.0.10 // indirect - github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect + github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/t3rm1n4l/go-mega v0.0.0-20250926104142-ccb8d3498e6c // indirect github.com/tarantool/go-iproto v1.1.0 // indirect @@ -434,22 +434,22 @@ require ( github.com/zeebo/errs v1.4.0 // indirect go.etcd.io/bbolt v1.4.2 // indirect go.etcd.io/etcd/api/v3 v3.6.6 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/contrib/detectors/gcp v1.37.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.38.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.62.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect - go.opentelemetry.io/otel v1.37.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/sdk v1.37.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.37.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.opentelemetry.io/otel v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/sdk v1.38.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/arch v0.20.0 // indirect golang.org/x/term v0.37.0 // indirect golang.org/x/time v0.12.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20251111163417-95abcf5c77ba // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/validator.v2 v2.0.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect @@ -160,6 +160,8 @@ cloud.google.com/go/compute/metadata v0.2.1/go.mod h1:jgHgmJd2RKBGzXqF5LR2EZMGxB cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/compute/metadata v0.8.0 h1:HxMRIbao8w17ZX6wBnjhcDkW6lTFpgcaobyVfZWqRLA= cloud.google.com/go/compute/metadata v0.8.0/go.mod h1:sYOGTp851OV9bOFJ9CH7elVvyzopvWQFNNghtDQ/Biw= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= cloud.google.com/go/contactcenterinsights v1.3.0/go.mod h1:Eu2oemoePuEFc/xKFPjbTuPSj0fYJcPls9TFlPNnHHY= cloud.google.com/go/contactcenterinsights v1.4.0/go.mod h1:L2YzkGbPsv+vMQMCADxJoT9YiTTnSEd6fEvCeHTYVck= cloud.google.com/go/contactcenterinsights v1.6.0/go.mod h1:IIDlT6CLcDoyv79kDv8iWxMSTZhLxSCofVV5W6YFM/w= @@ -578,6 +580,8 @@ github.com/Files-com/files-sdk-go/v3 v3.2.218 h1:tIvcbHXNY/bq+Sno6vajOJOxhe5XbU5 github.com/Files-com/files-sdk-go/v3 v3.2.218/go.mod 
h1:E0BaGQbcMUcql+AfubCR/iasWKBxX5UZPivnQGC2z0M= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 h1:UQUsRi8WTzhZntp5313l+CHIAT95ojUI2lpP/ExlZa4= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0/go.mod h1:Cz6ft6Dkn3Et6l2v2a9/RpN7epQ1GtDlO6lj8bEcOvw= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0 h1:sBEjpZlNHzK1voKq9695PJSX2o5NEXl7/OL3coiIY0c= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 h1:owcC2UnmsZycprQ5RfRgjydWhuoxg71LUfyiQdijZuM= github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0/go.mod h1:ZPpqegjbE99EPKsu3iUWV22A04wzGPcAY/ziSIQEEgs= github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.53.0 h1:4LP6hvB4I5ouTbGgWtixJhgED6xdf67twf9PoY96Tbg= @@ -798,6 +802,8 @@ github.com/cncf/xds/go v0.0.0-20230105202645-06c439db220b/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls= github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f h1:Y8xYupdHxryycyPlc9Y+bSQAYZnetRJ70VMVKm5CKI0= +github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f/go.mod h1:HlzOvOjVBOfTGSRXRyY0OiCS/3J1akRGQQpRO/7zyF4= github.com/cockroachdb/apd/v3 v3.1.0 h1:MK3Ow7LH0W8zkd5GMKA1PvS9qG3bWFI95WaVNfyZJ/w= github.com/cockroachdb/apd/v3 v3.1.0/go.mod h1:6qgPBMXjATAdD/VefbRP9NoSLKjbB4LCoA7gN4LpHs4= github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I= @@ -880,8 +886,11 @@ github.com/envoyproxy/go-control-plane v0.10.3/go.mod h1:fJJn/j26vwOu972OllsvAgJ github.com/envoyproxy/go-control-plane v0.11.0/go.mod h1:VnHyVMpzcLvCFt9yUz1UnCwHLhwx1WguiVDV7pTG/tI= github.com/envoyproxy/go-control-plane v0.13.4 h1:zEqyPVyku6IvWCFwux4x9RxkLOMUL+1vC9xUFv5l2/M= github.com/envoyproxy/go-control-plane v0.13.4/go.mod h1:kDfuBlDVsSj2MjrLEtRWtHlsWIFcGyB2RMO44Dc5GZA= +github.com/envoyproxy/go-control-plane v0.13.5-0.20251024222203-75eaa193e329 h1:K+fnvUM0VZ7ZFJf0n4L/BRlnsb9pL/GuDG6FqaH+PwM= github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= +github.com/envoyproxy/go-control-plane/envoy v1.35.0 h1:ixjkELDE+ru6idPxcHLj8LBVc2bFP7iBytj353BoHUo= +github.com/envoyproxy/go-control-plane/envoy v1.35.0/go.mod h1:09qwbGVuSWWAyN5t/b3iyVfz5+z8QWGrzkoqm/8SbEs= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= @@ -950,6 +959,8 @@ github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-jose/go-jose/v4 v4.1.1 h1:JYhSgy4mXXzAdF3nUx3ygx347LRXJRrpgyU3adRmkAI= github.com/go-jose/go-jose/v4 v4.1.1/go.mod 
h1:BdsZGqgdO3b6tTc6LSE56wcDbMMLuPsw5d4ZD5f94kA= +github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= +github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= @@ -1664,6 +1675,8 @@ github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY= github.com/spiffe/go-spiffe/v2 v2.5.0 h1:N2I01KCUkv1FAjZXJMwh95KK1ZIQLYbPfhaxw8WS0hE= github.com/spiffe/go-spiffe/v2 v2.5.0/go.mod h1:P+NxobPc6wXhVtINNtFjNWGBTreew1GBUCwT2wPmb7g= +github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= +github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -1834,14 +1847,20 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/detectors/gcp v1.37.0 h1:B+WbN9RPsvobe6q4vP6KgM8/9plR/HNjgGBrfcOlweA= go.opentelemetry.io/contrib/detectors/gcp v1.37.0/go.mod h1:K5zQ3TT7p2ru9Qkzk0bKtCql0RGkPj9pRjpXgZJZ+rU= +go.opentelemetry.io/contrib/detectors/gcp v1.38.0 h1:ZoYbqX7OaA/TAikspPl3ozPI6iY6LiIY9I8cUfm+pJs= +go.opentelemetry.io/contrib/detectors/gcp v1.38.0/go.mod h1:SU+iU7nu5ud4oCb3LQOhIZ3nRLj6FNVrKgtflbaf2ts= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.62.0 h1:rbRJ8BBoVMsQShESYZ0FkvcITu8X8QNwJogcLUmDNNw= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.62.0/go.mod h1:ru6KHrNtNHxM4nD/vd6QrLVWgKhxPYgblq4VAtNawTQ= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 h1:Hf9xI/XLML9ElpiHVDNwvqI0hIFlzV8dgIr35kV1kRU= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0/go.mod h1:NfchwuyNoMcZ5MLHwPrODwUF1HWCXWrL31s8gSAdIKY= go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0 h1:Ahq7pZmv87yiyn3jeFz/LekZmPLLdKejuO3NcK9MssM= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0/go.mod h1:MJTqhM0im3mRLw1i8uGHnCvUEeS7VwRyxlLC78PA18M= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.37.0 h1:EtFWSnwW9hGObjkIdmlnWSydO+Qs8OwzfzXLUPg4xOc= @@ -1852,12 +1871,20 @@ go.opentelemetry.io/otel/exporters/zipkin v1.36.0 h1:s0n95ya5tOG03exJ5JySOdJFtwG 
go.opentelemetry.io/otel/exporters/zipkin v1.36.0/go.mod h1:m9wRxtKA2MZ1HcnNC4BKI+9aYe434qRZTCvI7QGUN7Y= go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.15.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -2088,6 +2115,8 @@ golang.org/x/oauth2 v0.5.0/go.mod h1:9/XBHVqLaWO3/BRHs5jbpYCnOZVjj5V0ndyaAM7KB4I golang.org/x/oauth2 v0.6.0/go.mod h1:ycmewcwgD4Rpr3eZJLSB4Kyyljb3qDh40vJ8STE5HKw= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= +golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -2559,8 +2588,12 @@ google.golang.org/genproto v0.0.0-20250715232539-7130f93afb79 h1:Nt6z9UHqSlIdIGJ google.golang.org/genproto v0.0.0-20250715232539-7130f93afb79/go.mod h1:kTmlBHMPqR5uCZPBvwa2B18mvubkjyY3CRLI0c6fj0s= google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c h1:AtEkQdl5b6zsybXcbz00j1LwNodDuH6hVifIaNqk7NQ= google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c/go.mod h1:ea2MjsO70ssTfCjiwHgI0ZFqcw45Ksuk2ckf9G468GA= +google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 h1:mepRgnBZa07I4TRuomDE4sTIYieg/osKmzIf4USdWS4= +google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8/go.mod h1:fDMmzKV90WSg1NbozdqrE64fkuTv6mlq2zxo9ad+3yo= google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c h1:qXWI/sQtv5UKboZ/zUk7h+mrf/lXORyI+n9DKDAusdg= 
google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c/go.mod h1:gw1tLEfykwDz2ET4a12jcXt4couGAm7IwsVaTy0Sflo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251111163417-95abcf5c77ba h1:UKgtfRM7Yh93Sya0Fo8ZzhDP4qBckrrxEr2oF5UIVb8= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251111163417-95abcf5c77ba/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -2603,9 +2636,12 @@ google.golang.org/grpc v1.53.0/go.mod h1:OnIrk0ipVdj4N5d9IUoFUx72/VlD7+jUsHwZgwS google.golang.org/grpc v1.55.0/go.mod h1:iYEXKGkEBhg1PjZQvoYEVPTDkHo1/bjTnfwTeGONTY8= google.golang.org/grpc v1.75.1 h1:/ODCNEuf9VghjgO3rqLcfg8fiOP0nSluljWFlDxELLI= google.golang.org/grpc v1.75.1/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM= +google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= google.golang.org/grpc/examples v0.0.0-20230224211313-3775f633ce20 h1:MLBCGN1O7GzIx+cBiwfYPwtmZ41U3Mn/cotLJciaArI= google.golang.org/grpc/examples v0.0.0-20230224211313-3775f633ce20/go.mod h1:Nr5H8+MlGWr5+xX/STzdoEqJrO+YteqFbMyCsrb6mH0= +google.golang.org/grpc/examples v0.0.0-20250407062114-b368379ef8f6 h1:ExN12ndbJ608cboPYflpTny6mXSzPrDLh0iTaVrRrds= google.golang.org/grpc/security/advancedtls v1.0.0 h1:/KQ7VP/1bs53/aopk9QhuPyFAp9Dm9Ejix3lzYkCrDA= google.golang.org/grpc/security/advancedtls v1.0.0/go.mod h1:o+s4go+e1PJ2AjuQMY5hU82W7lDlefjJA6FqEHRVHWk= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -2626,6 +2662,8 @@ google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/other/java/client/pom.xml b/other/java/client/pom.xml index 682582f7b..1989b9b05 100644 --- a/other/java/client/pom.xml +++ b/other/java/client/pom.xml @@ -5,7 +5,7 @@ <groupId>com.seaweedfs</groupId> <artifactId>seaweedfs-client</artifactId> - <version>3.80</version> + <version>3.80.1-SNAPSHOT</version> <name>SeaweedFS Java Client</name> <description>A java client for SeaweedFS.</description> @@ -33,7 +33,7 @@ <properties> <protobuf.version>3.25.5</protobuf.version> <!-- follow https://github.com/grpc/grpc-java --> - <grpc.version>1.75.0</grpc.version> + <grpc.version>1.77.0</grpc.version> <guava.version>32.0.0-jre</guava.version> </properties> diff --git 
a/other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java b/other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java index 44977d186..320a754ea 100644 --- a/other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java +++ b/other/java/client/src/main/java/seaweedfs/client/FilerGrpcClient.java @@ -8,10 +8,13 @@ import io.grpc.netty.shaded.io.netty.handler.ssl.SslContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; public class FilerGrpcClient { @@ -29,10 +32,12 @@ public class FilerGrpcClient { public final int VOLUME_SERVER_ACCESS_FILER_PROXY = 2; public final Map<String, FilerProto.Locations> vidLocations = new HashMap<>(); protected int randomClientId; - private final ManagedChannel channel; - private final SeaweedFilerGrpc.SeaweedFilerBlockingStub blockingStub; - private final SeaweedFilerGrpc.SeaweedFilerStub asyncStub; - private final SeaweedFilerGrpc.SeaweedFilerFutureStub futureStub; + + // Connection pool to handle concurrent requests + private static final int CHANNEL_POOL_SIZE = 4; + private final List<ManagedChannel> channelPool; + private final AtomicInteger channelIndex = new AtomicInteger(0); + private boolean cipher = false; private String collection = ""; private String replication = ""; @@ -45,26 +50,18 @@ public class FilerGrpcClient { public FilerGrpcClient(String host, int port, int grpcPort, String cn, SslContext sslContext) { - this(sslContext == null ? - ManagedChannelBuilder.forAddress(host, grpcPort) - .usePlaintext() - .maxInboundMessageSize(1024 * 1024 * 1024) : - cn.isEmpty() ? - NettyChannelBuilder.forAddress(host, grpcPort) - .maxInboundMessageSize(1024 * 1024 * 1024) - .negotiationType(NegotiationType.TLS) - .sslContext(sslContext) : - NettyChannelBuilder.forAddress(host, grpcPort) - .maxInboundMessageSize(1024 * 1024 * 1024) - .negotiationType(NegotiationType.TLS) - .overrideAuthority(cn) //will not check hostname of the filer server - .sslContext(sslContext) - ); - filerAddress = SeaweedUtil.joinHostPort(host, port); - FilerProto.GetFilerConfigurationResponse filerConfigurationResponse = - this.getBlockingStub().getFilerConfiguration( + // Create a pool of channels for better concurrency handling + channelPool = new ArrayList<>(CHANNEL_POOL_SIZE); + + for (int i = 0; i < CHANNEL_POOL_SIZE; i++) { + channelPool.add(createChannelBuilder(host, grpcPort, sslContext, cn).build()); + } + + // Get filer configuration using first channel + FilerProto.GetFilerConfigurationResponse filerConfigurationResponse = SeaweedFilerGrpc + .newBlockingStub(channelPool.get(0)).getFilerConfiguration( FilerProto.GetFilerConfigurationRequest.newBuilder().build()); cipher = filerConfigurationResponse.getCipher(); collection = filerConfigurationResponse.getCollection(); @@ -73,11 +70,39 @@ public class FilerGrpcClient { } - private FilerGrpcClient(ManagedChannelBuilder<?> channelBuilder) { - channel = channelBuilder.build(); - blockingStub = SeaweedFilerGrpc.newBlockingStub(channel); - asyncStub = SeaweedFilerGrpc.newStub(channel); - futureStub = SeaweedFilerGrpc.newFutureStub(channel); + /** + * Creates a NettyChannelBuilder with common gRPC configuration. + * Supports plaintext and TLS modes with optional authority override. 
+ */ + private NettyChannelBuilder createChannelBuilder(String host, int grpcPort, SslContext sslContext, String cn) { + NettyChannelBuilder builder = NettyChannelBuilder.forAddress(host, grpcPort) + .maxInboundMessageSize(1024 * 1024 * 1024) + .maxInboundMetadataSize(1024 * 1024) + .flowControlWindow(16 * 1024 * 1024) + .initialFlowControlWindow(16 * 1024 * 1024) + .maxHeaderListSize(16 * 1024 * 1024) + .keepAliveTime(30, TimeUnit.SECONDS) + .keepAliveTimeout(10, TimeUnit.SECONDS) + .keepAliveWithoutCalls(true) + .withOption(io.grpc.netty.shaded.io.netty.channel.ChannelOption.SO_RCVBUF, 16 * 1024 * 1024) + .withOption(io.grpc.netty.shaded.io.netty.channel.ChannelOption.SO_SNDBUF, 16 * 1024 * 1024); + + if (sslContext == null) { + builder.usePlaintext(); + } else { + builder.negotiationType(NegotiationType.TLS).sslContext(sslContext); + if (!cn.isEmpty()) { + builder.overrideAuthority(cn); + } + } + return builder; + } + + // Get a channel from the pool using round-robin + private ManagedChannel getChannel() { + int raw = channelIndex.getAndIncrement(); + int index = Math.floorMod(raw, CHANNEL_POOL_SIZE); + return channelPool.get(index); } public boolean isCipher() { @@ -93,19 +118,25 @@ public class FilerGrpcClient { } public void shutdown() throws InterruptedException { - channel.shutdown().awaitTermination(5, TimeUnit.SECONDS); + for (ManagedChannel channel : channelPool) { + channel.shutdown(); + } + for (ManagedChannel channel : channelPool) { + channel.awaitTermination(5, TimeUnit.SECONDS); + } } public SeaweedFilerGrpc.SeaweedFilerBlockingStub getBlockingStub() { - return blockingStub; + // Return a new stub using a channel from the pool (round-robin) + return SeaweedFilerGrpc.newBlockingStub(getChannel()); } public SeaweedFilerGrpc.SeaweedFilerStub getAsyncStub() { - return asyncStub; + return SeaweedFilerGrpc.newStub(getChannel()); } public SeaweedFilerGrpc.SeaweedFilerFutureStub getFutureStub() { - return futureStub; + return SeaweedFilerGrpc.newFutureStub(getChannel()); } public void setAccessVolumeServerDirectly() { diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedInputStream.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedInputStream.java index 64754321b..48a508db0 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedInputStream.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedInputStream.java @@ -23,7 +23,7 @@ public class SeaweedInputStream extends InputStream { private final long contentLength; private FilerProto.Entry entry; - private long position = 0; // cursor of the file + private long position = 0; // cursor of the file private boolean closed = false; @@ -44,7 +44,6 @@ public class SeaweedInputStream extends InputStream { } this.contentLength = SeaweedRead.fileSize(entry); - this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerClient, entry.getChunksList()); LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList); @@ -64,7 +63,6 @@ public class SeaweedInputStream extends InputStream { } this.contentLength = SeaweedRead.fileSize(entry); - this.visibleIntervalList = SeaweedRead.nonOverlappingVisibleIntervals(filerClient, entry.getChunksList()); LOG.debug("new path:{} entry:{} visibleIntervalList:{}", path, entry, visibleIntervalList); @@ -99,7 +97,8 @@ public class SeaweedInputStream extends InputStream { throw new IllegalArgumentException("requested read length is less than zero"); } if (len > (b.length - off)) { - throw new 
IllegalArgumentException("requested read length is more than will fit after requested offset in buffer"); + throw new IllegalArgumentException( + "requested read length is more than will fit after requested offset in buffer"); } ByteBuffer buf = ByteBuffer.wrap(b, off, len); @@ -114,21 +113,30 @@ public class SeaweedInputStream extends InputStream { throw new IllegalArgumentException("attempting to read from negative offset"); } if (position >= contentLength) { - return -1; // Hadoop prefers -1 to EOFException + return -1; // Hadoop prefers -1 to EOFException } long bytesRead = 0; int len = buf.remaining(); - if (this.position< Integer.MAX_VALUE && (this.position + len )<= entry.getContent().size()) { - entry.getContent().substring((int)this.position, (int)(this.position + len)).copyTo(buf); + if (this.position < Integer.MAX_VALUE && (this.position + len) <= entry.getContent().size()) { + entry.getContent().substring((int) this.position, (int) (this.position + len)).copyTo(buf); + bytesRead = len; // FIX: Update bytesRead after inline copy } else { - bytesRead = SeaweedRead.read(this.filerClient, this.visibleIntervalList, this.position, buf, SeaweedRead.fileSize(entry)); + // Use the known contentLength instead of recomputing from the entry to avoid + // races + bytesRead = SeaweedRead.read(this.filerClient, this.visibleIntervalList, this.position, buf, + this.contentLength); } if (bytesRead > Integer.MAX_VALUE) { throw new IOException("Unexpected Content-Length"); } + // Clamp premature EOFs: do not return -1 unless position >= contentLength + if (bytesRead < 0 && position < contentLength) { + bytesRead = 0; + } + if (bytesRead > 0) { this.position += bytesRead; } @@ -188,12 +196,15 @@ public class SeaweedInputStream extends InputStream { } final long remaining = this.contentLength - this.position; return remaining <= Integer.MAX_VALUE - ? (int) remaining : Integer.MAX_VALUE; + ? (int) remaining + : Integer.MAX_VALUE; } /** - * Returns the length of the file that this stream refers to. Note that the length returned is the length - * as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, + * Returns the length of the file that this stream refers to. Note that the + * length returned is the length + * as of the time the Stream was opened. Specifically, if there have been + * subsequent appends to the file, * they wont be reflected in the returned length. * * @return length of the file. 
diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java index 68c281992..ea4c99805 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedOutputStream.java @@ -23,15 +23,13 @@ public class SeaweedOutputStream extends OutputStream { private final ThreadPoolExecutor threadExecutor; private final ExecutorCompletionService<Void> completionService; private final ConcurrentLinkedDeque<WriteOperation> writeOperations; - private final boolean shouldSaveMetadata = false; private FilerProto.Entry.Builder entry; - private long position; + private long position; // Flushed bytes (committed to service) private boolean closed; private volatile IOException lastError; private long lastFlushOffset; private long lastTotalAppendOffset = 0; private ByteBuffer buffer; - private long outputIndex; private String replication = ""; private String collection = ""; @@ -44,7 +42,8 @@ public class SeaweedOutputStream extends OutputStream { } public SeaweedOutputStream(FilerClient filerClient, final String path, FilerProto.Entry.Builder entry, - final long position, final int bufferSize, final String replication) { + final long position, final int bufferSize, final String replication) { + this.filerClient = filerClient; this.replication = replication; this.path = path; @@ -58,8 +57,7 @@ public class SeaweedOutputStream extends OutputStream { this.maxConcurrentRequestCount = Runtime.getRuntime().availableProcessors(); - this.threadExecutor - = new ThreadPoolExecutor(maxConcurrentRequestCount, + this.threadExecutor = new ThreadPoolExecutor(maxConcurrentRequestCount, maxConcurrentRequestCount, 120L, TimeUnit.SECONDS, @@ -77,8 +75,7 @@ public class SeaweedOutputStream extends OutputStream { .setFileMode(0755) .setCrtime(now) .setMtime(now) - .clearGroupName() - ); + .clearGroupName()); } } @@ -86,14 +83,35 @@ public class SeaweedOutputStream extends OutputStream { public void setReplication(String replication) { this.replication = replication; } + public void setCollection(String collection) { this.collection = collection; } + /** + * Get the current position in the output stream. + * This returns the total position including both flushed and buffered data. 
+ * + * @return current position (flushed + buffered bytes) + */ + public synchronized long getPos() throws IOException { + // Guard against NPE if called after close() + if (buffer == null) { + return position; + } + + // Return current position (flushed + buffered) + return position + buffer.position(); + } + public static String getParentDirectory(String path) { int protoIndex = path.indexOf("://"); if (protoIndex >= 0) { - int pathStart = path.indexOf("/", protoIndex+3); + int pathStart = path.indexOf("/", protoIndex + 3); + if (pathStart < 0) { + // No path segment; treat as root (e.g., "seaweedfs://host") + return "/"; + } path = path.substring(pathStart); } if (path.equals("/")) { @@ -116,6 +134,13 @@ public class SeaweedOutputStream extends OutputStream { private synchronized void flushWrittenBytesToServiceInternal(final long offset) throws IOException { try { + + // Set the file size in attributes based on our position + // This ensures Parquet footer metadata matches what we actually wrote + FilerProto.FuseAttributes.Builder attrBuilder = entry.getAttributes().toBuilder(); + attrBuilder.setFileSize(offset); + entry.setAttributes(attrBuilder); + SeaweedWrite.writeMeta(filerClient, getParentDirectory(path), entry); } catch (Exception ex) { throw new IOException(ex); @@ -125,7 +150,7 @@ public class SeaweedOutputStream extends OutputStream { @Override public void write(final int byteVal) throws IOException { - write(new byte[]{(byte) (byteVal & 0xFF)}); + write(new byte[] { (byte) (byteVal & 0xFF) }); } @Override @@ -141,8 +166,6 @@ public class SeaweedOutputStream extends OutputStream { throw new IndexOutOfBoundsException(); } - // System.out.println(path + " write [" + (outputIndex + off) + "," + ((outputIndex + off) + length) + ")"); - int currentOffset = off; int writableBytes = bufferSize - buffer.position(); int numberOfBytesToWrite = length; @@ -154,9 +177,11 @@ public class SeaweedOutputStream extends OutputStream { break; } - // System.out.println(path + " [" + (outputIndex + currentOffset) + "," + ((outputIndex + currentOffset) + writableBytes) + ") " + buffer.capacity()); + // System.out.println(path + " [" + (outputIndex + currentOffset) + "," + + // ((outputIndex + currentOffset) + writableBytes) + ") " + buffer.capacity()); buffer.put(data, currentOffset, writableBytes); currentOffset += writableBytes; + writeCurrentBufferToService(); numberOfBytesToWrite = numberOfBytesToWrite - writableBytes; writableBytes = bufferSize - buffer.position(); @@ -191,7 +216,6 @@ public class SeaweedOutputStream extends OutputStream { return; } - LOG.debug("close path: {}", path); try { flushInternal(); threadExecutor.shutdown(); @@ -209,28 +233,35 @@ public class SeaweedOutputStream extends OutputStream { } private synchronized void writeCurrentBufferToService() throws IOException { - if (buffer.position() == 0) { + int bufferPos = buffer.position(); + + if (bufferPos == 0) { return; } - position += submitWriteBufferToService(buffer, position); + int written = submitWriteBufferToService(buffer, position); + position += written; buffer = ByteBufferPool.request(bufferSize); } - private synchronized int submitWriteBufferToService(final ByteBuffer bufferToWrite, final long writePosition) throws IOException { + private synchronized int submitWriteBufferToService(final ByteBuffer bufferToWrite, final long writePosition) + throws IOException { - ((Buffer)bufferToWrite).flip(); + ((Buffer) bufferToWrite).flip(); int bytesLength = bufferToWrite.limit() - bufferToWrite.position(); if 
(threadExecutor.getQueue().size() >= maxConcurrentRequestCount) { waitForTaskToComplete(); } final Future<Void> job = completionService.submit(() -> { - // System.out.println(path + " is going to save [" + (writePosition) + "," + ((writePosition) + bytesLength) + ")"); - SeaweedWrite.writeData(entry, replication, collection, filerClient, writePosition, bufferToWrite.array(), bufferToWrite.position(), bufferToWrite.limit(), path); - // System.out.println(path + " saved [" + (writePosition) + "," + ((writePosition) + bytesLength) + ")"); + // System.out.println(path + " is going to save [" + (writePosition) + "," + + // ((writePosition) + bytesLength) + ")"); + SeaweedWrite.writeData(entry, replication, collection, filerClient, writePosition, bufferToWrite.array(), + bufferToWrite.position(), bufferToWrite.limit(), path); + // System.out.println(path + " saved [" + (writePosition) + "," + + // ((writePosition) + bytesLength) + ")"); ByteBufferPool.release(bufferToWrite); return null; }); @@ -318,12 +349,10 @@ public class SeaweedOutputStream extends OutputStream { private static class WriteOperation { private final Future<Void> task; - private final long startOffset; private final long length; WriteOperation(final Future<Void> task, final long startOffset, final long length) { this.task = task; - this.startOffset = startOffset; this.length = length; } } diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java index cac85d186..3fd184671 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java @@ -24,9 +24,10 @@ public class SeaweedRead { // returns bytesRead public static long read(FilerClient filerClient, List<VisibleInterval> visibleIntervals, - final long position, final ByteBuffer buf, final long fileSize) throws IOException { + final long position, final ByteBuffer buf, final long fileSize) throws IOException { - List<ChunkView> chunkViews = viewFromVisibles(visibleIntervals, position, buf.remaining()); + int originalRemaining = buf.remaining(); + List<ChunkView> chunkViews = viewFromVisibles(visibleIntervals, position, originalRemaining); Map<String, FilerProto.Locations> knownLocations = new HashMap<>(); @@ -51,7 +52,7 @@ public class SeaweedRead { } } - //TODO parallel this + // TODO parallel this long readCount = 0; long startOffset = position; for (ChunkView chunkView : chunkViews) { @@ -59,7 +60,7 @@ public class SeaweedRead { if (startOffset < chunkView.logicOffset) { long gap = chunkView.logicOffset - startOffset; LOG.debug("zero [{},{})", startOffset, startOffset + gap); - buf.position(buf.position()+ (int)gap); + buf.position(buf.position() + (int) gap); readCount += gap; startOffset += gap; } @@ -81,12 +82,17 @@ public class SeaweedRead { } - long limit = Math.min(buf.limit(), fileSize); + // Fix: Calculate the correct limit based on the read position and requested + // size, + // not the buffer's absolute limit. This fixes the 78-byte EOF error when + // seeking + // near the end of the file. 
+ long limit = Math.min(position + originalRemaining, fileSize); if (startOffset < limit) { long gap = limit - startOffset; LOG.debug("zero2 [{},{})", startOffset, startOffset + gap); - buf.position(buf.position()+ (int)gap); + buf.position(buf.position() + (int) gap); readCount += gap; startOffset += gap; } @@ -94,7 +100,8 @@ public class SeaweedRead { return readCount; } - private static int readChunkView(FilerClient filerClient, long startOffset, ByteBuffer buf, ChunkView chunkView, FilerProto.Locations locations) throws IOException { + private static int readChunkView(FilerClient filerClient, long startOffset, ByteBuffer buf, ChunkView chunkView, + FilerProto.Locations locations) throws IOException { byte[] chunkData = chunkCache.getChunk(chunkView.fileId); @@ -105,13 +112,15 @@ public class SeaweedRead { int len = (int) chunkView.size - (int) (startOffset - chunkView.logicOffset); LOG.debug("readChunkView fid:{} chunkData.length:{} chunkView.offset:{} chunkView[{};{}) startOffset:{}", - chunkView.fileId, chunkData.length, chunkView.offset, chunkView.logicOffset, chunkView.logicOffset + chunkView.size, startOffset); + chunkView.fileId, chunkData.length, chunkView.offset, chunkView.logicOffset, + chunkView.logicOffset + chunkView.size, startOffset); buf.put(chunkData, (int) (startOffset - chunkView.logicOffset + chunkView.offset), len); return len; } - public static byte[] doFetchFullChunkData(FilerClient filerClient, ChunkView chunkView, FilerProto.Locations locations) throws IOException { + public static byte[] doFetchFullChunkData(FilerClient filerClient, ChunkView chunkView, + FilerProto.Locations locations) throws IOException { byte[] data = null; IOException lastException = null; @@ -214,8 +223,7 @@ public class SeaweedRead { chunkStart, isFullChunk, chunk.cipherKey, - chunk.isCompressed - )); + chunk.isCompressed)); } } return views; @@ -239,7 +247,10 @@ public class SeaweedRead { } public static long fileSize(FilerProto.Entry entry) { - return Math.max(totalSize(entry.getChunksList()), entry.getAttributes().getFileSize()); + long chunksSize = totalSize(entry.getChunksList()); + long attrSize = entry.getAttributes().getFileSize(); + long finalSize = Math.max(chunksSize, attrSize); + return finalSize; } public static long totalSize(List<FilerProto.FileChunk> chunksList) { @@ -263,7 +274,8 @@ public class SeaweedRead { public final byte[] cipherKey; public final boolean isCompressed; - public VisibleInterval(long start, long stop, String fileId, long modifiedTime, long chunkOffset, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) { + public VisibleInterval(long start, long stop, String fileId, long modifiedTime, long chunkOffset, + boolean isFullChunk, byte[] cipherKey, boolean isCompressed) { this.start = start; this.stop = stop; this.modifiedTime = modifiedTime; @@ -297,7 +309,8 @@ public class SeaweedRead { public final byte[] cipherKey; public final boolean isCompressed; - public ChunkView(String fileId, long offset, long size, long logicOffset, boolean isFullChunk, byte[] cipherKey, boolean isCompressed) { + public ChunkView(String fileId, long offset, long size, long logicOffset, boolean isFullChunk, byte[] cipherKey, + boolean isCompressed) { this.fileId = fileId; this.offset = offset; this.size = size; diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java index 88c7cefbe..0fadd53cc 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java +++ 
b/other/java/client/src/main/java/seaweedfs/client/SeaweedWrite.java @@ -2,6 +2,7 @@ package seaweedfs.client; import com.google.common.base.Strings; import com.google.protobuf.ByteString; +import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.mime.HttpMultipartMode; @@ -25,13 +26,13 @@ public class SeaweedWrite { private static final SecureRandom random = new SecureRandom(); public static void writeData(FilerProto.Entry.Builder entry, - final String replication, - String collection, - final FilerClient filerClient, - final long offset, - final byte[] bytes, - final long bytesOffset, final long bytesLength, - final String path) throws IOException { + final String replication, + String collection, + final FilerClient filerClient, + final long offset, + final byte[] bytes, + final long bytesOffset, final long bytesLength, + final String path) throws IOException { IOException lastException = null; for (long waitTime = 1000L; waitTime < 10 * 1000; waitTime += waitTime / 2) { @@ -60,21 +61,50 @@ public class SeaweedWrite { } public static FilerProto.FileChunk.Builder writeChunk(final String replication, - final String collection, - final FilerClient filerClient, - final long offset, - final byte[] bytes, - final long bytesOffset, - final long bytesLength, - final String path) throws IOException { - FilerProto.AssignVolumeResponse response = filerClient.getBlockingStub().assignVolume( - FilerProto.AssignVolumeRequest.newBuilder() - .setCollection(Strings.isNullOrEmpty(collection) ? filerClient.getCollection() : collection) - .setReplication(Strings.isNullOrEmpty(replication) ? filerClient.getReplication() : replication) - .setDataCenter("") - .setTtlSec(0) - .setPath(path) - .build()); + final String collection, + final FilerClient filerClient, + final long offset, + final byte[] bytes, + final long bytesOffset, + final long bytesLength, + final String path) throws IOException { + + // Retry assignVolume call for transient network/server errors + FilerProto.AssignVolumeResponse response = null; + IOException lastException = null; + int maxRetries = 3; + + for (int attempt = 0; attempt < maxRetries; attempt++) { + try { + response = filerClient.getBlockingStub().assignVolume( + FilerProto.AssignVolumeRequest.newBuilder() + .setCollection( + Strings.isNullOrEmpty(collection) ? filerClient.getCollection() : collection) + .setReplication( + Strings.isNullOrEmpty(replication) ? filerClient.getReplication() : replication) + .setDataCenter("") + .setTtlSec(0) + .setPath(path) + .build()); + break; // Success, exit retry loop + } catch (io.grpc.StatusRuntimeException e) { + lastException = new IOException( + "assignVolume failed (attempt " + (attempt + 1) + "/" + maxRetries + "): " + e.getMessage(), e); + if (attempt < maxRetries - 1) { + try { + Thread.sleep(100 * (attempt + 1)); // Exponential backoff: 100ms, 200ms, 300ms + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted during retry", ie); + } + } + } + } + + if (response == null) { + throw lastException != null ? 
lastException + : new IOException("assignVolume failed after " + maxRetries + " attempts"); + } if (!Strings.isNullOrEmpty(response.getError())) { throw new IOException(response.getError()); @@ -83,7 +113,8 @@ public class SeaweedWrite { String fileId = response.getFileId(); String auth = response.getAuth(); - String targetUrl = filerClient.getChunkUrl(fileId, response.getLocation().getUrl(), response.getLocation().getPublicUrl()); + String targetUrl = filerClient.getChunkUrl(fileId, response.getLocation().getUrl(), + response.getLocation().getPublicUrl()); ByteString cipherKeyString = com.google.protobuf.ByteString.EMPTY; byte[] cipherKey = null; @@ -94,8 +125,6 @@ public class SeaweedWrite { String etag = multipartUpload(targetUrl, auth, bytes, bytesOffset, bytesLength, cipherKey); - LOG.debug("write file chunk {} size {}", targetUrl, bytesLength); - return FilerProto.FileChunk.newBuilder() .setFileId(fileId) .setOffset(offset) @@ -106,27 +135,28 @@ public class SeaweedWrite { } public static void writeMeta(final FilerClient filerClient, - final String parentDirectory, - final FilerProto.Entry.Builder entry) throws IOException { + final String parentDirectory, + final FilerProto.Entry.Builder entry) throws IOException { synchronized (entry) { - List<FilerProto.FileChunk> chunks = FileChunkManifest.maybeManifestize(filerClient, entry.getChunksList(), parentDirectory); + List<FilerProto.FileChunk> chunks = FileChunkManifest.maybeManifestize(filerClient, entry.getChunksList(), + parentDirectory); + entry.clearChunks(); entry.addAllChunks(chunks); filerClient.getBlockingStub().createEntry( FilerProto.CreateEntryRequest.newBuilder() .setDirectory(parentDirectory) .setEntry(entry) - .build() - ); + .build()); } } private static String multipartUpload(String targetUrl, - String auth, - final byte[] bytes, - final long bytesOffset, final long bytesLength, - byte[] cipherKey) throws IOException { + String auth, + final byte[] bytes, + final long bytesOffset, final long bytesLength, + byte[] cipherKey) throws IOException { MessageDigest md = null; try { md = MessageDigest.getInstance("MD5"); @@ -162,8 +192,10 @@ public class SeaweedWrite { try { if (response.getStatusLine().getStatusCode() / 100 != 2) { - if (response.getEntity().getContentType() != null && response.getEntity().getContentType().getValue().equals("application/json")) { - throw new IOException(EntityUtils.toString(response.getEntity(), "UTF-8")); + HttpEntity entity = response.getEntity(); + if (entity != null && entity.getContentType() != null + && entity.getContentType().getValue().equals("application/json")) { + throw new IOException(EntityUtils.toString(entity, "UTF-8")); } else { throw new IOException(response.getStatusLine().getReasonPhrase()); } diff --git a/other/java/client/src/test/java/seaweedfs/client/GetPosBufferTest.java b/other/java/client/src/test/java/seaweedfs/client/GetPosBufferTest.java new file mode 100644 index 000000000..d49e17e72 --- /dev/null +++ b/other/java/client/src/test/java/seaweedfs/client/GetPosBufferTest.java @@ -0,0 +1,303 @@ +package seaweedfs.client; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import static org.junit.Assert.*; + +/** + * Unit test to reproduce the Parquet EOF issue. + * + * The issue: When Parquet writes column chunks, it calls getPos() to record + * offsets. 
+ * If getPos() returns a position that doesn't include buffered (unflushed) + * data, + * the footer metadata will have incorrect offsets. + * + * This test simulates Parquet's behavior: + * 1. Write some data (column chunk 1) + * 2. Call getPos() - Parquet records this as the END of chunk 1 + * 3. Write more data (column chunk 2) + * 4. Call getPos() - Parquet records this as the END of chunk 2 + * 5. Close the file + * 6. Verify that the recorded positions match the actual file content + * + * Prerequisites: + * - SeaweedFS master, volume server, and filer must be running + * - Default ports: filer HTTP 8888, filer gRPC 18888 + * + * To run: + * export SEAWEEDFS_TEST_ENABLED=true + * cd other/java/client + * mvn test -Dtest=GetPosBufferTest + */ +public class GetPosBufferTest { + + private FilerClient filerClient; + private static final String TEST_ROOT = "/test-getpos-buffer"; + private static final boolean TESTS_ENABLED = "true".equalsIgnoreCase(System.getenv("SEAWEEDFS_TEST_ENABLED")); + + @Before + public void setUp() throws Exception { + if (!TESTS_ENABLED) { + return; + } + + String filerHost = System.getenv().getOrDefault("SEAWEEDFS_FILER_HOST", "localhost"); + String filerGrpcPort = System.getenv().getOrDefault("SEAWEEDFS_FILER_GRPC_PORT", "18888"); + + filerClient = new FilerClient(filerHost, Integer.parseInt(filerGrpcPort)); + + // Clean up any existing test directory + if (filerClient.exists(TEST_ROOT)) { + filerClient.rm(TEST_ROOT, true, true); + } + + // Create test root directory + filerClient.mkdirs(TEST_ROOT, 0755); + } + + @After + public void tearDown() throws Exception { + if (!TESTS_ENABLED) { + return; + } + if (filerClient != null) { + filerClient.rm(TEST_ROOT, true, true); + filerClient.shutdown(); + } + } + + @Test + public void testGetPosWithBufferedData() throws IOException { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n=== Testing getPos() with buffered data ==="); + + String testPath = TEST_ROOT + "/getpos-test.bin"; + + // Simulate what Parquet does when writing column chunks + SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); + + // Write "column chunk 1" - 100 bytes + byte[] chunk1 = new byte[100]; + for (int i = 0; i < 100; i++) { + chunk1[i] = (byte) i; + } + outputStream.write(chunk1); + + // Parquet calls getPos() here to record end of chunk 1 + long posAfterChunk1 = outputStream.getPos(); + System.out.println("Position after chunk 1 (100 bytes): " + posAfterChunk1); + assertEquals("getPos() should return 100 after writing 100 bytes", 100, posAfterChunk1); + + // Write "column chunk 2" - 200 bytes + byte[] chunk2 = new byte[200]; + for (int i = 0; i < 200; i++) { + chunk2[i] = (byte) (i + 100); + } + outputStream.write(chunk2); + + // Parquet calls getPos() here to record end of chunk 2 + long posAfterChunk2 = outputStream.getPos(); + System.out.println("Position after chunk 2 (200 more bytes): " + posAfterChunk2); + assertEquals("getPos() should return 300 after writing 300 bytes total", 300, posAfterChunk2); + + // Write "column chunk 3" - small chunk of 78 bytes (the problematic size!) 
+ byte[] chunk3 = new byte[78]; + for (int i = 0; i < 78; i++) { + chunk3[i] = (byte) (i + 50); + } + outputStream.write(chunk3); + + // Parquet calls getPos() here to record end of chunk 3 + long posAfterChunk3 = outputStream.getPos(); + System.out.println("Position after chunk 3 (78 more bytes): " + posAfterChunk3); + assertEquals("getPos() should return 378 after writing 378 bytes total", 378, posAfterChunk3); + + // Close to flush everything + outputStream.close(); + System.out.println("File closed successfully"); + + // Now read the file and verify its actual size matches what getPos() reported + FilerProto.Entry entry = filerClient.lookupEntry( + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + + long actualFileSize = SeaweedRead.fileSize(entry); + System.out.println("Actual file size on disk: " + actualFileSize); + + assertEquals("File size should match the last getPos() value", 378, actualFileSize); + + // Now read the file and verify we can read all the data + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); + + byte[] readBuffer = new byte[500]; // Larger buffer to read everything + int totalRead = 0; + int bytesRead; + while ((bytesRead = inputStream.read(readBuffer, totalRead, readBuffer.length - totalRead)) > 0) { + totalRead += bytesRead; + } + inputStream.close(); + + System.out.println("Total bytes read: " + totalRead); + assertEquals("Should read exactly 378 bytes", 378, totalRead); + + // Verify the data is correct + for (int i = 0; i < 100; i++) { + assertEquals("Chunk 1 data mismatch at byte " + i, (byte) i, readBuffer[i]); + } + for (int i = 0; i < 200; i++) { + assertEquals("Chunk 2 data mismatch at byte " + (100 + i), (byte) (i + 100), readBuffer[100 + i]); + } + for (int i = 0; i < 78; i++) { + assertEquals("Chunk 3 data mismatch at byte " + (300 + i), (byte) (i + 50), readBuffer[300 + i]); + } + + System.out.println("SUCCESS: All data verified correctly!\n"); + } + + @Test + public void testGetPosWithSmallWrites() throws IOException { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n=== Testing getPos() with many small writes (Parquet pattern) ==="); + + String testPath = TEST_ROOT + "/small-writes-test.bin"; + + SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); + + // Parquet writes column data in small chunks and frequently calls getPos() + String[] columnData = { "Alice", "Bob", "Charlie", "David" }; + long[] recordedPositions = new long[columnData.length]; + + for (int i = 0; i < columnData.length; i++) { + byte[] data = columnData[i].getBytes(StandardCharsets.UTF_8); + outputStream.write(data); + + // Parquet calls getPos() after each value to track offsets + recordedPositions[i] = outputStream.getPos(); + System.out.println("After writing '" + columnData[i] + "': pos=" + recordedPositions[i]); + } + + long finalPos = outputStream.getPos(); + System.out.println("Final position before close: " + finalPos); + + outputStream.close(); + + // Verify file size + FilerProto.Entry entry = filerClient.lookupEntry( + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + long actualFileSize = SeaweedRead.fileSize(entry); + + System.out.println("Actual file size: " + actualFileSize); + assertEquals("File size should match final getPos()", finalPos, actualFileSize); + + // Verify we can read using the recorded positions + 
SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); + + long currentPos = 0; + for (int i = 0; i < columnData.length; i++) { + long nextPos = recordedPositions[i]; + int length = (int) (nextPos - currentPos); + + byte[] buffer = new byte[length]; + int bytesRead = inputStream.read(buffer, 0, length); + + assertEquals("Should read " + length + " bytes for '" + columnData[i] + "'", length, bytesRead); + + String readData = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + System.out.println("Read at offset " + currentPos + ": '" + readData + "'"); + assertEquals("Data mismatch", columnData[i], readData); + + currentPos = nextPos; + } + + inputStream.close(); + + System.out.println("SUCCESS: Small writes with getPos() tracking work correctly!\n"); + } + + @Test + public void testGetPosWithExactly78BytesBuffered() throws IOException { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n=== Testing getPos() with EXACTLY 78 bytes buffered (the bug size!) ==="); + + String testPath = TEST_ROOT + "/78-bytes-test.bin"; + + SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); + + // Write some initial data + byte[] initial = new byte[1000]; + for (int i = 0; i < 1000; i++) { + initial[i] = (byte) i; + } + outputStream.write(initial); + outputStream.flush(); // Ensure this is flushed + + long posAfterFlush = outputStream.getPos(); + System.out.println("Position after 1000 bytes + flush: " + posAfterFlush); + assertEquals("Should be at position 1000 after flush", 1000, posAfterFlush); + + // Now write EXACTLY 78 bytes (the problematic buffer size in our bug) + byte[] problematicChunk = new byte[78]; + for (int i = 0; i < 78; i++) { + problematicChunk[i] = (byte) (i + 50); + } + outputStream.write(problematicChunk); + + // DO NOT FLUSH - this is the bug scenario! + // Parquet calls getPos() here while the 78 bytes are still buffered + long posWithBufferedData = outputStream.getPos(); + System.out.println("Position with 78 bytes BUFFERED (not flushed): " + posWithBufferedData); + + // This MUST return 1078, not 1000! 
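The assertion just below is the contract this whole test class exists to pin down: getPos() must report the logical write position, counting bytes still sitting in the stream's in-memory buffer, not only bytes already flushed out as chunks. A minimal sketch of that contract, using assumed names (flushedPosition, a pending ByteBuffer) rather than the actual SeaweedOutputStream internals:

    import java.nio.ByteBuffer;

    // Sketch only - assumed structure, not the real SeaweedOutputStream.
    class BufferedPositionSketch {
        private long flushedPosition;                        // bytes already written out as chunks
        private final ByteBuffer buffer = ByteBuffer.allocate(8 * 1024 * 1024); // bytes accepted by write() but not yet flushed

        public synchronized long getPos() {
            // Include pending, unflushed bytes - this is what the assertion below requires.
            return flushedPosition + buffer.position();
        }
    }

With that shape, the 78 bytes buffered above are counted and getPos() reports 1078 even though only 1000 bytes have actually been flushed.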
+ assertEquals("getPos() MUST include buffered data", 1078, posWithBufferedData); + + // Now close (which will flush) + outputStream.close(); + + // Verify actual file size + FilerProto.Entry entry = filerClient.lookupEntry( + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + long actualFileSize = SeaweedRead.fileSize(entry); + + System.out.println("Actual file size: " + actualFileSize); + assertEquals("File size must be 1078", 1078, actualFileSize); + + // Try to read at position 1000 for 78 bytes (what Parquet would try) + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); + inputStream.seek(1000); + + byte[] readBuffer = new byte[78]; + int bytesRead = inputStream.read(readBuffer, 0, 78); + + System.out.println("Bytes read at position 1000: " + bytesRead); + assertEquals("Should successfully read 78 bytes at position 1000", 78, bytesRead); + + // Verify the data matches + for (int i = 0; i < 78; i++) { + assertEquals("Data mismatch at byte " + i, problematicChunk[i], readBuffer[i]); + } + + inputStream.close(); + + System.out.println("SUCCESS: getPos() correctly includes buffered data!\n"); + } +} diff --git a/other/java/client/src/test/java/seaweedfs/client/SeaweedStreamIntegrationTest.java b/other/java/client/src/test/java/seaweedfs/client/SeaweedStreamIntegrationTest.java index f384e059f..3cfb2ce9e 100644 --- a/other/java/client/src/test/java/seaweedfs/client/SeaweedStreamIntegrationTest.java +++ b/other/java/client/src/test/java/seaweedfs/client/SeaweedStreamIntegrationTest.java @@ -28,22 +28,21 @@ public class SeaweedStreamIntegrationTest { private FilerClient filerClient; private static final String TEST_ROOT = "/test-stream-integration"; - private static final boolean TESTS_ENABLED = - "true".equalsIgnoreCase(System.getenv("SEAWEEDFS_TEST_ENABLED")); + private static final boolean TESTS_ENABLED = "true".equalsIgnoreCase(System.getenv("SEAWEEDFS_TEST_ENABLED")); @Before public void setUp() throws Exception { if (!TESTS_ENABLED) { return; } - + filerClient = new FilerClient("localhost", 18888); - + // Clean up any existing test directory if (filerClient.exists(TEST_ROOT)) { filerClient.rm(TEST_ROOT, true, true); } - + // Create test root directory filerClient.mkdirs(TEST_ROOT, 0755); } @@ -53,7 +52,7 @@ public class SeaweedStreamIntegrationTest { if (!TESTS_ENABLED || filerClient == null) { return; } - + try { // Clean up test directory if (filerClient.exists(TEST_ROOT)) { @@ -70,30 +69,29 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/small.txt"; String testContent = "Hello, SeaweedFS!"; - + // Write file SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.write(testContent.getBytes(StandardCharsets.UTF_8)); outputStream.close(); - + // Verify file exists assertTrue("File should exist", filerClient.exists(testPath)); - + // Read file FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); assertNotNull("Entry should not be null", entry); - + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); byte[] buffer = new byte[testContent.length()]; int bytesRead = inputStream.read(buffer); inputStream.close(); - + 
assertEquals("Should read all bytes", testContent.length(), bytesRead); assertEquals("Content should match", testContent, new String(buffer, StandardCharsets.UTF_8)); } @@ -104,43 +102,42 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/large.bin"; int fileSize = 10 * 1024 * 1024; // 10 MB - + // Generate random data byte[] originalData = new byte[fileSize]; new Random(42).nextBytes(originalData); // Use seed for reproducibility - + // Write file SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.write(originalData); outputStream.close(); - + // Verify file exists assertTrue("File should exist", filerClient.exists(testPath)); - + // Read file FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); assertNotNull("Entry should not be null", entry); - + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); - + // Read file in chunks to handle large files properly byte[] readData = new byte[fileSize]; int totalRead = 0; int bytesRead; byte[] buffer = new byte[8192]; // Read in 8KB chunks - + while ((bytesRead = inputStream.read(buffer)) > 0) { System.arraycopy(buffer, 0, readData, totalRead, bytesRead); totalRead += bytesRead; } inputStream.close(); - + assertEquals("Should read all bytes", fileSize, totalRead); assertArrayEquals("Content should match", originalData, readData); } @@ -151,31 +148,30 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/chunked.txt"; - String[] chunks = {"First chunk. ", "Second chunk. ", "Third chunk."}; - + String[] chunks = { "First chunk. ", "Second chunk. ", "Third chunk." 
}; + // Write file in chunks SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); for (String chunk : chunks) { outputStream.write(chunk.getBytes(StandardCharsets.UTF_8)); } outputStream.close(); - + // Read and verify FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); - + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); byte[] buffer = new byte[1024]; int bytesRead = inputStream.read(buffer); inputStream.close(); - + String expected = String.join("", chunks); String actual = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); - + assertEquals("Content should match", expected, actual); } @@ -185,31 +181,30 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/offset.txt"; String testContent = "0123456789ABCDEFGHIJ"; - + // Write file SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.write(testContent.getBytes(StandardCharsets.UTF_8)); outputStream.close(); - + // Read with offset FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); - + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); inputStream.seek(10); // Skip first 10 bytes - + byte[] buffer = new byte[10]; int bytesRead = inputStream.read(buffer); inputStream.close(); - + assertEquals("Should read 10 bytes", 10, bytesRead); - assertEquals("Should read from offset", "ABCDEFGHIJ", - new String(buffer, StandardCharsets.UTF_8)); + assertEquals("Should read from offset", "ABCDEFGHIJ", + new String(buffer, StandardCharsets.UTF_8)); } @Test @@ -218,32 +213,31 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/partial.txt"; String testContent = "The quick brown fox jumps over the lazy dog"; - + // Write file SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.write(testContent.getBytes(StandardCharsets.UTF_8)); outputStream.close(); - + // Read partial FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); - + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); - + // Read only "quick brown" inputStream.seek(4); byte[] buffer = new byte[11]; int bytesRead = inputStream.read(buffer); inputStream.close(); - + assertEquals("Should read 11 bytes", 11, bytesRead); - assertEquals("Should read partial content", "quick brown", - new String(buffer, StandardCharsets.UTF_8)); + assertEquals("Should read partial content", "quick brown", + new String(buffer, StandardCharsets.UTF_8)); } @Test @@ -252,28 +246,27 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/empty.txt"; - + // Write empty file SeaweedOutputStream 
outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.close(); - + // Verify file exists assertTrue("File should exist", filerClient.exists(testPath)); - + // Read empty file FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); assertNotNull("Entry should not be null", entry); - + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); byte[] buffer = new byte[100]; int bytesRead = inputStream.read(buffer); inputStream.close(); - + assertEquals("Should read 0 bytes from empty file", -1, bytesRead); } @@ -283,32 +276,31 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/overwrite.txt"; String originalContent = "Original content"; String newContent = "New content that overwrites the original"; - + // Write original file SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.write(originalContent.getBytes(StandardCharsets.UTF_8)); outputStream.close(); - + // Overwrite file outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.write(newContent.getBytes(StandardCharsets.UTF_8)); outputStream.close(); - + // Read and verify FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); - + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); byte[] buffer = new byte[1024]; int bytesRead = inputStream.read(buffer); inputStream.close(); - + String actual = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); assertEquals("Should have new content", newContent, actual); } @@ -319,23 +311,22 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/multireads.txt"; String testContent = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - + // Write file SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.write(testContent.getBytes(StandardCharsets.UTF_8)); outputStream.close(); - + // Read in multiple small chunks FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); - + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); - + StringBuilder result = new StringBuilder(); byte[] buffer = new byte[5]; int bytesRead; @@ -343,7 +334,7 @@ public class SeaweedStreamIntegrationTest { result.append(new String(buffer, 0, bytesRead, StandardCharsets.UTF_8)); } inputStream.close(); - + assertEquals("Should read entire content", testContent, result.toString()); } @@ -353,29 +344,28 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/binary.bin"; byte[] binaryData = new byte[256]; for (int i = 0; i < 256; i++) { binaryData[i] = (byte) i; } - + // Write binary file SeaweedOutputStream outputStream = new 
SeaweedOutputStream(filerClient, testPath); outputStream.write(binaryData); outputStream.close(); - + // Read and verify FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); - + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); byte[] readData = new byte[256]; int bytesRead = inputStream.read(readData); inputStream.close(); - + assertEquals("Should read all bytes", 256, bytesRead); assertArrayEquals("Binary data should match", binaryData, readData); } @@ -386,32 +376,132 @@ public class SeaweedStreamIntegrationTest { System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); return; } - + String testPath = TEST_ROOT + "/flush.txt"; String testContent = "Content to flush"; - + // Write file with flush SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); outputStream.write(testContent.getBytes(StandardCharsets.UTF_8)); outputStream.flush(); // Explicitly flush outputStream.close(); - + // Verify file was written assertTrue("File should exist after flush", filerClient.exists(testPath)); - + // Read and verify FilerProto.Entry entry = filerClient.lookupEntry( - SeaweedOutputStream.getParentDirectory(testPath), - SeaweedOutputStream.getFileName(testPath) - ); - + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); byte[] buffer = new byte[testContent.length()]; int bytesRead = inputStream.read(buffer); inputStream.close(); - - assertEquals("Content should match", testContent, - new String(buffer, 0, bytesRead, StandardCharsets.UTF_8)); + + assertEquals("Content should match", testContent, + new String(buffer, 0, bytesRead, StandardCharsets.UTF_8)); } -} + /** + * Tests range reads similar to how Parquet reads column chunks. + * This simulates: + * 1. Seeking to specific offsets + * 2. Reading specific byte ranges + * 3. Verifying each read() call returns the correct number of bytes + * + * This test specifically addresses the bug where read() was returning 0 + * for inline content or -1 prematurely for chunked reads. 
+ */ + @Test + public void testRangeReads() throws IOException { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + String testPath = TEST_ROOT + "/rangereads.dat"; + + // Create a 1275-byte file (similar to the Parquet file size that was failing) + byte[] testData = new byte[1275]; + Random random = new Random(42); // Fixed seed for reproducibility + random.nextBytes(testData); + + // Write file + SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); + outputStream.write(testData); + outputStream.close(); + + // Read file entry + FilerProto.Entry entry = filerClient.lookupEntry( + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + + // Test 1: Read last 8 bytes (like reading Parquet footer length) + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); + inputStream.seek(1267); + byte[] buffer = new byte[8]; + int bytesRead = inputStream.read(buffer, 0, 8); + assertEquals("Should read 8 bytes at offset 1267", 8, bytesRead); + assertArrayEquals("Content at offset 1267 should match", + Arrays.copyOfRange(testData, 1267, 1275), buffer); + inputStream.close(); + + // Test 2: Read large chunk in middle (like reading column data) + inputStream = new SeaweedInputStream(filerClient, testPath, entry); + inputStream.seek(383); + buffer = new byte[884]; // Read bytes 383-1267 + bytesRead = inputStream.read(buffer, 0, 884); + assertEquals("Should read 884 bytes at offset 383", 884, bytesRead); + assertArrayEquals("Content at offset 383 should match", + Arrays.copyOfRange(testData, 383, 1267), buffer); + inputStream.close(); + + // Test 3: Read from beginning (like reading Parquet magic bytes) + inputStream = new SeaweedInputStream(filerClient, testPath, entry); + buffer = new byte[4]; + bytesRead = inputStream.read(buffer, 0, 4); + assertEquals("Should read 4 bytes at offset 0", 4, bytesRead); + assertArrayEquals("Content at offset 0 should match", + Arrays.copyOfRange(testData, 0, 4), buffer); + inputStream.close(); + + // Test 4: Multiple sequential reads without seeking (like + // H2SeekableInputStream.readFully) + // This is the critical test case that was failing! 
+ inputStream = new SeaweedInputStream(filerClient, testPath, entry); + inputStream.seek(1197); // Position where EOF was being returned prematurely + + byte[] fullBuffer = new byte[78]; // Try to read the "missing" 78 bytes + int totalRead = 0; + int offset = 0; + int remaining = 78; + + // Simulate Parquet's H2SeekableInputStream.readFully() loop + while (remaining > 0) { + int read = inputStream.read(fullBuffer, offset, remaining); + if (read == -1) { + fail(String.format( + "Got EOF after reading %d bytes, but expected to read %d more bytes (total requested: 78)", + totalRead, remaining)); + } + assertTrue("Each read() should return positive bytes", read > 0); + totalRead += read; + offset += read; + remaining -= read; + } + + assertEquals("Should read all 78 bytes in readFully loop", 78, totalRead); + assertArrayEquals("Content at offset 1197 should match", + Arrays.copyOfRange(testData, 1197, 1275), fullBuffer); + inputStream.close(); + + // Test 5: Read entire file in one go + inputStream = new SeaweedInputStream(filerClient, testPath, entry); + byte[] allData = new byte[1275]; + bytesRead = inputStream.read(allData, 0, 1275); + assertEquals("Should read entire 1275 bytes", 1275, bytesRead); + assertArrayEquals("Entire content should match", testData, allData); + inputStream.close(); + } +} diff --git a/other/java/examples/pom.xml b/other/java/examples/pom.xml index 5c0981eae..51b14e9ea 100644 --- a/other/java/examples/pom.xml +++ b/other/java/examples/pom.xml @@ -16,14 +16,14 @@ </dependency> <dependency> <groupId>com.seaweedfs</groupId> - <artifactId>seaweedfs-hadoop2-client</artifactId> + <artifactId>seaweedfs-hadoop3-client</artifactId> <version>3.80</version> <scope>compile</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <version>2.10.2</version> + <version>3.4.0</version> <scope>compile</scope> </dependency> </dependencies> diff --git a/other/java/hdfs-over-ftp/pom.xml b/other/java/hdfs-over-ftp/pom.xml deleted file mode 100644 index 3f7e6c4b0..000000000 --- a/other/java/hdfs-over-ftp/pom.xml +++ /dev/null @@ -1,120 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <groupId>hdfs-over-ftp</groupId> - <artifactId>hdfs-over-ftp</artifactId> - <version>1.0</version> - - <parent> - <groupId>org.springframework.boot</groupId> - <artifactId>spring-boot-starter-parent</artifactId> - <version>2.4.3</version> - </parent> - - <dependencies> - <dependency> - <groupId>org.springframework.boot</groupId> - <artifactId>spring-boot-starter</artifactId> - </dependency> - <dependency> - <groupId>org.springframework.boot</groupId> - <artifactId>spring-boot-starter-web</artifactId> - </dependency> - <dependency> - <groupId>io.springfox</groupId> - <artifactId>springfox-swagger2</artifactId> - <version>2.9.2</version> - </dependency> - <dependency> - <groupId>io.springfox</groupId> - <artifactId>springfox-swagger-ui</artifactId> - <version>2.10.0</version> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-common</artifactId> - <version>3.4.0</version> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>3.2.1</version> - </dependency> - <dependency> - 
<groupId>org.apache.ftpserver</groupId> - <artifactId>ftpserver-core</artifactId> - <version>1.1.1</version> - </dependency> - <dependency> - <groupId>com.seaweedfs</groupId> - <artifactId>seaweedfs-hadoop3-client</artifactId> - <version>1.6.2</version> - </dependency> - </dependencies> - - - <build> - <plugins> - <plugin> - <groupId>org.springframework.boot</groupId> - <artifactId>spring-boot-maven-plugin</artifactId> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <version>3.1</version> - <configuration> - <source>1.8</source> - <target>1.8</target> - <encoding>UTF-8</encoding> - <compilerArguments> - <verbose /> - <bootclasspath>${java.home}/lib/rt.jar</bootclasspath> - </compilerArguments> - </configuration> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <version>2.6</version> - <configuration> - <archive> - <manifest> - <mainClass>org.apache.hadoop.seaweed.ftp.ApplicationServer</mainClass> - <addClasspath>true</addClasspath> - <classpathPrefix>lib/</classpathPrefix> - </manifest> - <manifestEntries> - <Class-Path>./</Class-Path> - </manifestEntries> - </archive> - </configuration> - </plugin> - - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <configuration> - <appendAssemblyId>false</appendAssemblyId> - <descriptors> - <descriptor>src/main/resources/assembly.xml</descriptor> - </descriptors> - </configuration> - <executions> - <execution> - <id>make-assembly</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> - -</project> diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/ApplicationServer.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/ApplicationServer.java deleted file mode 100644 index b8ef1d840..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/ApplicationServer.java +++ /dev/null @@ -1,14 +0,0 @@ -package org.apache.hadoop.seaweed.ftp; - -import org.springframework.boot.SpringApplication; -import org.springframework.boot.autoconfigure.SpringBootApplication; - - -@SpringBootApplication -public class ApplicationServer { - - public static void main(String[] args) { - SpringApplication.run(ApplicationServer.class, args); - } - -} diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/config/SwaggerConfig.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/config/SwaggerConfig.java deleted file mode 100644 index 3c395493d..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/config/SwaggerConfig.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.config; - -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import springfox.documentation.builders.ApiInfoBuilder; -import springfox.documentation.builders.PathSelectors; -import springfox.documentation.builders.RequestHandlerSelectors; -import springfox.documentation.spi.DocumentationType; -import springfox.documentation.spring.web.plugins.Docket; -import springfox.documentation.swagger2.annotations.EnableSwagger2; - -@Configuration -@EnableSwagger2 -public class SwaggerConfig { - @Bean - public Docket createRestApi() { - return new Docket(DocumentationType.SWAGGER_2) - .pathMapping("/") - .select() - 
.apis(RequestHandlerSelectors.basePackage("org.apache.hadoop.seaweed.ftp")) - .paths(PathSelectors.any()) - .build().apiInfo(new ApiInfoBuilder() - .title("FTP API Doc") - .version("1.0") - .build()); - } -}
\ No newline at end of file diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/FtpManagerController.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/FtpManagerController.java deleted file mode 100644 index 7a5a4e74d..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/FtpManagerController.java +++ /dev/null @@ -1,71 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.controller; - -import io.swagger.annotations.Api; -import io.swagger.annotations.ApiOperation; -import org.apache.hadoop.seaweed.ftp.service.HFtpService; -import org.apache.hadoop.seaweed.ftp.controller.vo.Result; -import org.apache.log4j.Logger; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PutMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RestController; - -import java.util.HashMap; -import java.util.Map; - -@RestController -@RequestMapping("/manager") -@Api(tags = "FTP操作管理") -public class FtpManagerController { - - private static Logger log = Logger.getLogger(FtpManagerController.class); - - @Autowired - private HFtpService hdfsOverFtpServer; - - @GetMapping("/status") - @ApiOperation("查看FTP服务状态") - public Result status() { - Map map = new HashMap<>(); - try { - boolean status = hdfsOverFtpServer.statusServer(); - map.put("is_running", status); - return new Result(true, map, "FTP 服务状态获取成功"); - }catch (Exception e) { - log.error(e); - map.put("is_running", false); - return new Result(true, map, "FTP 服务状态获取成功"); - } - } - - @PutMapping("/start") - @ApiOperation("启动FTP服务") - public Result start() { - try { - boolean status = hdfsOverFtpServer.statusServer(); - if(!status) { - hdfsOverFtpServer.startServer(); - } - return new Result(true, "FTP 服务启动成功"); - }catch (Exception e) { - log.error(e); - return new Result(false, "FTP 服务启动失败"); - } - } - - @PutMapping("/stop") - @ApiOperation("停止FTP服务") - public Result stop() { - try { - boolean status = hdfsOverFtpServer.statusServer(); - if(status) { - hdfsOverFtpServer.stopServer(); - } - return new Result(true, "FTP 服务停止成功"); - }catch (Exception e) { - log.error(e); - return new Result(false, "FTP 服务停止失败"); - } - } -} diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/UserController.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/UserController.java deleted file mode 100644 index c4d2261b3..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/UserController.java +++ /dev/null @@ -1,98 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.controller; - -import io.swagger.annotations.Api; -import io.swagger.annotations.ApiOperation; -import org.apache.ftpserver.ftplet.User; -import org.apache.ftpserver.usermanager.Md5PasswordEncryptor; -import org.apache.ftpserver.usermanager.UserFactory; -import org.apache.hadoop.seaweed.ftp.controller.vo.FtpUser; -import org.apache.hadoop.seaweed.ftp.controller.vo.Result; -import org.apache.hadoop.seaweed.ftp.users.HdfsUserManager; -import org.apache.log4j.Logger; -import org.springframework.web.bind.annotation.*; - -import java.io.File; - -@RestController -@RequestMapping("/user") -@Api(tags = "FTP用户管理") -public class UserController { - - private static Logger log = Logger.getLogger(UserController.class); - - /*** - * { - 
* "name": "test", - * "password": "test", - * "homeDirectory": "/buckets/test/" - * } - * @param ftpUser - * @return - */ - @PostMapping("/add") - @ApiOperation("新增/编辑用户") - public Result add(@RequestBody FtpUser ftpUser) { - try { - HdfsUserManager userManagerFactory = new HdfsUserManager(); - userManagerFactory.setFile(new File(System.getProperty("user.dir") + File.separator + "users.properties")); - userManagerFactory.setPasswordEncryptor(new Md5PasswordEncryptor()); - - UserFactory userFactory = new UserFactory(); - userFactory.setHomeDirectory(ftpUser.getHomeDirectory()); - userFactory.setName(ftpUser.getName()); - userFactory.setPassword(ftpUser.getPassword()); - userFactory.setEnabled(ftpUser.isEnabled()); - userFactory.setMaxIdleTime(ftpUser.getMaxIdleTime()); - - User user = userFactory.createUser(); - userManagerFactory.save(user, ftpUser.isRenamePush()); - return new Result(true, "新建用户成功"); - }catch (Exception e) { - log.error(e); - return new Result(false, "新建用户失败"); - } - } - - @DeleteMapping("/delete/{user}") - @ApiOperation("删除用户") - public Result delete(@PathVariable(value = "user") String user) { - try { - HdfsUserManager userManagerFactory = new HdfsUserManager(); - userManagerFactory.setFile(new File(System.getProperty("user.dir") + File.separator + "users.properties")); - userManagerFactory.delete(user); - return new Result(true, "删除用户成功"); - }catch (Exception e) { - log.error(e); - return new Result(false, "删除用户失败"); - } - } - - @GetMapping("/show/{userName}") - @ApiOperation("查看用户") - public Result show(@PathVariable(value = "userName") String userName) { - try { - HdfsUserManager userManagerFactory = new HdfsUserManager(); - userManagerFactory.setFile(new File(System.getProperty("user.dir") + File.separator + "users.properties")); - User user = userManagerFactory.getUserByName(userName); - FtpUser ftpUser = new FtpUser(user.getHomeDirectory(), user.getPassword(), user.getEnabled(), user.getName(), user.getMaxIdleTime(), HdfsUserManager.getUserRenamePush(userName)); - return new Result(true, ftpUser, "获取用户信息成功"); - }catch (Exception e) { - log.error(e); - return new Result(false, "获取用户信息失败"); - } - } - - @GetMapping("/list") - @ApiOperation("列举用户") - public Result list() { - try { - HdfsUserManager userManagerFactory = new HdfsUserManager(); - userManagerFactory.setFile(new File(System.getProperty("user.dir") + File.separator + "users.properties")); - String[] allUserNames = userManagerFactory.getAllUserNames(); - return new Result(true, allUserNames, "列举用户成功"); - }catch (Exception e) { - log.error(e); - return new Result(false, "列举用户失败"); - } - } -} diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/vo/FtpUser.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/vo/FtpUser.java deleted file mode 100644 index 953d08603..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/vo/FtpUser.java +++ /dev/null @@ -1,71 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.controller.vo; - -public class FtpUser { - - private String homeDirectory; - private String password; - private boolean enabled; - private String name; - private int maxIdleTime; - private boolean renamePush; - - public FtpUser() { - } - - public FtpUser(String homeDirectory, String password, boolean enabled, String name, int maxIdleTime, boolean renamePush) { - this.homeDirectory = homeDirectory; - this.password = password; - this.enabled = enabled; - this.name = name; - this.maxIdleTime = 
maxIdleTime; - this.renamePush = renamePush; - } - - public String getHomeDirectory() { - return homeDirectory; - } - - public void setHomeDirectory(String homeDirectory) { - this.homeDirectory = homeDirectory; - } - - public String getPassword() { - return password; - } - - public void setPassword(String password) { - this.password = password; - } - - public boolean isEnabled() { - return enabled; - } - - public void setEnabled(boolean enabled) { - this.enabled = enabled; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public int getMaxIdleTime() { - return maxIdleTime; - } - - public void setMaxIdleTime(int maxIdleTime) { - this.maxIdleTime = maxIdleTime; - } - - public boolean isRenamePush() { - return renamePush; - } - - public void setRenamePush(boolean renamePush) { - this.renamePush = renamePush; - } -} diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/vo/Result.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/vo/Result.java deleted file mode 100644 index b6a480ba7..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/controller/vo/Result.java +++ /dev/null @@ -1,43 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.controller.vo; - -public class Result { - - private boolean status; - private Object data; - private String message; - - public Result(boolean status, String message) { - this.status = status; - this.message = message; - } - - public Result(boolean status, Object data, String message) { - this.status = status; - this.message = message; - this.data = data; - } - - public boolean isStatus() { - return status; - } - - public void setStatus(boolean status) { - this.status = status; - } - - public String getMessage() { - return message; - } - - public void setMessage(String message) { - this.message = message; - } - - public Object getData() { - return data; - } - - public void setData(Object data) { - this.data = data; - } -} diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/HFtpService.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/HFtpService.java deleted file mode 100644 index 9fe5dfd95..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/HFtpService.java +++ /dev/null @@ -1,102 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.service; - -import org.apache.ftpserver.DataConnectionConfiguration; -import org.apache.ftpserver.DataConnectionConfigurationFactory; -import org.apache.ftpserver.FtpServer; -import org.apache.ftpserver.FtpServerFactory; -import org.apache.ftpserver.command.CommandFactoryFactory; -import org.apache.ftpserver.listener.ListenerFactory; -import org.apache.hadoop.seaweed.ftp.service.filesystem.HdfsFileSystemManager; -import org.apache.hadoop.seaweed.ftp.service.filesystem.HdfsOverFtpSystem; -import org.apache.hadoop.seaweed.ftp.users.HdfsUserManager; -import org.apache.log4j.Logger; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Component; - -import java.io.File; - -/** - * reference: https://github.com/AShiou/hof - */ -@Component -public class HFtpService { - - private static Logger log = Logger.getLogger(HFtpService.class); - - @Value("${ftp.port}") - private int port = 0; - - @Value("${ftp.passive-address}") - private String passiveAddress; - - @Value("${ftp.passive-ports}") - private String passivePorts; - 
- @Value("${hdfs.uri}") - private String hdfsUri; - - @Value("${seaweedFs.enable}") - private boolean seaweedFsEnable; - - @Value("${seaweedFs.access}") - private String seaweedFsAccess; - - @Value("${seaweedFs.replication}") - private String seaweedFsReplication; - - private FtpServer ftpServer = null; - - public void startServer() throws Exception { - log.info("Starting HDFS-Over-Ftp server. port: " + port + " passive-address: " + passiveAddress + " passive-ports: " + passivePorts + " hdfs-uri: " + hdfsUri); - - HdfsOverFtpSystem.setHdfsUri(hdfsUri); - HdfsOverFtpSystem.setSeaweedFsEnable(seaweedFsEnable); - HdfsOverFtpSystem.setSeaweedFsAccess(seaweedFsAccess); - HdfsOverFtpSystem.setSeaweedFsReplication(seaweedFsReplication); - - FtpServerFactory server = new FtpServerFactory(); - server.setFileSystem(new HdfsFileSystemManager()); - - ListenerFactory factory = new ListenerFactory(); - factory.setPort(port); - - DataConnectionConfigurationFactory dccFactory = new DataConnectionConfigurationFactory(); - dccFactory.setPassiveAddress("0.0.0.0"); - dccFactory.setPassivePorts(passivePorts); - dccFactory.setPassiveExternalAddress(passiveAddress); - DataConnectionConfiguration dcc = dccFactory.createDataConnectionConfiguration(); - factory.setDataConnectionConfiguration(dcc); - - server.addListener("default", factory.createListener()); - - HdfsUserManager userManager = new HdfsUserManager(); - final File file = loadResource("/users.properties"); - userManager.setFile(file); - server.setUserManager(userManager); - - CommandFactoryFactory cmFact = new CommandFactoryFactory(); - cmFact.setUseDefaultCommands(true); - server.setCommandFactory(cmFact.createCommandFactory()); - - // start the server - ftpServer = server.createServer(); - ftpServer.start(); - } - - public void stopServer() { - log.info("Stopping Hdfs-Over-Ftp server. port: " + port + " passive-address: " + passiveAddress + " passive-ports: " + passivePorts + " hdfs-uri: " + hdfsUri); - ftpServer.stop(); - } - - public boolean statusServer() { - try { - return !ftpServer.isStopped(); - }catch (Exception e) { - return false; - } - } - - private static File loadResource(String resourceName) { - return new File(System.getProperty("user.dir") + resourceName); - } -}
\ No newline at end of file diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileObject.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileObject.java deleted file mode 100644 index e97c2dc14..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileObject.java +++ /dev/null @@ -1,333 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.service.filesystem; - -import org.apache.ftpserver.ftplet.FtpFile; -import org.apache.ftpserver.ftplet.User; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.seaweed.ftp.users.HdfsUser; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -/** - * This class implements all actions to HDFS - */ -public class HdfsFileObject implements FtpFile { - - private final Logger log = LoggerFactory.getLogger(HdfsFileObject.class); - - private Path homePath; - private Path path; - private Path fullPath; - private HdfsUser user; - - /** - * Constructs HdfsFileObject from path - * - * @param path path to represent object - * @param user accessor of the object - */ - public HdfsFileObject(String homePath, String path, User user) { - this.homePath = new Path(homePath); - this.path = new Path(path); - this.fullPath = new Path(homePath + path); - this.user = (HdfsUser) user; - } - - public String getAbsolutePath() { - // strip the last '/' if necessary - String fullName = path.toString(); - int filelen = fullName.length(); - if ((filelen != 1) && (fullName.charAt(filelen - 1) == '/')) { - fullName = fullName.substring(0, filelen - 1); - } - - return fullName; - } - - public String getName() { - return path.getName(); - } - - /** - * HDFS has no hidden objects - * - * @return always false - */ - public boolean isHidden() { - return false; - } - - /** - * Checks if the object is a directory - * - * @return true if the object is a directory - */ - public boolean isDirectory() { - try { - log.debug("is directory? 
: " + fullPath); - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - FileStatus fs = dfs.getFileStatus(fullPath); - return fs.isDir(); - } catch (IOException e) { - log.debug(fullPath + " is not dir", e); - return false; - } - } - - /** - * Checks if the object is a file - * - * @return true if the object is a file - */ - public boolean isFile() { - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - return dfs.isFile(fullPath); - } catch (IOException e) { - log.debug(fullPath + " is not file", e); - return false; - } - } - - /** - * Checks if the object does exist - * - * @return true if the object does exist - */ - public boolean doesExist() { - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - dfs.getFileStatus(fullPath); - return true; - } catch (IOException e) { - // log.debug(path + " does not exist", e); - return false; - } - } - - public boolean isReadable() { - return true; - } - - public boolean isWritable() { - return true; - } - - public boolean isRemovable() { - return true; - } - - /** - * Get owner of the object - * - * @return owner of the object - */ - public String getOwnerName() { - return "root"; - /* - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - FileStatus fs = dfs.getFileStatus(fullPath); - String owner = fs.getOwner(); - if(owner.length() == 0) { - return "root"; - } - return owner; - } catch (IOException e) { - e.printStackTrace(); - return null; - } - */ - } - - /** - * Get group of the object - * - * @return group of the object - */ - public String getGroupName() { - return "root"; - /* - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - FileStatus fs = dfs.getFileStatus(fullPath); - String group = fs.getGroup(); - if(group.length() == 0) { - return "root"; - } - return group; - } catch (IOException e) { - e.printStackTrace(); - return null; - } - */ - } - - /** - * Get link count - * - * @return 3 is for a directory and 1 is for a file - */ - public int getLinkCount() { - return isDirectory() ? 
3 : 1; - } - - /** - * Get last modification date - * - * @return last modification date as a long - */ - public long getLastModified() { - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - FileStatus fs = dfs.getFileStatus(fullPath); - return fs.getModificationTime(); - } catch (IOException e) { - e.printStackTrace(); - return 0; - } - } - - public boolean setLastModified(long l) { - return false; - } - - /** - * Get a size of the object - * - * @return size of the object in bytes - */ - public long getSize() { - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - FileStatus fs = dfs.getFileStatus(fullPath); - log.debug("getSize(): " + fullPath + " : " + fs.getLen()); - return fs.getLen(); - } catch (IOException e) { - e.printStackTrace(); - return 0; - } - } - - public Object getPhysicalFile() { - return null; - } - - /** - * Create a new dir from the object - * - * @return true if dir is created - */ - public boolean mkdir() { - try { - FileSystem fs = HdfsOverFtpSystem.getDfs(); - fs.mkdirs(fullPath); -// fs.setOwner(path, user.getName(), user.getMainGroup()); - return true; - } catch (IOException e) { - e.printStackTrace(); - return false; - } - } - - /** - * Delete object from the HDFS filesystem - * - * @return true if the object is deleted - */ - public boolean delete() { - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - dfs.delete(fullPath, true); - return true; - } catch (IOException e) { - e.printStackTrace(); - return false; - } - } - - public boolean move(FtpFile ftpFile) { - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - dfs.rename(fullPath, new Path(fullPath.getParent() + File.separator + ftpFile.getName())); - return true; - } catch (IOException e) { - e.printStackTrace(); - return false; - } - } - - - /** - * List files of the directory - * - * @return List of files in the directory - */ - public List<FtpFile> listFiles() { - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - FileStatus fileStats[] = dfs.listStatus(fullPath); - - // get the virtual name of the base directory - String virtualFileStr = getAbsolutePath(); - if (virtualFileStr.charAt(virtualFileStr.length() - 1) != '/') { - virtualFileStr += '/'; - } - - FtpFile[] virtualFiles = new FtpFile[fileStats.length]; - for (int i = 0; i < fileStats.length; i++) { - File fileObj = new File(fileStats[i].getPath().toString()); - String fileName = virtualFileStr + fileObj.getName(); - virtualFiles[i] = new HdfsFileObject(homePath.toString(), fileName, user); - } - return Collections.unmodifiableList(Arrays.asList(virtualFiles)); - } catch (IOException e) { - log.debug("", e); - return null; - } - } - - /** - * Creates output stream to write to the object - * - * @param l is not used here - * @return OutputStream - * @throws IOException - */ - public OutputStream createOutputStream(long l) { - try { - FileSystem fs = HdfsOverFtpSystem.getDfs(); - FSDataOutputStream out = fs.create(fullPath); -// fs.setOwner(fullPath, user.getName(), user.getMainGroup()); - return out; - } catch (IOException e) { - e.printStackTrace(); - return null; - } - } - - /** - * Creates input stream to read from the object - * - * @param l is not used here - * @return OutputStream - * @throws IOException - */ - public InputStream createInputStream(long l) { - try { - FileSystem dfs = HdfsOverFtpSystem.getDfs(); - FSDataInputStream in = dfs.open(fullPath); - return in; - } catch (IOException e) { - e.printStackTrace(); - return null; - } - } -} diff --git 
a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileSystemManager.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileSystemManager.java deleted file mode 100644 index 533c2c3aa..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileSystemManager.java +++ /dev/null @@ -1,14 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.service.filesystem; - -import org.apache.ftpserver.ftplet.FileSystemFactory; -import org.apache.ftpserver.ftplet.FileSystemView; -import org.apache.ftpserver.ftplet.User; - -/** - * Impelented FileSystemManager to use HdfsFileSystemView - */ -public class HdfsFileSystemManager implements FileSystemFactory { - public FileSystemView createFileSystemView(User user) { - return new HdfsFileSystemView(user); - } -} diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileSystemView.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileSystemView.java deleted file mode 100644 index 8b910e775..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsFileSystemView.java +++ /dev/null @@ -1,104 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.service.filesystem; - -import org.apache.ftpserver.ftplet.FileSystemView; -import org.apache.ftpserver.ftplet.FtpFile; -import org.apache.ftpserver.ftplet.User; -import org.apache.hadoop.fs.Path; - -import java.io.File; - -/** - * Implemented FileSystemView to use HdfsFileObject - */ -public class HdfsFileSystemView implements FileSystemView { - - private String homePath; - private String currPath = File.separator; - private User user; - - /** - * Constructor - set the user object. 
- */ - protected HdfsFileSystemView(User user) { - if (user == null) { - throw new IllegalArgumentException("user can not be null"); - } - if (user.getHomeDirectory() == null) { - throw new IllegalArgumentException( - "User home directory can not be null"); - } - - this.homePath = user.getHomeDirectory(); - this.user = user; - } - - public FtpFile getHomeDirectory() { - return new HdfsFileObject(homePath, File.separator, user); - } - - public FtpFile getWorkingDirectory() { - FtpFile fileObj; - if (currPath.equals(File.separator)) { - fileObj = new HdfsFileObject(homePath, File.separator, user); - } else { - fileObj = new HdfsFileObject(homePath, currPath, user); - - } - return fileObj; - } - - public boolean changeWorkingDirectory(String dir) { - - Path path; - if (dir.startsWith(File.separator) || new Path(currPath).equals(new Path(dir))) { - path = new Path(dir); - } else if (currPath.length() > 1) { - path = new Path(currPath + File.separator + dir); - } else { - if(dir.startsWith("/")) { - path = new Path(dir); - }else { - path = new Path(File.separator + dir); - } - } - - // 防止退回根目录 - if (path.getName().equals("..")) { - path = new Path(File.separator); - } - - HdfsFileObject file = new HdfsFileObject(homePath, path.toString(), user); - if (file.isDirectory()) { - currPath = path.toString(); - return true; - } else { - return false; - } - } - - public FtpFile getFile(String file) { - String path; - if (file.startsWith(File.separator)) { - path = file; - } else if (currPath.length() > 1) { - path = currPath + File.separator + file; - } else { - path = File.separator + file; - } - return new HdfsFileObject(homePath, path, user); - } - - /** - * Is the file content random accessible? - */ - public boolean isRandomAccessible() { - return true; - } - - /** - * Dispose file system view - does nothing. 
- */ - public void dispose() { - } - -} diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsOverFtpSystem.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsOverFtpSystem.java deleted file mode 100644 index 149fd6857..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/service/filesystem/HdfsOverFtpSystem.java +++ /dev/null @@ -1,72 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.service.filesystem; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; - -/** - * Class to store DFS connection - */ -public class HdfsOverFtpSystem { - - private static FileSystem fs = null; - - private static String hdfsUri; - - private static boolean seaweedFsEnable; - - private static String seaweedFsAccess; - - private static String seaweedFsReplication; - - private final static Logger log = LoggerFactory.getLogger(HdfsOverFtpSystem.class); - - private static void hdfsInit() throws IOException { - Configuration configuration = new Configuration(); - - configuration.set("fs.defaultFS", hdfsUri); - if(seaweedFsEnable) { - configuration.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); - configuration.set("fs.seaweed.volume.server.access", seaweedFsAccess); - configuration.set("fs.seaweed.replication", seaweedFsReplication); - } - fs = FileSystem.get(configuration); - log.info("HDFS load success"); - } - - /** - * Get dfs - * - * @return dfs - * @throws IOException - */ - public static FileSystem getDfs() throws IOException { - if (fs == null) { - hdfsInit(); - } - return fs; - } - - public static void setHdfsUri(String hdfsUri) { - HdfsOverFtpSystem.hdfsUri = hdfsUri; - } - - public static String getHdfsUri() { - return hdfsUri; - } - - public static void setSeaweedFsEnable(boolean seaweedFsEnable) { - HdfsOverFtpSystem.seaweedFsEnable = seaweedFsEnable; - } - - public static void setSeaweedFsAccess(String seaweedFsAccess) { - HdfsOverFtpSystem.seaweedFsAccess = seaweedFsAccess; - } - - public static void setSeaweedFsReplication(String seaweedFsReplication) { - HdfsOverFtpSystem.seaweedFsReplication = seaweedFsReplication; - } -}
\ No newline at end of file diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/users/HdfsUser.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/users/HdfsUser.java deleted file mode 100644 index c82f6516f..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/users/HdfsUser.java +++ /dev/null @@ -1,239 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.users; - -import org.apache.ftpserver.ftplet.Authority; -import org.apache.ftpserver.ftplet.AuthorizationRequest; -import org.apache.ftpserver.ftplet.User; -import org.apache.log4j.Logger; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -public class HdfsUser implements User, Serializable { - - private static final long serialVersionUID = -47371353779731294L; - - private String name = null; - - private String password = null; - - private int maxIdleTimeSec = 0; // no limit - - private String homeDir = null; - - private boolean isEnabled = true; - - private List<? extends Authority> authorities = new ArrayList<Authority>(); - - private ArrayList<String> groups = new ArrayList<String>(); - - private Logger log = Logger.getLogger(HdfsUser.class); - - /** - * Default constructor. - */ - public HdfsUser() { - } - - /** - * Copy constructor. - */ - public HdfsUser(User user) { - name = user.getName(); - password = user.getPassword(); - authorities = user.getAuthorities(); - maxIdleTimeSec = user.getMaxIdleTime(); - homeDir = user.getHomeDirectory(); - isEnabled = user.getEnabled(); - } - - public ArrayList<String> getGroups() { - return groups; - } - - /** - * Get the main group of the user - * - * @return main group of the user - */ - public String getMainGroup() { - if (groups.size() > 0) { - return groups.get(0); - } else { - log.error("User " + name + " is not a memer of any group"); - return "error"; - } - } - - /** - * Checks if user is a member of the group - * - * @param group to check - * @return true if the user id a member of the group - */ - public boolean isGroupMember(String group) { - for (String userGroup : groups) { - if (userGroup.equals(group)) { - return true; - } - } - return false; - } - - /** - * Set users' groups - * - * @param groups to set - */ - public void setGroups(ArrayList<String> groups) { - if (groups.size() < 1) { - log.error("User " + name + " is not a memer of any group"); - } - this.groups = groups; - } - - /** - * Get the user name. - */ - public String getName() { - return name; - } - - /** - * Set user name. - */ - public void setName(String name) { - this.name = name; - } - - /** - * Get the user password. - */ - public String getPassword() { - return password; - } - - /** - * Set user password. - */ - public void setPassword(String pass) { - password = pass; - } - - public List<Authority> getAuthorities() { - if (authorities != null) { - return Collections.unmodifiableList(authorities); - } else { - return null; - } - } - - public void setAuthorities(List<Authority> authorities) { - if (authorities != null) { - this.authorities = Collections.unmodifiableList(authorities); - } else { - this.authorities = null; - } - } - - /** - * Get the maximum idle time in second. - */ - public int getMaxIdleTime() { - return maxIdleTimeSec; - } - - /** - * Set the maximum idle time in second. - */ - public void setMaxIdleTime(int idleSec) { - maxIdleTimeSec = idleSec; - if (maxIdleTimeSec < 0) { - maxIdleTimeSec = 0; - } - } - - /** - * Get the user enable status. 
- */ - public boolean getEnabled() { - return isEnabled; - } - - /** - * Set the user enable status. - */ - public void setEnabled(boolean enb) { - isEnabled = enb; - } - - /** - * Get the user home directory. - */ - public String getHomeDirectory() { - return homeDir; - } - - /** - * Set the user home directory. - */ - public void setHomeDirectory(String home) { - homeDir = home; - } - - /** - * String representation. - */ - public String toString() { - return name; - } - - /** - * {@inheritDoc} - */ - public AuthorizationRequest authorize(AuthorizationRequest request) { - List<Authority> authorities = getAuthorities(); - - // check for no authorities at all - if (authorities == null) { - return null; - } - - boolean someoneCouldAuthorize = false; - for (Authority authority : authorities) { - if (authority.canAuthorize(request)) { - someoneCouldAuthorize = true; - - request = authority.authorize(request); - - // authorization failed, return null - if (request == null) { - return null; - } - } - - } - - if (someoneCouldAuthorize) { - return request; - } else { - return null; - } - } - - /** - * {@inheritDoc} - */ - public List<Authority> getAuthorities(Class<? extends Authority> clazz) { - List<Authority> selected = new ArrayList<Authority>(); - - for (Authority authority : authorities) { - if (authority.getClass().equals(clazz)) { - selected.add(authority); - } - } - - return selected; - } -} diff --git a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/users/HdfsUserManager.java b/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/users/HdfsUserManager.java deleted file mode 100644 index 7eb296160..000000000 --- a/other/java/hdfs-over-ftp/src/main/java/org/apache/hadoop/seaweed/ftp/users/HdfsUserManager.java +++ /dev/null @@ -1,453 +0,0 @@ -package org.apache.hadoop.seaweed.ftp.users; - -import org.apache.ftpserver.FtpServerConfigurationException; -import org.apache.ftpserver.ftplet.*; -import org.apache.ftpserver.usermanager.*; -import org.apache.ftpserver.usermanager.impl.*; -import org.apache.ftpserver.util.BaseProperties; -import org.apache.ftpserver.util.IoUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.*; - -public class HdfsUserManager extends AbstractUserManager { - - private final Logger LOG = LoggerFactory - .getLogger(HdfsUserManager.class); - - private final static String DEPRECATED_PREFIX = "FtpServer.user."; - - private final static String PREFIX = "ftpserver.user."; - - private static BaseProperties userDataProp; - - private File userDataFile = new File("users.conf"); - - private boolean isConfigured = false; - - private PasswordEncryptor passwordEncryptor = new Md5PasswordEncryptor(); - - - /** - * Retrieve the file used to load and store users - * - * @return The file - */ - public File getFile() { - return userDataFile; - } - - /** - * Set the file used to store and read users. Must be set before - * {@link #configure()} is called. - * - * @param propFile A file containing users - */ - public void setFile(File propFile) { - if (isConfigured) { - throw new IllegalStateException("Must be called before configure()"); - } - - this.userDataFile = propFile; - } - - - /** - * Retrieve the password encryptor used for this user manager - * - * @return The password encryptor. 
Default to {@link Md5PasswordEncryptor} - * if no other has been provided - */ - public PasswordEncryptor getPasswordEncryptor() { - return passwordEncryptor; - } - - - /** - * Set the password encryptor to use for this user manager - * - * @param passwordEncryptor The password encryptor - */ - public void setPasswordEncryptor(PasswordEncryptor passwordEncryptor) { - this.passwordEncryptor = passwordEncryptor; - } - - - /** - * Lazy init the user manager - */ - private void lazyInit() { - if (!isConfigured) { - configure(); - } - } - - /** - * Configure user manager. - */ - public void configure() { - isConfigured = true; - try { - userDataProp = new BaseProperties(); - - if (userDataFile != null && userDataFile.exists()) { - FileInputStream fis = null; - try { - fis = new FileInputStream(userDataFile); - userDataProp.load(fis); - } finally { - IoUtils.close(fis); - } - } - } catch (IOException e) { - throw new FtpServerConfigurationException( - "Error loading user data file : " - + userDataFile.getAbsolutePath(), e); - } - - convertDeprecatedPropertyNames(); - } - - private void convertDeprecatedPropertyNames() { - Enumeration<?> keys = userDataProp.propertyNames(); - - boolean doSave = false; - - while (keys.hasMoreElements()) { - String key = (String) keys.nextElement(); - - if (key.startsWith(DEPRECATED_PREFIX)) { - String newKey = PREFIX - + key.substring(DEPRECATED_PREFIX.length()); - userDataProp.setProperty(newKey, userDataProp.getProperty(key)); - userDataProp.remove(key); - - doSave = true; - } - } - - if (doSave) { - try { - saveUserData(); - } catch (FtpException e) { - throw new FtpServerConfigurationException( - "Failed to save updated user data", e); - } - } - } - - public synchronized void save(User usr, boolean renamePush) throws FtpException { - lazyInit(); - userDataProp.setProperty(PREFIX + usr.getName() + ".rename.push", renamePush); - save(usr); - } - - /** - * Save user data. Store the properties. 
- */ - public synchronized void save(User usr) throws FtpException { - lazyInit(); - - // null value check - if (usr.getName() == null) { - throw new NullPointerException("User name is null."); - } - String thisPrefix = PREFIX + usr.getName() + '.'; - - // set other properties - userDataProp.setProperty(thisPrefix + ATTR_PASSWORD, getPassword(usr)); - - String home = usr.getHomeDirectory(); - if (home == null) { - home = "/"; - } - userDataProp.setProperty(thisPrefix + ATTR_HOME, home); - userDataProp.setProperty(thisPrefix + ATTR_ENABLE, usr.getEnabled()); - userDataProp.setProperty(thisPrefix + ATTR_WRITE_PERM, usr - .authorize(new WriteRequest()) != null); - userDataProp.setProperty(thisPrefix + ATTR_MAX_IDLE_TIME, usr - .getMaxIdleTime()); - - TransferRateRequest transferRateRequest = new TransferRateRequest(); - transferRateRequest = (TransferRateRequest) usr - .authorize(transferRateRequest); - - if (transferRateRequest != null) { - userDataProp.setProperty(thisPrefix + ATTR_MAX_UPLOAD_RATE, - transferRateRequest.getMaxUploadRate()); - userDataProp.setProperty(thisPrefix + ATTR_MAX_DOWNLOAD_RATE, - transferRateRequest.getMaxDownloadRate()); - } else { - userDataProp.remove(thisPrefix + ATTR_MAX_UPLOAD_RATE); - userDataProp.remove(thisPrefix + ATTR_MAX_DOWNLOAD_RATE); - } - - // request that always will succeed - ConcurrentLoginRequest concurrentLoginRequest = new ConcurrentLoginRequest( - 0, 0); - concurrentLoginRequest = (ConcurrentLoginRequest) usr - .authorize(concurrentLoginRequest); - - if (concurrentLoginRequest != null) { - userDataProp.setProperty(thisPrefix + ATTR_MAX_LOGIN_NUMBER, - concurrentLoginRequest.getMaxConcurrentLogins()); - userDataProp.setProperty(thisPrefix + ATTR_MAX_LOGIN_PER_IP, - concurrentLoginRequest.getMaxConcurrentLoginsPerIP()); - } else { - userDataProp.remove(thisPrefix + ATTR_MAX_LOGIN_NUMBER); - userDataProp.remove(thisPrefix + ATTR_MAX_LOGIN_PER_IP); - } - - saveUserData(); - } - - /** - * @throws FtpException - */ - private void saveUserData() throws FtpException { - File dir = userDataFile.getAbsoluteFile().getParentFile(); - if (dir != null && !dir.exists() && !dir.mkdirs()) { - String dirName = dir.getAbsolutePath(); - throw new FtpServerConfigurationException( - "Cannot create directory for user data file : " + dirName); - } - - // save user data - FileOutputStream fos = null; - try { - fos = new FileOutputStream(userDataFile); - userDataProp.store(fos, "Generated file - don't edit (please)"); - } catch (IOException ex) { - LOG.error("Failed saving user data", ex); - throw new FtpException("Failed saving user data", ex); - } finally { - IoUtils.close(fos); - } - } - - - public synchronized void list() throws FtpException { - lazyInit(); - - Map dataMap = new HashMap(); - Enumeration<String> propNames = (Enumeration<String>) userDataProp.propertyNames(); - ArrayList<String> a = Collections.list(propNames); - a.remove("i18nMap");//去除i18nMap - for(String attrName : a){ -// dataMap.put(attrName, propNames.); - } - - } - - /** - * Delete an user. Removes all this user entries from the properties. After - * removing the corresponding from the properties, save the data. 
- */ - public synchronized void delete(String usrName) throws FtpException { - lazyInit(); - - // remove entries from properties - String thisPrefix = PREFIX + usrName + '.'; - Enumeration<?> propNames = userDataProp.propertyNames(); - ArrayList<String> remKeys = new ArrayList<String>(); - while (propNames.hasMoreElements()) { - String thisKey = propNames.nextElement().toString(); - if (thisKey.startsWith(thisPrefix)) { - remKeys.add(thisKey); - } - } - Iterator<String> remKeysIt = remKeys.iterator(); - while (remKeysIt.hasNext()) { - userDataProp.remove(remKeysIt.next()); - } - - saveUserData(); - } - - /** - * Get user password. Returns the encrypted value. - * <p/> - * <pre> - * If the password value is not null - * password = new password - * else - * if user does exist - * password = old password - * else - * password = "" - * </pre> - */ - private String getPassword(User usr) { - String name = usr.getName(); - String password = usr.getPassword(); - - if (password != null) { - password = passwordEncryptor.encrypt(password); - } else { - String blankPassword = passwordEncryptor.encrypt(""); - - if (doesExist(name)) { - String key = PREFIX + name + '.' + ATTR_PASSWORD; - password = userDataProp.getProperty(key, blankPassword); - } else { - password = blankPassword; - } - } - return password; - } - - /** - * Get all user names. - */ - public synchronized String[] getAllUserNames() { - lazyInit(); - - // get all user names - String suffix = '.' + ATTR_HOME; - ArrayList<String> ulst = new ArrayList<String>(); - Enumeration<?> allKeys = userDataProp.propertyNames(); - int prefixlen = PREFIX.length(); - int suffixlen = suffix.length(); - while (allKeys.hasMoreElements()) { - String key = (String) allKeys.nextElement(); - if (key.endsWith(suffix)) { - String name = key.substring(prefixlen); - int endIndex = name.length() - suffixlen; - name = name.substring(0, endIndex); - ulst.add(name); - } - } - - Collections.sort(ulst); - return ulst.toArray(new String[0]); - } - - private ArrayList<String> parseGroups(String groupsLine) { - String groupsArray[] = groupsLine.split(","); - return new ArrayList(Arrays.asList(groupsArray)); - } - - public static synchronized boolean getUserRenamePush(String userName) { - return userDataProp.getBoolean(PREFIX + userName + ".rename.push", false); - } - - /** - * Load user data. 
- */ - public synchronized User getUserByName(String userName) { - lazyInit(); - - if (!doesExist(userName)) { - return null; - } - - String baseKey = PREFIX + userName + '.'; - HdfsUser user = new HdfsUser(); - user.setName(userName); - user.setEnabled(userDataProp.getBoolean(baseKey + ATTR_ENABLE, true)); - user.setHomeDirectory(userDataProp - .getProperty(baseKey + ATTR_HOME, "/")); - -// user.setGroups(parseGroups(userDataProp -// .getProperty(baseKey + "groups"))); - - List<Authority> authorities = new ArrayList<Authority>(); - - if (userDataProp.getBoolean(baseKey + ATTR_WRITE_PERM, false)) { - authorities.add(new WritePermission()); - } - - int maxLogin = userDataProp.getInteger(baseKey + ATTR_MAX_LOGIN_NUMBER, - 0); - int maxLoginPerIP = userDataProp.getInteger(baseKey - + ATTR_MAX_LOGIN_PER_IP, 0); - - authorities.add(new ConcurrentLoginPermission(maxLogin, maxLoginPerIP)); - - int uploadRate = userDataProp.getInteger( - baseKey + ATTR_MAX_UPLOAD_RATE, 0); - int downloadRate = userDataProp.getInteger(baseKey - + ATTR_MAX_DOWNLOAD_RATE, 0); - - authorities.add(new TransferRatePermission(downloadRate, uploadRate)); - - user.setAuthorities(authorities); - - user.setMaxIdleTime(userDataProp.getInteger(baseKey - + ATTR_MAX_IDLE_TIME, 0)); - - return user; - } - - /** - * User existance check - */ - public synchronized boolean doesExist(String name) { - lazyInit(); - - String key = PREFIX + name + '.' + ATTR_HOME; - return userDataProp.containsKey(key); - } - - /** - * User authenticate method - */ - public synchronized User authenticate(Authentication authentication) - throws AuthenticationFailedException { - lazyInit(); - - if (authentication instanceof UsernamePasswordAuthentication) { - UsernamePasswordAuthentication upauth = (UsernamePasswordAuthentication) authentication; - - String user = upauth.getUsername(); - String password = upauth.getPassword(); - - if (user == null) { - throw new AuthenticationFailedException("Authentication failed"); - } - - if (password == null) { - password = ""; - } - - String storedPassword = userDataProp.getProperty(PREFIX + user + '.' - + ATTR_PASSWORD); - - if (storedPassword == null) { - // user does not exist - throw new AuthenticationFailedException("Authentication failed"); - } - - if (passwordEncryptor.matches(password, storedPassword)) { - return getUserByName(user); - } else { - throw new AuthenticationFailedException("Authentication failed"); - } - - } else if (authentication instanceof AnonymousAuthentication) { - if (doesExist("anonymous")) { - return getUserByName("anonymous"); - } else { - throw new AuthenticationFailedException("Authentication failed"); - } - } else { - throw new IllegalArgumentException( - "Authentication not supported by this user manager"); - } - } - - /** - * Close the user manager - remove existing entries. - */ - public synchronized void dispose() { - if (userDataProp != null) { - userDataProp.clear(); - userDataProp = null; - } - } -} diff --git a/other/java/hdfs-over-ftp/src/main/resources/application.yml b/other/java/hdfs-over-ftp/src/main/resources/application.yml deleted file mode 100644 index 128bab1f9..000000000 --- a/other/java/hdfs-over-ftp/src/main/resources/application.yml +++ /dev/null @@ -1,15 +0,0 @@ -server: - port: 8080 - -ftp: - port: 2222 - passive-address: localhost - passive-ports: 30000-30999 - -hdfs: - uri: seaweedfs://localhost:8888 - -seaweedFs: - enable: true - access: direct # direct/filerProxy/publicUrl - replication: "000"
\ No newline at end of file diff --git a/other/java/hdfs-over-ftp/src/main/resources/assembly.xml b/other/java/hdfs-over-ftp/src/main/resources/assembly.xml deleted file mode 100644 index 84fef56f8..000000000 --- a/other/java/hdfs-over-ftp/src/main/resources/assembly.xml +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd"> - - <id>package</id> - <formats> - <!-- 指定打包格式,支持的打包格式有zip、tar、tar.gz (or tgz)、tar.bz2 (or tbz2)、jar、dir、war,可以同时指定多个打包格式 --> - <format>tar.gz</format> - </formats> - <includeBaseDirectory>false</includeBaseDirectory> - - <fileSets> - <fileSet> - <directory>src/main/resources</directory> - <outputDirectory>/</outputDirectory> - <includes> - <include>application.yml</include> - <include>logback-spring.xml</include> - <include>users.properties</include> - <include>kafka-producer.properties</include> - </includes> - </fileSet> - <fileSet> - <directory>${project.build.directory}</directory> - <outputDirectory>/</outputDirectory> - <includes> - <include>*.jar</include> - </includes> - </fileSet> - </fileSets> - <dependencySets> - <dependencySet> - <useProjectArtifact>false</useProjectArtifact> - <outputDirectory>lib</outputDirectory> - <scope>runtime</scope> - <unpack>false</unpack> - </dependencySet> - </dependencySets> -</assembly> diff --git a/other/java/hdfs-over-ftp/src/main/resources/logback-spring.xml b/other/java/hdfs-over-ftp/src/main/resources/logback-spring.xml deleted file mode 100644 index 96b4c1d71..000000000 --- a/other/java/hdfs-over-ftp/src/main/resources/logback-spring.xml +++ /dev/null @@ -1,40 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<configuration> - <!--定义日志文件的存储地址 勿在 LogBack 的配置中使用相对路径--> - <property name="LOG_HOME" value="${user.dir}/logs/" /> - - <!-- 控制台输出 --> - <appender name="Stdout" class="ch.qos.logback.core.ConsoleAppender"> - <!-- 日志输出编码 --> - <layout class="ch.qos.logback.classic.PatternLayout"> - <!--格式化输出:%d表示日期,%thread表示线程名,%-5level:级别从左显示5个字符宽度%msg:日志消息,%n是换行符--> - <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n - </pattern> - </layout> - </appender> - - <!-- 按照每天生成日志文件 --> - <appender name="RollingFile" - class="ch.qos.logback.core.rolling.RollingFileAppender"> - <File>${LOG_HOME}/fileLog.log</File> - <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> - <fileNamePattern>${LOG_HOME}/fileLog.log.%d.%i</fileNamePattern> - <timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP"> - <maxFileSize>100 MB</maxFileSize> - </timeBasedFileNamingAndTriggeringPolicy> - </rollingPolicy> - <encoder> - <pattern> - %d %p (%file:%line\)- %m%n - </pattern> - <charset>UTF-8</charset> - </encoder> - </appender> - - <!-- 日志输出级别 --> - <root level="info"> - <appender-ref ref="Stdout" /> - <appender-ref ref="RollingFile" /> - </root> - -</configuration>
\ No newline at end of file diff --git a/other/java/hdfs-over-ftp/users.properties b/other/java/hdfs-over-ftp/users.properties deleted file mode 100644 index aeeab8e35..000000000 --- a/other/java/hdfs-over-ftp/users.properties +++ /dev/null @@ -1,12 +0,0 @@ -#Generated file - don't edit (please) -#Thu Mar 11 19:11:12 CST 2021 -ftpserver.user.test.idletime=0 -ftpserver.user.test.maxloginperip=0 -ftpserver.user.test.userpassword=44664D4D827C740293D2AA244FB60445 -ftpserver.user.test.enableflag=true -ftpserver.user.test.maxloginnumber=0 -ftpserver.user.test.rename.push=true -ftpserver.user.test.homedirectory=/buckets/test/ -ftpserver.user.test.downloadrate=0 -ftpserver.user.test.writepermission=true -ftpserver.user.test.uploadrate=0 diff --git a/other/java/hdfs2/README.md b/other/java/hdfs2/README.md deleted file mode 100644 index e98b06506..000000000 --- a/other/java/hdfs2/README.md +++ /dev/null @@ -1,190 +0,0 @@ -# SeaweedFS Hadoop2 Client - -Hadoop FileSystem implementation for SeaweedFS, compatible with Hadoop 2.x/3.x. - -## Building - -```bash -mvn clean install -``` - -## Testing - -This project includes two types of tests: - -### 1. Configuration Tests (No SeaweedFS Required) - -These tests verify configuration handling and initialization logic without requiring a running SeaweedFS instance: - -```bash -mvn test -Dtest=SeaweedFileSystemConfigTest -``` - -### 2. Integration Tests (Requires SeaweedFS) - -These tests verify actual FileSystem operations against a running SeaweedFS instance. - -#### Prerequisites - -1. Start SeaweedFS with default ports: - ```bash - # Terminal 1: Start master - weed master - - # Terminal 2: Start volume server - weed volume -mserver=localhost:9333 - - # Terminal 3: Start filer - weed filer -master=localhost:9333 - ``` - -2. Verify services are running: - - Master: http://localhost:9333 - - Filer HTTP: http://localhost:8888 - - Filer gRPC: localhost:18888 - -#### Running Integration Tests - -```bash -# Enable integration tests -export SEAWEEDFS_TEST_ENABLED=true - -# Run all tests -mvn test - -# Run specific test -mvn test -Dtest=SeaweedFileSystemTest -``` - -### Test Configuration - -Integration tests can be configured via environment variables or system properties: - -- `SEAWEEDFS_TEST_ENABLED`: Set to `true` to enable integration tests (default: false) -- Tests use these default connection settings: - - Filer Host: localhost - - Filer HTTP Port: 8888 - - Filer gRPC Port: 18888 - -### Running Tests with Custom Configuration - -To test against a different SeaweedFS instance, modify the test code or use Hadoop configuration: - -```java -conf.set("fs.seaweed.filer.host", "your-host"); -conf.setInt("fs.seaweed.filer.port", 8888); -conf.setInt("fs.seaweed.filer.port.grpc", 18888); -``` - -## Test Coverage - -The test suite covers: - -- **Configuration & Initialization** - - URI parsing and configuration - - Default values - - Configuration overrides - - Working directory management - -- **File Operations** - - Create files - - Read files - - Write files - - Append to files - - Delete files - -- **Directory Operations** - - Create directories - - List directory contents - - Delete directories (recursive and non-recursive) - -- **Metadata Operations** - - Get file status - - Set permissions - - Set owner/group - - Rename files and directories - -## Usage in Hadoop - -1. Copy the built JAR to your Hadoop classpath: - ```bash - cp target/seaweedfs-hadoop2-client-*.jar $HADOOP_HOME/share/hadoop/common/lib/ - ``` - -2. 
Configure `core-site.xml`: - ```xml - <configuration> - <property> - <name>fs.seaweedfs.impl</name> - <value>seaweed.hdfs.SeaweedFileSystem</value> - </property> - <property> - <name>fs.seaweed.filer.host</name> - <value>localhost</value> - </property> - <property> - <name>fs.seaweed.filer.port</name> - <value>8888</value> - </property> - <property> - <name>fs.seaweed.filer.port.grpc</name> - <value>18888</value> - </property> - </configuration> - ``` - -3. Use SeaweedFS with Hadoop commands: - ```bash - hadoop fs -ls seaweedfs://localhost:8888/ - hadoop fs -mkdir seaweedfs://localhost:8888/test - hadoop fs -put local.txt seaweedfs://localhost:8888/test/ - ``` - -## Continuous Integration - -For CI environments, tests can be run in two modes: - -1. **Configuration Tests Only** (default, no SeaweedFS required): - ```bash - mvn test -Dtest=SeaweedFileSystemConfigTest - ``` - -2. **Full Integration Tests** (requires SeaweedFS): - ```bash - # Start SeaweedFS in CI environment - # Then run: - export SEAWEEDFS_TEST_ENABLED=true - mvn test - ``` - -## Troubleshooting - -### Tests are skipped - -If you see "Skipping test - SEAWEEDFS_TEST_ENABLED not set": -```bash -export SEAWEEDFS_TEST_ENABLED=true -``` - -### Connection refused errors - -Ensure SeaweedFS is running and accessible: -```bash -curl http://localhost:8888/ -``` - -### gRPC errors - -Verify the gRPC port is accessible: -```bash -# Should show the port is listening -netstat -an | grep 18888 -``` - -## Contributing - -When adding new features, please include: -1. Configuration tests (no SeaweedFS required) -2. Integration tests (with SEAWEEDFS_TEST_ENABLED guard) -3. Documentation updates - diff --git a/other/java/hdfs2/dependency-reduced-pom.xml b/other/java/hdfs2/dependency-reduced-pom.xml deleted file mode 100644 index fd84befa0..000000000 --- a/other/java/hdfs2/dependency-reduced-pom.xml +++ /dev/null @@ -1,333 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>com.seaweedfs</groupId>
- <artifactId>seaweedfs-hadoop2-client</artifactId>
- <name>SeaweedFS HDFS2 Client</name>
- <version>${seaweedfs.client.version}</version>
- <description>A java client for SeaweedFS.</description>
- <url>https://github.com/seaweedfs/seaweedfs</url>
- <developers>
- <developer>
- <name>Chris Lu</name>
- <email>chris.lu@gmail.com</email>
- <organization>SeaweedFS</organization>
- <organizationUrl>https://seaweedfs.com</organizationUrl>
- </developer>
- </developers>
- <licenses>
- <license>
- <name>The Apache License, Version 2.0</name>
- <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
- </license>
- </licenses>
- <scm>
- <connection>scm:git:git://github.com/seaweedfs/seaweedfs.git</connection>
- <developerConnection>scm:git:ssh://github.com:seaweedfs/seaweedfs.git</developerConnection>
- <url>https://github.com/seaweedfs/seaweedfs/tree/master</url>
- </scm>
- <build>
- <plugins>
- <plugin>
- <artifactId>maven-compiler-plugin</artifactId>
- <configuration>
- <source>8</source>
- <target>8</target>
- <release>8</release>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-shade-plugin</artifactId>
- <version>3.2.1</version>
- <executions>
- <execution>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>META-INF/*.SF</exclude>
- <exclude>META-INF/*.DSA</exclude>
- <exclude>META-INF/*.RSA</exclude>
- <exclude>org/slf4j/**</exclude>
- <exclude>META-INF/maven/org.slf4j/**</exclude>
- </excludes>
- </filter>
- </filters>
- <transformers>
- <transformer />
- </transformers>
- <relocations>
- <relocation>
- <pattern>com.google</pattern>
- <shadedPattern>shaded.com.google</shadedPattern>
- </relocation>
- <relocation>
- <pattern>io.grpc.internal</pattern>
- <shadedPattern>shaded.io.grpc.internal</shadedPattern>
- </relocation>
- <relocation>
- <pattern>org.apache.commons</pattern>
- <shadedPattern>shaded.org.apache.commons</shadedPattern>
- <excludes>
- <exclude>org.apache.hadoop</exclude>
- <exclude>org.apache.log4j</exclude>
- </excludes>
- </relocation>
- <relocation>
- <pattern>org.apache.http</pattern>
- <shadedPattern>shaded.org.apache.http</shadedPattern>
- </relocation>
- </relocations>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <artifactId>maven-gpg-plugin</artifactId>
- <version>1.5</version>
- <executions>
- <execution>
- <id>sign-artifacts</id>
- <phase>verify</phase>
- <goals>
- <goal>sign</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.sonatype.central</groupId>
- <artifactId>central-publishing-maven-plugin</artifactId>
- <version>0.5.0</version>
- <extensions>true</extensions>
- <configuration>
- <publishingServerId>central</publishingServerId>
- <autoPublish>true</autoPublish>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-source-plugin</artifactId>
- <version>2.2.1</version>
- <executions>
- <execution>
- <id>attach-sources</id>
- <goals>
- <goal>jar-no-fork</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <artifactId>maven-javadoc-plugin</artifactId>
- <version>3.0.1</version>
- <executions>
- <execution>
- <id>attach-javadocs</id>
- <goals>
- <goal>jar</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>3.2.4</version>
- <scope>provided</scope>
- <exclusions>
- <exclusion>
- <artifactId>hadoop-hdfs-client</artifactId>
- <groupId>org.apache.hadoop</groupId>
- </exclusion>
- <exclusion>
- <artifactId>hadoop-yarn-api</artifactId>
- <groupId>org.apache.hadoop</groupId>
- </exclusion>
- <exclusion>
- <artifactId>hadoop-yarn-client</artifactId>
- <groupId>org.apache.hadoop</groupId>
- </exclusion>
- <exclusion>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- <groupId>org.apache.hadoop</groupId>
- </exclusion>
- <exclusion>
- <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
- <groupId>org.apache.hadoop</groupId>
- </exclusion>
- <exclusion>
- <artifactId>hadoop-annotations</artifactId>
- <groupId>org.apache.hadoop</groupId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>3.2.4</version>
- <scope>provided</scope>
- <exclusions>
- <exclusion>
- <artifactId>commons-cli</artifactId>
- <groupId>commons-cli</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-math3</artifactId>
- <groupId>org.apache.commons</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-io</artifactId>
- <groupId>commons-io</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-net</artifactId>
- <groupId>commons-net</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-collections</artifactId>
- <groupId>commons-collections</groupId>
- </exclusion>
- <exclusion>
- <artifactId>javax.servlet-api</artifactId>
- <groupId>javax.servlet</groupId>
- </exclusion>
- <exclusion>
- <artifactId>javax.activation-api</artifactId>
- <groupId>javax.activation</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jetty-server</artifactId>
- <groupId>org.eclipse.jetty</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jetty-util</artifactId>
- <groupId>org.eclipse.jetty</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jetty-servlet</artifactId>
- <groupId>org.eclipse.jetty</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jetty-webapp</artifactId>
- <groupId>org.eclipse.jetty</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jsp-api</artifactId>
- <groupId>javax.servlet.jsp</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jersey-core</artifactId>
- <groupId>com.sun.jersey</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jersey-servlet</artifactId>
- <groupId>com.sun.jersey</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jersey-json</artifactId>
- <groupId>com.sun.jersey</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jersey-server</artifactId>
- <groupId>com.sun.jersey</groupId>
- </exclusion>
- <exclusion>
- <artifactId>reload4j</artifactId>
- <groupId>ch.qos.reload4j</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-beanutils</artifactId>
- <groupId>commons-beanutils</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-configuration2</artifactId>
- <groupId>org.apache.commons</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-lang3</artifactId>
- <groupId>org.apache.commons</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-text</artifactId>
- <groupId>org.apache.commons</groupId>
- </exclusion>
- <exclusion>
- <artifactId>slf4j-reload4j</artifactId>
- <groupId>org.slf4j</groupId>
- </exclusion>
- <exclusion>
- <artifactId>avro</artifactId>
- <groupId>org.apache.avro</groupId>
- </exclusion>
- <exclusion>
- <artifactId>re2j</artifactId>
- <groupId>com.google.re2j</groupId>
- </exclusion>
- <exclusion>
- <artifactId>hadoop-auth</artifactId>
- <groupId>org.apache.hadoop</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jsch</artifactId>
- <groupId>com.jcraft</groupId>
- </exclusion>
- <exclusion>
- <artifactId>curator-client</artifactId>
- <groupId>org.apache.curator</groupId>
- </exclusion>
- <exclusion>
- <artifactId>curator-recipes</artifactId>
- <groupId>org.apache.curator</groupId>
- </exclusion>
- <exclusion>
- <artifactId>htrace-core4</artifactId>
- <groupId>org.apache.htrace</groupId>
- </exclusion>
- <exclusion>
- <artifactId>zookeeper</artifactId>
- <groupId>org.apache.zookeeper</groupId>
- </exclusion>
- <exclusion>
- <artifactId>commons-compress</artifactId>
- <groupId>org.apache.commons</groupId>
- </exclusion>
- <exclusion>
- <artifactId>kerb-simplekdc</artifactId>
- <groupId>org.apache.kerby</groupId>
- </exclusion>
- <exclusion>
- <artifactId>jackson-databind</artifactId>
- <groupId>com.fasterxml.jackson.core</groupId>
- </exclusion>
- <exclusion>
- <artifactId>stax2-api</artifactId>
- <groupId>org.codehaus.woodstox</groupId>
- </exclusion>
- <exclusion>
- <artifactId>woodstox-core</artifactId>
- <groupId>com.fasterxml.woodstox</groupId>
- </exclusion>
- <exclusion>
- <artifactId>dnsjava</artifactId>
- <groupId>dnsjava</groupId>
- </exclusion>
- <exclusion>
- <artifactId>hadoop-annotations</artifactId>
- <groupId>org.apache.hadoop</groupId>
- </exclusion>
- </exclusions>
- </dependency>
- </dependencies>
- <properties>
- <seaweedfs.client.version>3.80</seaweedfs.client.version>
- <hadoop.version>3.2.4</hadoop.version>
- </properties>
-</project>
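For reference, the filer connection keys that the removed hdfs2 README documents for `core-site.xml` (`fs.seaweedfs.impl`, `fs.seaweed.filer.host`, `fs.seaweed.filer.port`, `fs.seaweed.filer.port.grpc`) can also be set programmatically through the Hadoop `Configuration` API. The sketch below is illustrative only and is not code from this change: it assumes a local filer on the default ports (HTTP 8888, gRPC 18888), a shaded client jar on the classpath, and the class name `SeaweedFsExample` is invented for the example.

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeaweedFsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same keys as the core-site.xml example in the removed README.
        conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem");
        conf.set("fs.seaweed.filer.host", "localhost");   // assumed local filer
        conf.setInt("fs.seaweed.filer.port", 8888);       // filer HTTP port
        conf.setInt("fs.seaweed.filer.port.grpc", 18888); // filer gRPC port

        FileSystem fs = FileSystem.get(URI.create("seaweedfs://localhost:8888/"), conf);
        Path dir = new Path("/test");
        fs.mkdirs(dir);

        // Write a small file, then list the directory.
        try (FSDataOutputStream out = fs.create(new Path(dir, "hello.txt"), true)) {
            out.writeBytes("hello seaweedfs\n");
        }
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println(status.getPath() + " " + status.getLen());
        }
        fs.close();
    }
}
```

Running this requires a filer started as in the README (weed master, weed volume, weed filer); the same keys appear again in `SeaweedFileSystem.initialize` further down in this diff.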
diff --git a/other/java/hdfs2/pom.xml b/other/java/hdfs2/pom.xml deleted file mode 100644 index 7b4c2507d..000000000 --- a/other/java/hdfs2/pom.xml +++ /dev/null @@ -1,195 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project xmlns="http://maven.apache.org/POM/4.0.0" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <properties> - <seaweedfs.client.version>3.80</seaweedfs.client.version> - <hadoop.version>3.4.0</hadoop.version> - </properties> - - <groupId>com.seaweedfs</groupId> - <artifactId>seaweedfs-hadoop2-client</artifactId> - <version>${seaweedfs.client.version}</version> - - <name>SeaweedFS HDFS2 Client</name> - <description>A java client for SeaweedFS.</description> - <url>https://github.com/seaweedfs/seaweedfs</url> - <licenses> - <license> - <name>The Apache License, Version 2.0</name> - <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> - </license> - </licenses> - <developers> - <developer> - <name>Chris Lu</name> - <email>chris.lu@gmail.com</email> - <organization>SeaweedFS</organization> - <organizationUrl>https://seaweedfs.com</organizationUrl> - </developer> - </developers> - <scm> - <connection>scm:git:git://github.com/seaweedfs/seaweedfs.git</connection> - <developerConnection>scm:git:ssh://github.com:seaweedfs/seaweedfs.git</developerConnection> - <url>https://github.com/seaweedfs/seaweedfs/tree/master</url> - </scm> - - <build> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <configuration> - <source>8</source> - <target>8</target> - <release>8</release> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-shade-plugin</artifactId> - <version>3.2.1</version> - <executions> - <execution> - <phase>package</phase> - <goals> - <goal>shade</goal> - </goals> - <configuration> - <filters> - <filter> - <artifact>*:*</artifact> - <excludes> - <exclude>META-INF/*.SF</exclude> - <exclude>META-INF/*.DSA</exclude> - <exclude>META-INF/*.RSA</exclude> - <exclude>org/slf4j/**</exclude> - <exclude>META-INF/maven/org.slf4j/**</exclude> - </excludes> - </filter> - </filters> - <transformers> - <transformer - implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> - </transformers> - <relocations> - <relocation> - <pattern>com.google</pattern> - <shadedPattern>shaded.com.google</shadedPattern> - </relocation> - <relocation> - <pattern>io.grpc.internal</pattern> - <shadedPattern>shaded.io.grpc.internal</shadedPattern> - </relocation> - <relocation> - <pattern>org.apache.commons</pattern> - <shadedPattern>shaded.org.apache.commons</shadedPattern> - <excludes> - <exclude>org.apache.hadoop</exclude> - <exclude>org.apache.log4j</exclude> - </excludes> - </relocation> - <relocation> - <pattern>org.apache.http</pattern> - <shadedPattern>shaded.org.apache.http</shadedPattern> - </relocation> - </relocations> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-gpg-plugin</artifactId> - <version>1.5</version> - <executions> - <execution> - <id>sign-artifacts</id> - <phase>verify</phase> - <goals> - <goal>sign</goal> - </goals> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.sonatype.central</groupId> - <artifactId>central-publishing-maven-plugin</artifactId> - 
<version>0.5.0</version> - <extensions>true</extensions> - <configuration> - <publishingServerId>central</publishingServerId> - <autoPublish>true</autoPublish> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-source-plugin</artifactId> - <version>2.2.1</version> - <executions> - <execution> - <id>attach-sources</id> - <goals> - <goal>jar-no-fork</goal> - </goals> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-javadoc-plugin</artifactId> - <version>3.0.1</version> - <executions> - <execution> - <id>attach-javadocs</id> - <goals> - <goal>jar</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> - - <dependencies> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>${hadoop.version}</version> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>com.seaweedfs</groupId> - <artifactId>seaweedfs-client</artifactId> - <version>${seaweedfs.client.version}</version> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-common</artifactId> - <version>${hadoop.version}</version> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.13.1</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.mockito</groupId> - <artifactId>mockito-core</artifactId> - <version>3.12.4</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-common</artifactId> - <version>${hadoop.version}</version> - <scope>test</scope> - <type>test-jar</type> - </dependency> - </dependencies> - -</project> diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/BufferedByteBufferReadableInputStream.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/BufferedByteBufferReadableInputStream.java deleted file mode 100644 index 3d0b68a52..000000000 --- a/other/java/hdfs2/src/main/java/seaweed/hdfs/BufferedByteBufferReadableInputStream.java +++ /dev/null @@ -1,25 +0,0 @@ -package seaweed.hdfs; - -import org.apache.hadoop.fs.*; - -import java.io.IOException; -import java.nio.ByteBuffer; - -public class BufferedByteBufferReadableInputStream extends BufferedFSInputStream implements ByteBufferReadable { - - public BufferedByteBufferReadableInputStream(FSInputStream in, int size) { - super(in, size); - if (!(in instanceof Seekable) || !(in instanceof PositionedReadable)) { - throw new IllegalArgumentException("In is not an instance of Seekable or PositionedReadable"); - } - } - - @Override - public int read(ByteBuffer buf) throws IOException { - if (this.in instanceof ByteBufferReadable) { - return ((ByteBufferReadable)this.in).read(buf); - } else { - throw new UnsupportedOperationException("Byte-buffer read unsupported by input stream"); - } - } -} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedAbstractFileSystem.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedAbstractFileSystem.java deleted file mode 100644 index e021401aa..000000000 --- a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedAbstractFileSystem.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package seaweed.hdfs; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.DelegateToFileSystem; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; - -public class SeaweedAbstractFileSystem extends DelegateToFileSystem { - - SeaweedAbstractFileSystem(final URI uri, final Configuration conf) - throws IOException, URISyntaxException { - super(uri, new SeaweedFileSystem(), conf, "seaweedfs", false); - } - -} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java deleted file mode 100644 index 58fcaf975..000000000 --- a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystem.java +++ /dev/null @@ -1,634 +0,0 @@ -package seaweed.hdfs; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.fs.permission.AclEntry; -import org.apache.hadoop.fs.permission.AclStatus; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.Progressable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import seaweedfs.client.FilerProto; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStream; -import java.net.URI; -import java.util.EnumSet; -import java.util.List; -import java.util.Map; - -public class SeaweedFileSystem extends FileSystem { - - public static final String FS_SEAWEED_FILER_HOST = "fs.seaweed.filer.host"; - public static final String FS_SEAWEED_FILER_PORT = "fs.seaweed.filer.port"; - public static final String FS_SEAWEED_FILER_PORT_GRPC = "fs.seaweed.filer.port.grpc"; - public static final int FS_SEAWEED_DEFAULT_PORT = 8888; - public static final String FS_SEAWEED_BUFFER_SIZE = "fs.seaweed.buffer.size"; - public static final String FS_SEAWEED_REPLICATION = "fs.seaweed.replication"; - public static final String FS_SEAWEED_VOLUME_SERVER_ACCESS = "fs.seaweed.volume.server.access"; - public static final int FS_SEAWEED_DEFAULT_BUFFER_SIZE = 4 * 1024 * 1024; - public static final String FS_SEAWEED_FILER_CN = "fs.seaweed.filer.cn"; - - private static final Logger LOG = LoggerFactory.getLogger(SeaweedFileSystem.class); - - private URI uri; - private Path workingDirectory = new Path("/"); - private SeaweedFileSystemStore seaweedFileSystemStore; - - public URI getUri() { - return uri; - } - - public String getScheme() { - return "seaweedfs"; - } - - @Override - public void initialize(URI uri, Configuration conf) throws IOException { // get - super.initialize(uri, conf); - - // get host information from uri (overrides info in conf) - String host = uri.getHost(); - host = (host == null) ? 
conf.get(FS_SEAWEED_FILER_HOST, "localhost") : host; - conf.set(FS_SEAWEED_FILER_HOST, host); - - // get port information from uri, (overrides info in conf) - int port = uri.getPort(); - port = (port == -1) ? FS_SEAWEED_DEFAULT_PORT : port; - conf.setInt(FS_SEAWEED_FILER_PORT, port); - - int grpcPort = conf.getInt(FS_SEAWEED_FILER_PORT_GRPC, port+10000); - - setConf(conf); - this.uri = uri; - - String cn = conf.get(FS_SEAWEED_FILER_CN, ""); - - seaweedFileSystemStore = new SeaweedFileSystemStore(host, port, grpcPort, cn, conf); - } - - @Override - public void close() throws IOException { - super.close(); - this.seaweedFileSystemStore.close(); - } - - @Override - public FSDataInputStream open(Path path, int bufferSize) throws IOException { - - LOG.debug("open path: {} bufferSize:{}", path, bufferSize); - - path = qualify(path); - - try { - int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE); - FSInputStream inputStream = seaweedFileSystemStore.openFileForRead(path, statistics); - return new FSDataInputStream(new BufferedByteBufferReadableInputStream(inputStream, 4 * seaweedBufferSize)); - } catch (Exception ex) { - LOG.warn("open path: {} bufferSize:{}", path, bufferSize, ex); - return null; - } - } - - @Override - public FSDataOutputStream create(Path path, FsPermission permission, final boolean overwrite, final int bufferSize, - final short replication, final long blockSize, final Progressable progress) throws IOException { - - LOG.debug("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize); - - path = qualify(path); - - try { - String replicaPlacement = this.getConf().get(FS_SEAWEED_REPLICATION, String.format("%03d", replication - 1)); - int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE); - OutputStream outputStream = seaweedFileSystemStore.createFile(path, overwrite, permission, seaweedBufferSize, replicaPlacement); - return new FSDataOutputStream(outputStream, statistics); - } catch (Exception ex) { - LOG.warn("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize, ex); - return null; - } - } - - /** - * {@inheritDoc} - * - * @throws FileNotFoundException if the parent directory is not present -or - * is not a directory. 
- */ - @Override - public FSDataOutputStream createNonRecursive(Path path, - FsPermission permission, - EnumSet<CreateFlag> flags, - int bufferSize, - short replication, - long blockSize, - Progressable progress) throws IOException { - Path parent = path.getParent(); - if (parent != null) { - // expect this to raise an exception if there is no parent - if (!getFileStatus(parent).isDirectory()) { - throw new FileAlreadyExistsException("Not a directory: " + parent); - } - } - int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE); - return create(path, permission, - flags.contains(CreateFlag.OVERWRITE), bufferSize, - replication, seaweedBufferSize, progress); - } - - @Override - public FSDataOutputStream append(Path path, int bufferSize, Progressable progressable) throws IOException { - - LOG.debug("append path: {} bufferSize:{}", path, bufferSize); - - path = qualify(path); - try { - int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE); - OutputStream outputStream = seaweedFileSystemStore.createFile(path, false, null, seaweedBufferSize, ""); - return new FSDataOutputStream(outputStream, statistics); - } catch (Exception ex) { - LOG.warn("append path: {} bufferSize:{}", path, bufferSize, ex); - return null; - } - } - - @Override - public boolean rename(Path src, Path dst) throws IOException { - - LOG.debug("rename path: {} => {}", src, dst); - - if (src.isRoot()) { - return false; - } - - if (src.equals(dst)) { - return true; - } - FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(dst); - - Path adjustedDst = dst; - - if (entry != null) { - FileStatus dstFileStatus = getFileStatus(dst); - String sourceFileName = src.getName(); - if (!dstFileStatus.isDirectory()) { - return false; - } - adjustedDst = new Path(dst, sourceFileName); - } - - Path qualifiedSrcPath = qualify(src); - Path qualifiedDstPath = qualify(adjustedDst); - - seaweedFileSystemStore.rename(qualifiedSrcPath, qualifiedDstPath); - return true; - } - - @Override - public boolean delete(Path path, boolean recursive) throws IOException { - - LOG.debug("delete path: {} recursive:{}", path, recursive); - - path = qualify(path); - - FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(path); - - if (entry == null) { - return true; - } - - FileStatus fileStatus = getFileStatus(path); - - return seaweedFileSystemStore.deleteEntries(path, fileStatus.isDirectory(), recursive); - - } - - @Override - public FileStatus[] listStatus(Path path) throws IOException { - - LOG.debug("listStatus path: {}", path); - - path = qualify(path); - - return seaweedFileSystemStore.listEntries(path); - } - - @Override - public Path getWorkingDirectory() { - return workingDirectory; - } - - @Override - public void setWorkingDirectory(Path path) { - if (path.isAbsolute()) { - workingDirectory = path; - } else { - workingDirectory = new Path(workingDirectory, path); - } - } - - @Override - public boolean mkdirs(Path path, FsPermission fsPermission) throws IOException { - - LOG.debug("mkdirs path: {}", path); - - path = qualify(path); - - FilerProto.Entry entry = seaweedFileSystemStore.lookupEntry(path); - - if (entry == null) { - - UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); - return seaweedFileSystemStore.createDirectory(path, currentUser, - fsPermission == null ? 
FsPermission.getDirDefault() : fsPermission, - FsPermission.getUMask(getConf())); - - } - - FileStatus fileStatus = getFileStatus(path); - - if (fileStatus.isDirectory()) { - return true; - } else { - throw new FileAlreadyExistsException("Path is a file: " + path); - } - } - - @Override - public FileStatus getFileStatus(Path path) throws IOException { - - LOG.debug("getFileStatus path: {}", path); - - path = qualify(path); - - return seaweedFileSystemStore.getFileStatus(path); - } - - /** - * Set owner of a path (i.e. a file or a directory). - * The parameters owner and group cannot both be null. - * - * @param path The path - * @param owner If it is null, the original username remains unchanged. - * @param group If it is null, the original groupname remains unchanged. - */ - @Override - public void setOwner(Path path, final String owner, final String group) - throws IOException { - LOG.debug("setOwner path: {}", path); - path = qualify(path); - - seaweedFileSystemStore.setOwner(path, owner, group); - } - - - /** - * Set permission of a path. - * - * @param path The path - * @param permission Access permission - */ - @Override - public void setPermission(Path path, final FsPermission permission) throws IOException { - LOG.debug("setPermission path: {}", path); - - if (permission == null) { - throw new IllegalArgumentException("The permission can't be null"); - } - - path = qualify(path); - - seaweedFileSystemStore.setPermission(path, permission); - } - - Path qualify(Path path) { - return path.makeQualified(uri, workingDirectory); - } - - /** - * Concat existing files together. - * - * @param trg the path to the target destination. - * @param psrcs the paths to the sources to use for the concatenation. - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default). - */ - @Override - public void concat(final Path trg, final Path[] psrcs) throws IOException { - throw new UnsupportedOperationException("Not implemented by the " + - getClass().getSimpleName() + " FileSystem implementation"); - } - - /** - * Truncate the file in the indicated path to the indicated size. - * <ul> - * <li>Fails if path is a directory.</li> - * <li>Fails if path does not exist.</li> - * <li>Fails if path is not closed.</li> - * <li>Fails if new size is greater than current size.</li> - * </ul> - * - * @param f The path to the file to be truncated - * @param newLength The size the file is to be truncated to - * @return <code>true</code> if the file has been truncated to the desired - * <code>newLength</code> and is immediately available to be reused for - * write operations such as <code>append</code>, or - * <code>false</code> if a background process of adjusting the length of - * the last block has been started, and clients should wait for it to - * complete before proceeding with further file updates. - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default). 
- */ - @Override - public boolean truncate(Path f, long newLength) throws IOException { - throw new UnsupportedOperationException("Not implemented by the " + - getClass().getSimpleName() + " FileSystem implementation"); - } - - @Override - public void createSymlink(final Path target, final Path link, - final boolean createParent) throws - IOException { - // Supporting filesystems should override this method - throw new UnsupportedOperationException( - "Filesystem does not support symlinks!"); - } - - public boolean supportsSymlinks() { - return false; - } - - /** - * Create a snapshot. - * - * @param path The directory where snapshots will be taken. - * @param snapshotName The name of the snapshot - * @return the snapshot path. - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - */ - @Override - public Path createSnapshot(Path path, String snapshotName) - throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support createSnapshot"); - } - - /** - * Rename a snapshot. - * - * @param path The directory path where the snapshot was taken - * @param snapshotOldName Old name of the snapshot - * @param snapshotNewName New name of the snapshot - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public void renameSnapshot(Path path, String snapshotOldName, - String snapshotNewName) throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support renameSnapshot"); - } - - /** - * Delete a snapshot of a directory. - * - * @param path The directory that the to-be-deleted snapshot belongs to - * @param snapshotName The name of the snapshot - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public void deleteSnapshot(Path path, String snapshotName) - throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support deleteSnapshot"); - } - - /** - * Modifies ACL entries of files and directories. This method can add new ACL - * entries or modify the permissions on existing ACL entries. All existing - * ACL entries that are not specified in this call are retained without - * changes. (Modifications are merged into the current ACL.) - * - * @param path Path to modify - * @param aclSpec List<AclEntry> describing modifications - * @throws IOException if an ACL could not be modified - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public void modifyAclEntries(Path path, List<AclEntry> aclSpec) - throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support modifyAclEntries"); - } - - /** - * Removes ACL entries from files and directories. Other ACL entries are - * retained. - * - * @param path Path to modify - * @param aclSpec List describing entries to remove - * @throws IOException if an ACL could not be modified - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public void removeAclEntries(Path path, List<AclEntry> aclSpec) - throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support removeAclEntries"); - } - - /** - * Removes all default ACL entries from files and directories. 
- * - * @param path Path to modify - * @throws IOException if an ACL could not be modified - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public void removeDefaultAcl(Path path) - throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support removeDefaultAcl"); - } - - /** - * Removes all but the base ACL entries of files and directories. The entries - * for user, group, and others are retained for compatibility with permission - * bits. - * - * @param path Path to modify - * @throws IOException if an ACL could not be removed - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public void removeAcl(Path path) - throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support removeAcl"); - } - - /** - * Fully replaces ACL of files and directories, discarding all existing - * entries. - * - * @param path Path to modify - * @param aclSpec List describing modifications, which must include entries - * for user, group, and others for compatibility with permission bits. - * @throws IOException if an ACL could not be modified - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public void setAcl(Path path, List<AclEntry> aclSpec) throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support setAcl"); - } - - /** - * Gets the ACL of a file or directory. - * - * @param path Path to get - * @return AclStatus describing the ACL of the file or directory - * @throws IOException if an ACL could not be read - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public AclStatus getAclStatus(Path path) throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support getAclStatus"); - } - - /** - * Set an xattr of a file or directory. - * The name must be prefixed with the namespace followed by ".". For example, - * "user.attr". - * <p> - * Refer to the HDFS extended attributes user documentation for details. - * - * @param path Path to modify - * @param name xattr name. - * @param value xattr value. - * @param flag xattr set flag - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public void setXAttr(Path path, String name, byte[] value, - EnumSet<XAttrSetFlag> flag) throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support setXAttr"); - } - - /** - * Get an xattr name and value for a file or directory. - * The name must be prefixed with the namespace followed by ".". For example, - * "user.attr". - * <p> - * Refer to the HDFS extended attributes user documentation for details. - * - * @param path Path to get extended attribute - * @param name xattr name. - * @return byte[] xattr value. - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public byte[] getXAttr(Path path, String name) throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support getXAttr"); - } - - /** - * Get all of the xattr name/value pairs for a file or directory. 
- * Only those xattrs which the logged-in user has permissions to view - * are returned. - * <p> - * Refer to the HDFS extended attributes user documentation for details. - * - * @param path Path to get extended attributes - * @return Map describing the XAttrs of the file or directory - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public Map<String, byte[]> getXAttrs(Path path) throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support getXAttrs"); - } - - /** - * Get all of the xattrs name/value pairs for a file or directory. - * Only those xattrs which the logged-in user has permissions to view - * are returned. - * <p> - * Refer to the HDFS extended attributes user documentation for details. - * - * @param path Path to get extended attributes - * @param names XAttr names. - * @return Map describing the XAttrs of the file or directory - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public Map<String, byte[]> getXAttrs(Path path, List<String> names) - throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support getXAttrs"); - } - - /** - * Get all of the xattr names for a file or directory. - * Only those xattr names which the logged-in user has permissions to view - * are returned. - * <p> - * Refer to the HDFS extended attributes user documentation for details. - * - * @param path Path to get extended attributes - * @return List{@literal <String>} of the XAttr names of the file or directory - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). - */ - @Override - public List<String> listXAttrs(Path path) throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support listXAttrs"); - } - - /** - * Remove an xattr of a file or directory. - * The name must be prefixed with the namespace followed by ".". For example, - * "user.attr". - * <p> - * Refer to the HDFS extended attributes user documentation for details. - * - * @param path Path to remove extended attribute - * @param name xattr name - * @throws IOException IO failure - * @throws UnsupportedOperationException if the operation is unsupported - * (default outcome). 
- */ - @Override - public void removeXAttr(Path path, String name) throws IOException { - throw new UnsupportedOperationException(getClass().getSimpleName() - + " doesn't support removeXAttr"); - } - -} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java deleted file mode 100644 index f65c1961b..000000000 --- a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java +++ /dev/null @@ -1,291 +0,0 @@ -package seaweed.hdfs; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.security.UserGroupInformation; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import seaweedfs.client.*; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import static seaweed.hdfs.SeaweedFileSystem.*; - -public class SeaweedFileSystemStore { - - private static final Logger LOG = LoggerFactory.getLogger(SeaweedFileSystemStore.class); - - private FilerClient filerClient; - private Configuration conf; - - public SeaweedFileSystemStore(String host, int port, int grpcPort, String cn, Configuration conf) { - filerClient = new FilerClient(host, port, grpcPort, cn); - this.conf = conf; - String volumeServerAccessMode = this.conf.get(FS_SEAWEED_VOLUME_SERVER_ACCESS, "direct"); - if (volumeServerAccessMode.equals("publicUrl")) { - filerClient.setAccessVolumeServerByPublicUrl(); - } else if (volumeServerAccessMode.equals("filerProxy")) { - filerClient.setAccessVolumeServerByFilerProxy(); - } - } - - public void close() { - try { - this.filerClient.shutdown(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - public static String getParentDirectory(Path path) { - return path.isRoot() ? 
"/" : path.getParent().toUri().getPath(); - } - - static int permissionToMode(FsPermission permission, boolean isDirectory) { - int p = permission.toShort(); - if (isDirectory) { - p = p | 1 << 31; - } - return p; - } - - public boolean createDirectory(final Path path, UserGroupInformation currentUser, - final FsPermission permission, final FsPermission umask) { - - LOG.debug("createDirectory path: {} permission: {} umask: {}", - path, - permission, - umask); - - return filerClient.mkdirs( - path.toUri().getPath(), - permissionToMode(permission, true), - currentUser.getUserName(), - currentUser.getGroupNames() - ); - } - - public FileStatus[] listEntries(final Path path) throws IOException { - LOG.debug("listEntries path: {}", path); - - FileStatus pathStatus = getFileStatus(path); - - if (pathStatus == null) { - return new FileStatus[0]; - } - - if (!pathStatus.isDirectory()) { - return new FileStatus[]{pathStatus}; - } - - List<FileStatus> fileStatuses = new ArrayList<FileStatus>(); - - List<FilerProto.Entry> entries = filerClient.listEntries(path.toUri().getPath()); - - for (FilerProto.Entry entry : entries) { - - FileStatus fileStatus = doGetFileStatus(new Path(path, entry.getName()), entry); - - fileStatuses.add(fileStatus); - } - LOG.debug("listEntries path: {} size {}", fileStatuses, fileStatuses.size()); - return fileStatuses.toArray(new FileStatus[0]); - - } - - public FileStatus getFileStatus(final Path path) throws IOException { - - FilerProto.Entry entry = lookupEntry(path); - if (entry == null) { - throw new FileNotFoundException("File does not exist: " + path); - } - LOG.debug("doGetFileStatus path:{} entry:{}", path, entry); - - FileStatus fileStatus = doGetFileStatus(path, entry); - return fileStatus; - } - - public boolean deleteEntries(final Path path, boolean isDirectory, boolean recursive) { - LOG.debug("deleteEntries path: {} isDirectory {} recursive: {}", - path, - String.valueOf(isDirectory), - String.valueOf(recursive)); - - if (path.isRoot()) { - return true; - } - - if (recursive && isDirectory) { - List<FilerProto.Entry> entries = filerClient.listEntries(path.toUri().getPath()); - for (FilerProto.Entry entry : entries) { - deleteEntries(new Path(path, entry.getName()), entry.getIsDirectory(), true); - } - } - - return filerClient.deleteEntry(getParentDirectory(path), path.getName(), true, recursive, true); - } - - private FileStatus doGetFileStatus(Path path, FilerProto.Entry entry) { - FilerProto.FuseAttributes attributes = entry.getAttributes(); - long length = SeaweedRead.fileSize(entry); - boolean isDir = entry.getIsDirectory(); - int block_replication = 1; - int blocksize = this.conf.getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE); - long modification_time = attributes.getMtime() * 1000; // milliseconds - long access_time = 0; - FsPermission permission = FsPermission.createImmutable((short) attributes.getFileMode()); - String owner = attributes.getUserName(); - String group = attributes.getGroupNameCount() > 0 ? 
attributes.getGroupName(0) : ""; - return new FileStatus(length, isDir, block_replication, blocksize, - modification_time, access_time, permission, owner, group, null, path); - } - - public FilerProto.Entry lookupEntry(Path path) { - - return filerClient.lookupEntry(getParentDirectory(path), path.getName()); - - } - - public void rename(Path source, Path destination) { - - LOG.debug("rename source: {} destination:{}", source, destination); - - if (source.isRoot()) { - return; - } - LOG.info("rename source: {} destination:{}", source, destination); - FilerProto.Entry entry = lookupEntry(source); - if (entry == null) { - LOG.warn("rename non-existing source: {}", source); - return; - } - filerClient.mv(source.toUri().getPath(), destination.toUri().getPath()); - } - - public OutputStream createFile(final Path path, - final boolean overwrite, - FsPermission permission, - int bufferSize, - String replication) throws IOException { - - permission = permission == null ? FsPermission.getFileDefault() : permission; - - LOG.debug("createFile path: {} overwrite: {} permission: {}", - path, - overwrite, - permission.toString()); - - UserGroupInformation userGroupInformation = UserGroupInformation.getCurrentUser(); - long now = System.currentTimeMillis() / 1000L; - - FilerProto.Entry.Builder entry = null; - long writePosition = 0; - if (!overwrite) { - FilerProto.Entry existingEntry = lookupEntry(path); - LOG.debug("createFile merged entry path:{} existingEntry:{}", path, existingEntry); - if (existingEntry != null) { - entry = FilerProto.Entry.newBuilder(); - entry.mergeFrom(existingEntry); - entry.clearContent(); - entry.getAttributesBuilder().setMtime(now); - LOG.debug("createFile merged entry path:{} entry:{} from:{}", path, entry, existingEntry); - writePosition = SeaweedRead.fileSize(existingEntry); - } - } - if (entry == null) { - entry = FilerProto.Entry.newBuilder() - .setName(path.getName()) - .setIsDirectory(false) - .setAttributes(FilerProto.FuseAttributes.newBuilder() - .setFileMode(permissionToMode(permission, false)) - .setCrtime(now) - .setMtime(now) - .setUserName(userGroupInformation.getUserName()) - .clearGroupName() - .addAllGroupName(Arrays.asList(userGroupInformation.getGroupNames())) - ); - SeaweedWrite.writeMeta(filerClient, getParentDirectory(path), entry); - } - - return new SeaweedHadoopOutputStream(filerClient, path.toString(), entry, writePosition, bufferSize, replication); - - } - - public FSInputStream openFileForRead(final Path path, FileSystem.Statistics statistics) throws IOException { - - LOG.debug("openFileForRead path:{}", path); - - FilerProto.Entry entry = lookupEntry(path); - - if (entry == null) { - throw new FileNotFoundException("read non-exist file " + path); - } - - return new SeaweedHadoopInputStream(filerClient, - statistics, - path.toUri().getPath(), - entry); - } - - public void setOwner(Path path, String owner, String group) { - - LOG.debug("setOwner path:{} owner:{} group:{}", path, owner, group); - - FilerProto.Entry entry = lookupEntry(path); - if (entry == null) { - LOG.debug("setOwner path:{} entry:{}", path, entry); - return; - } - - FilerProto.Entry.Builder entryBuilder = entry.toBuilder(); - FilerProto.FuseAttributes.Builder attributesBuilder = entry.getAttributes().toBuilder(); - - if (owner != null) { - attributesBuilder.setUserName(owner); - } - if (group != null) { - attributesBuilder.clearGroupName(); - attributesBuilder.addGroupName(group); - } - - entryBuilder.setAttributes(attributesBuilder); - - LOG.debug("setOwner path:{} entry:{}", 
path, entryBuilder); - - filerClient.updateEntry(getParentDirectory(path), entryBuilder.build()); - - } - - public void setPermission(Path path, FsPermission permission) { - - LOG.debug("setPermission path:{} permission:{}", path, permission); - - FilerProto.Entry entry = lookupEntry(path); - if (entry == null) { - LOG.debug("setPermission path:{} entry:{}", path, entry); - return; - } - - FilerProto.Entry.Builder entryBuilder = entry.toBuilder(); - FilerProto.FuseAttributes.Builder attributesBuilder = entry.getAttributes().toBuilder(); - - attributesBuilder.setFileMode(permissionToMode(permission, entry.getIsDirectory())); - - entryBuilder.setAttributes(attributesBuilder); - - LOG.debug("setPermission path:{} entry:{}", path, entryBuilder); - - filerClient.updateEntry(getParentDirectory(path), entryBuilder.build()); - - } - -} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java deleted file mode 100644 index f26eae597..000000000 --- a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java +++ /dev/null @@ -1,150 +0,0 @@ -package seaweed.hdfs; - -// based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream - -import org.apache.hadoop.fs.ByteBufferReadable; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.FileSystem.Statistics; -import seaweedfs.client.FilerClient; -import seaweedfs.client.FilerProto; -import seaweedfs.client.SeaweedInputStream; - -import java.io.EOFException; -import java.io.IOException; -import java.nio.ByteBuffer; - -public class SeaweedHadoopInputStream extends FSInputStream implements ByteBufferReadable { - - private final SeaweedInputStream seaweedInputStream; - private final Statistics statistics; - - public SeaweedHadoopInputStream( - final FilerClient filerClient, - final Statistics statistics, - final String path, - final FilerProto.Entry entry) throws IOException { - this.seaweedInputStream = new SeaweedInputStream(filerClient, path, entry); - this.statistics = statistics; - } - - @Override - public int read() throws IOException { - return seaweedInputStream.read(); - } - - @Override - public int read(final byte[] b, final int off, final int len) throws IOException { - return seaweedInputStream.read(b, off, len); - } - - // implement ByteBufferReadable - @Override - public synchronized int read(ByteBuffer buf) throws IOException { - int bytesRead = seaweedInputStream.read(buf); - - if (bytesRead > 0) { - if (statistics != null) { - statistics.incrementBytesRead(bytesRead); - } - } - - return bytesRead; - } - - /** - * Seek to given position in stream. - * - * @param n position to seek to - * @throws IOException if there is an error - * @throws EOFException if attempting to seek past end of file - */ - @Override - public synchronized void seek(long n) throws IOException { - seaweedInputStream.seek(n); - } - - @Override - public synchronized long skip(long n) throws IOException { - return seaweedInputStream.skip(n); - } - - /** - * Return the size of the remaining available bytes - * if the size is less than or equal to {@link Integer#MAX_VALUE}, - * otherwise, return {@link Integer#MAX_VALUE}. - * <p> - * This is to match the behavior of DFSInputStream.available(), - * which some clients may rely on (HBase write-ahead log reading in - * particular). 
- */ - @Override - public synchronized int available() throws IOException { - return seaweedInputStream.available(); - } - - /** - * Returns the length of the file that this stream refers to. Note that the length returned is the length - * as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, - * they wont be reflected in the returned length. - * - * @return length of the file. - * @throws IOException if the stream is closed - */ - public long length() throws IOException { - return seaweedInputStream.length(); - } - - /** - * Return the current offset from the start of the file - * - * @throws IOException throws {@link IOException} if there is an error - */ - @Override - public synchronized long getPos() throws IOException { - return seaweedInputStream.getPos(); - } - - /** - * Seeks a different copy of the data. Returns true if - * found a new source, false otherwise. - * - * @throws IOException throws {@link IOException} if there is an error - */ - @Override - public boolean seekToNewSource(long l) throws IOException { - return false; - } - - @Override - public synchronized void close() throws IOException { - seaweedInputStream.close(); - } - - /** - * Not supported by this stream. Throws {@link UnsupportedOperationException} - * - * @param readlimit ignored - */ - @Override - public synchronized void mark(int readlimit) { - throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); - } - - /** - * Not supported by this stream. Throws {@link UnsupportedOperationException} - */ - @Override - public synchronized void reset() throws IOException { - throw new UnsupportedOperationException("mark()/reset() not supported on this stream"); - } - - /** - * gets whether mark and reset are supported by {@code ADLFileInputStream}. Always returns false. - * - * @return always {@code false} - */ - @Override - public boolean markSupported() { - return false; - } -} diff --git a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedHadoopOutputStream.java b/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedHadoopOutputStream.java deleted file mode 100644 index da5b56bbc..000000000 --- a/other/java/hdfs2/src/main/java/seaweed/hdfs/SeaweedHadoopOutputStream.java +++ /dev/null @@ -1,16 +0,0 @@ -package seaweed.hdfs; - -// adapted from org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream - -import seaweedfs.client.FilerClient; -import seaweedfs.client.FilerProto; -import seaweedfs.client.SeaweedOutputStream; - -public class SeaweedHadoopOutputStream extends SeaweedOutputStream { - - public SeaweedHadoopOutputStream(FilerClient filerClient, final String path, FilerProto.Entry.Builder entry, - final long position, final int bufferSize, final String replication) { - super(filerClient, path, entry, position, bufferSize, replication); - } - -} diff --git a/other/java/hdfs2/src/test/java/seaweed/hdfs/SeaweedFileSystemConfigTest.java b/other/java/hdfs2/src/test/java/seaweed/hdfs/SeaweedFileSystemConfigTest.java deleted file mode 100644 index bcc08b8e2..000000000 --- a/other/java/hdfs2/src/test/java/seaweed/hdfs/SeaweedFileSystemConfigTest.java +++ /dev/null @@ -1,90 +0,0 @@ -package seaweed.hdfs; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.junit.Before; -import org.junit.Test; - -import static org.junit.Assert.*; - -/** - * Unit tests for SeaweedFileSystem configuration that don't require a running SeaweedFS instance. - * - * These tests verify basic properties and constants. 
- */ -public class SeaweedFileSystemConfigTest { - - private SeaweedFileSystem fs; - private Configuration conf; - - @Before - public void setUp() { - fs = new SeaweedFileSystem(); - conf = new Configuration(); - } - - @Test - public void testScheme() { - assertEquals("seaweedfs", fs.getScheme()); - } - - @Test - public void testConstants() { - // Test that constants are defined correctly - assertEquals("fs.seaweed.filer.host", SeaweedFileSystem.FS_SEAWEED_FILER_HOST); - assertEquals("fs.seaweed.filer.port", SeaweedFileSystem.FS_SEAWEED_FILER_PORT); - assertEquals("fs.seaweed.filer.port.grpc", SeaweedFileSystem.FS_SEAWEED_FILER_PORT_GRPC); - assertEquals(8888, SeaweedFileSystem.FS_SEAWEED_DEFAULT_PORT); - assertEquals("fs.seaweed.buffer.size", SeaweedFileSystem.FS_SEAWEED_BUFFER_SIZE); - assertEquals(4 * 1024 * 1024, SeaweedFileSystem.FS_SEAWEED_DEFAULT_BUFFER_SIZE); - assertEquals("fs.seaweed.replication", SeaweedFileSystem.FS_SEAWEED_REPLICATION); - assertEquals("fs.seaweed.volume.server.access", SeaweedFileSystem.FS_SEAWEED_VOLUME_SERVER_ACCESS); - assertEquals("fs.seaweed.filer.cn", SeaweedFileSystem.FS_SEAWEED_FILER_CN); - } - - @Test - public void testWorkingDirectoryPathOperations() { - // Test path operations that don't require initialization - Path testPath = new Path("/test/path"); - assertTrue("Path should be absolute", testPath.isAbsolute()); - assertEquals("/test/path", testPath.toUri().getPath()); - - Path childPath = new Path(testPath, "child"); - assertEquals("/test/path/child", childPath.toUri().getPath()); - } - - @Test - public void testConfigurationProperties() { - // Test that configuration can be set and read - conf.set(SeaweedFileSystem.FS_SEAWEED_FILER_HOST, "testhost"); - assertEquals("testhost", conf.get(SeaweedFileSystem.FS_SEAWEED_FILER_HOST)); - - conf.setInt(SeaweedFileSystem.FS_SEAWEED_FILER_PORT, 9999); - assertEquals(9999, conf.getInt(SeaweedFileSystem.FS_SEAWEED_FILER_PORT, 0)); - - conf.setInt(SeaweedFileSystem.FS_SEAWEED_BUFFER_SIZE, 8 * 1024 * 1024); - assertEquals(8 * 1024 * 1024, conf.getInt(SeaweedFileSystem.FS_SEAWEED_BUFFER_SIZE, 0)); - - conf.set(SeaweedFileSystem.FS_SEAWEED_REPLICATION, "001"); - assertEquals("001", conf.get(SeaweedFileSystem.FS_SEAWEED_REPLICATION)); - - conf.set(SeaweedFileSystem.FS_SEAWEED_VOLUME_SERVER_ACCESS, "publicUrl"); - assertEquals("publicUrl", conf.get(SeaweedFileSystem.FS_SEAWEED_VOLUME_SERVER_ACCESS)); - - conf.set(SeaweedFileSystem.FS_SEAWEED_FILER_CN, "test-cn"); - assertEquals("test-cn", conf.get(SeaweedFileSystem.FS_SEAWEED_FILER_CN)); - } - - @Test - public void testDefaultBufferSize() { - // Test default buffer size constant - int expected = 4 * 1024 * 1024; // 4MB - assertEquals(expected, SeaweedFileSystem.FS_SEAWEED_DEFAULT_BUFFER_SIZE); - } - - @Test - public void testDefaultPort() { - // Test default port constant - assertEquals(8888, SeaweedFileSystem.FS_SEAWEED_DEFAULT_PORT); - } -} diff --git a/other/java/hdfs2/src/test/java/seaweed/hdfs/SeaweedFileSystemTest.java b/other/java/hdfs2/src/test/java/seaweed/hdfs/SeaweedFileSystemTest.java deleted file mode 100644 index ec43b3481..000000000 --- a/other/java/hdfs2/src/test/java/seaweed/hdfs/SeaweedFileSystemTest.java +++ /dev/null @@ -1,379 +0,0 @@ -package seaweed.hdfs; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import 
org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import java.io.IOException; -import java.net.URI; - -import static org.junit.Assert.*; - -/** - * Unit tests for SeaweedFileSystem. - * - * These tests verify basic FileSystem operations against a SeaweedFS backend. - * Note: These tests require a running SeaweedFS filer instance. - * - * To run tests, ensure SeaweedFS is running with default ports: - * - Filer HTTP: 8888 - * - Filer gRPC: 18888 - * - * Set environment variable SEAWEEDFS_TEST_ENABLED=true to enable these tests. - */ -public class SeaweedFileSystemTest { - - private SeaweedFileSystem fs; - private Configuration conf; - private static final String TEST_ROOT = "/test-hdfs2"; - private static final boolean TESTS_ENABLED = - "true".equalsIgnoreCase(System.getenv("SEAWEEDFS_TEST_ENABLED")); - - @Before - public void setUp() throws Exception { - if (!TESTS_ENABLED) { - return; - } - - conf = new Configuration(); - conf.set("fs.seaweed.filer.host", "localhost"); - conf.setInt("fs.seaweed.filer.port", 8888); - conf.setInt("fs.seaweed.filer.port.grpc", 18888); - - fs = new SeaweedFileSystem(); - URI uri = new URI("seaweedfs://localhost:8888/"); - fs.initialize(uri, conf); - - // Clean up any existing test directory - Path testPath = new Path(TEST_ROOT); - if (fs.exists(testPath)) { - fs.delete(testPath, true); - } - } - - @After - public void tearDown() throws Exception { - if (!TESTS_ENABLED || fs == null) { - return; - } - - // Clean up test directory - Path testPath = new Path(TEST_ROOT); - if (fs.exists(testPath)) { - fs.delete(testPath, true); - } - - fs.close(); - } - - @Test - public void testInitialization() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - assertNotNull(fs); - assertEquals("seaweedfs", fs.getScheme()); - assertNotNull(fs.getUri()); - assertEquals("/", fs.getWorkingDirectory().toUri().getPath()); - } - - @Test - public void testMkdirs() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testDir = new Path(TEST_ROOT + "/testdir"); - assertTrue("Failed to create directory", fs.mkdirs(testDir)); - assertTrue("Directory should exist", fs.exists(testDir)); - - FileStatus status = fs.getFileStatus(testDir); - assertTrue("Path should be a directory", status.isDirectory()); - } - - @Test - public void testCreateAndReadFile() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testFile = new Path(TEST_ROOT + "/testfile.txt"); - String testContent = "Hello, SeaweedFS!"; - - // Create and write to file - FSDataOutputStream out = fs.create(testFile, FsPermission.getDefault(), - false, 4096, (short) 1, 4 * 1024 * 1024, null); - assertNotNull("Output stream should not be null", out); - out.write(testContent.getBytes()); - out.close(); - - // Verify file exists - assertTrue("File should exist", fs.exists(testFile)); - - // Read and verify content - FSDataInputStream in = fs.open(testFile, 4096); - assertNotNull("Input stream should not be null", in); - byte[] buffer = new byte[testContent.length()]; - int bytesRead = in.read(buffer); - in.close(); - - assertEquals("Should read all bytes", testContent.length(), bytesRead); - assertEquals("Content should match", testContent, new String(buffer)); - } - - @Test - public void testFileStatus() throws Exception { - if (!TESTS_ENABLED) { - 
System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testFile = new Path(TEST_ROOT + "/statustest.txt"); - String content = "test content"; - - FSDataOutputStream out = fs.create(testFile); - out.write(content.getBytes()); - out.close(); - - FileStatus status = fs.getFileStatus(testFile); - assertNotNull("FileStatus should not be null", status); - assertFalse("Should not be a directory", status.isDirectory()); - assertTrue("Should be a file", status.isFile()); - assertEquals("File length should match", content.length(), status.getLen()); - assertNotNull("Path should not be null", status.getPath()); - } - - @Test - public void testListStatus() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testDir = new Path(TEST_ROOT + "/listtest"); - fs.mkdirs(testDir); - - // Create multiple files - for (int i = 0; i < 3; i++) { - Path file = new Path(testDir, "file" + i + ".txt"); - FSDataOutputStream out = fs.create(file); - out.write(("content" + i).getBytes()); - out.close(); - } - - FileStatus[] statuses = fs.listStatus(testDir); - assertNotNull("List should not be null", statuses); - assertEquals("Should have 3 files", 3, statuses.length); - } - - @Test - public void testRename() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path srcFile = new Path(TEST_ROOT + "/source.txt"); - Path dstFile = new Path(TEST_ROOT + "/destination.txt"); - String content = "rename test"; - - // Create source file - FSDataOutputStream out = fs.create(srcFile); - out.write(content.getBytes()); - out.close(); - - assertTrue("Source file should exist", fs.exists(srcFile)); - - // Rename - assertTrue("Rename should succeed", fs.rename(srcFile, dstFile)); - - // Verify - assertFalse("Source file should not exist", fs.exists(srcFile)); - assertTrue("Destination file should exist", fs.exists(dstFile)); - - // Verify content preserved - FSDataInputStream in = fs.open(dstFile); - byte[] buffer = new byte[content.length()]; - in.read(buffer); - in.close(); - assertEquals("Content should be preserved", content, new String(buffer)); - } - - @Test - public void testDelete() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testFile = new Path(TEST_ROOT + "/deletetest.txt"); - - // Create file - FSDataOutputStream out = fs.create(testFile); - out.write("delete me".getBytes()); - out.close(); - - assertTrue("File should exist before delete", fs.exists(testFile)); - - // Delete - assertTrue("Delete should succeed", fs.delete(testFile, false)); - assertFalse("File should not exist after delete", fs.exists(testFile)); - } - - @Test - public void testDeleteDirectory() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testDir = new Path(TEST_ROOT + "/deletedir"); - Path testFile = new Path(testDir, "file.txt"); - - // Create directory with file - fs.mkdirs(testDir); - FSDataOutputStream out = fs.create(testFile); - out.write("content".getBytes()); - out.close(); - - assertTrue("Directory should exist", fs.exists(testDir)); - assertTrue("File should exist", fs.exists(testFile)); - - // Recursive delete - assertTrue("Recursive delete should succeed", fs.delete(testDir, true)); - assertFalse("Directory should not exist after delete", fs.exists(testDir)); - 
assertFalse("File should not exist after delete", fs.exists(testFile)); - } - - @Test - public void testAppend() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testFile = new Path(TEST_ROOT + "/appendtest.txt"); - String initialContent = "initial"; - String appendContent = " appended"; - - // Create initial file - FSDataOutputStream out = fs.create(testFile); - out.write(initialContent.getBytes()); - out.close(); - - // Append - FSDataOutputStream appendOut = fs.append(testFile, 4096, null); - assertNotNull("Append stream should not be null", appendOut); - appendOut.write(appendContent.getBytes()); - appendOut.close(); - - // Verify combined content - FSDataInputStream in = fs.open(testFile); - byte[] buffer = new byte[initialContent.length() + appendContent.length()]; - int bytesRead = in.read(buffer); - in.close(); - - String expected = initialContent + appendContent; - assertEquals("Should read all bytes", expected.length(), bytesRead); - assertEquals("Content should match", expected, new String(buffer)); - } - - @Test - public void testSetWorkingDirectory() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path originalWd = fs.getWorkingDirectory(); - assertEquals("Original working directory should be /", "/", originalWd.toUri().getPath()); - - Path newWd = new Path(TEST_ROOT); - fs.mkdirs(newWd); - fs.setWorkingDirectory(newWd); - - Path currentWd = fs.getWorkingDirectory(); - assertTrue("Working directory should be updated", - currentWd.toUri().getPath().contains(TEST_ROOT)); - } - - @Test - public void testSetPermission() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testFile = new Path(TEST_ROOT + "/permtest.txt"); - - // Create file - FSDataOutputStream out = fs.create(testFile); - out.write("permission test".getBytes()); - out.close(); - - // Set permission - FsPermission newPerm = new FsPermission((short) 0644); - fs.setPermission(testFile, newPerm); - - FileStatus status = fs.getFileStatus(testFile); - assertNotNull("Permission should not be null", status.getPermission()); - } - - @Test - public void testSetOwner() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path testFile = new Path(TEST_ROOT + "/ownertest.txt"); - - // Create file - FSDataOutputStream out = fs.create(testFile); - out.write("owner test".getBytes()); - out.close(); - - // Set owner - this may not fail even if not fully implemented - fs.setOwner(testFile, "testuser", "testgroup"); - - // Just verify the call doesn't throw an exception - FileStatus status = fs.getFileStatus(testFile); - assertNotNull("FileStatus should not be null", status); - } - - @Test - public void testRenameToExistingDirectory() throws Exception { - if (!TESTS_ENABLED) { - System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); - return; - } - - Path srcFile = new Path(TEST_ROOT + "/movefile.txt"); - Path dstDir = new Path(TEST_ROOT + "/movedir"); - - // Create source file and destination directory - FSDataOutputStream out = fs.create(srcFile); - out.write("move test".getBytes()); - out.close(); - fs.mkdirs(dstDir); - - // Rename file to existing directory (should move file into directory) - assertTrue("Rename to directory should succeed", fs.rename(srcFile, dstDir)); - - // File 
should be moved into the directory - Path expectedLocation = new Path(dstDir, srcFile.getName()); - assertTrue("File should exist in destination directory", fs.exists(expectedLocation)); - assertFalse("Source file should not exist", fs.exists(srcFile)); - } -} - diff --git a/other/java/hdfs3/README.md b/other/java/hdfs3/README.md index f1afee264..e08f02a7c 100644 --- a/other/java/hdfs3/README.md +++ b/other/java/hdfs3/README.md @@ -130,6 +130,15 @@ The test suite covers: <name>fs.seaweed.filer.port.grpc</name> <value>18888</value> </property> + <!-- Optional: Replication configuration with three priority levels: + 1) If set to non-empty value (e.g. "001") - uses that value + 2) If set to empty string "" - uses SeaweedFS filer's default replication + 3) If not configured (property not present) - uses HDFS replication parameter + --> + <!-- <property> + <name>fs.seaweed.replication</name> + <value>001</value> + </property> --> </configuration> ``` diff --git a/other/java/hdfs3/dependency-reduced-pom.xml b/other/java/hdfs3/dependency-reduced-pom.xml index d3c2751a5..c6579c3fb 100644 --- a/other/java/hdfs3/dependency-reduced-pom.xml +++ b/other/java/hdfs3/dependency-reduced-pom.xml @@ -572,7 +572,7 @@ </dependency>
</dependencies>
<properties>
- <seaweedfs.client.version>3.80</seaweedfs.client.version>
+ <seaweedfs.client.version>3.80.1-SNAPSHOT</seaweedfs.client.version>
<hadoop.version>3.4.0</hadoop.version>
</properties>
</project>
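Note on the replication setting documented in the hdfs3 README hunk above and implemented in the `SeaweedFileSystem.create()` change below: the property `fs.seaweed.replication` is resolved in three steps (non-empty value wins, empty string defers to the filer default, absent property falls back to the HDFS replication factor). The following is a minimal standalone sketch of that resolution order, mirroring the logic shown in the diff; the `ReplicationResolver` class and `resolve` helper are hypothetical names used only for illustration, not part of the patch.

```java
import org.apache.hadoop.conf.Configuration;

// Hypothetical helper illustrating the documented priority order for
// fs.seaweed.replication:
//   1) non-empty value (e.g. "001") -> used as-is
//   2) empty string ""              -> passed through so the filer's default applies
//   3) property absent (null)       -> "00N" derived from the HDFS replication factor
public class ReplicationResolver {

    static String resolve(Configuration conf, short hdfsReplication) {
        String replicaPlacement = conf.get("fs.seaweed.replication"); // null if not configured
        if (replicaPlacement == null) {
            // e.g. replication = 3 -> "002": two extra copies on other servers in the same rack
            replicaPlacement = String.format("%03d", hdfsReplication - 1);
        }
        return replicaPlacement;
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        System.out.println(resolve(conf, (short) 3)); // "002" - property not set

        conf.set("fs.seaweed.replication", "");
        System.out.println(resolve(conf, (short) 3)); // ""    - filer default replication

        conf.set("fs.seaweed.replication", "001");
        System.out.println(resolve(conf, (short) 3)); // "001" - explicit placement string
    }
}
```

Under these assumptions, leaving the property unset keeps HDFS-style behavior (N-1 extra replicas in the same rack), while setting it to an empty string hands replication policy entirely to the SeaweedFS filer.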
diff --git a/other/java/hdfs3/pom.xml b/other/java/hdfs3/pom.xml index 061d4d700..824db8264 100644 --- a/other/java/hdfs3/pom.xml +++ b/other/java/hdfs3/pom.xml @@ -5,7 +5,7 @@ <modelVersion>4.0.0</modelVersion> <properties> - <seaweedfs.client.version>3.80</seaweedfs.client.version> + <seaweedfs.client.version>3.80.1-SNAPSHOT</seaweedfs.client.version> <hadoop.version>3.4.0</hadoop.version> </properties> diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java index 58fcaf975..513266d69 100644 --- a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java +++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystem.java @@ -59,7 +59,7 @@ public class SeaweedFileSystem extends FileSystem { port = (port == -1) ? FS_SEAWEED_DEFAULT_PORT : port; conf.setInt(FS_SEAWEED_FILER_PORT, port); - int grpcPort = conf.getInt(FS_SEAWEED_FILER_PORT_GRPC, port+10000); + int grpcPort = conf.getInt(FS_SEAWEED_FILER_PORT_GRPC, port + 10000); setConf(conf); this.uri = uri; @@ -85,29 +85,45 @@ public class SeaweedFileSystem extends FileSystem { try { int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE); FSInputStream inputStream = seaweedFileSystemStore.openFileForRead(path, statistics); - return new FSDataInputStream(new BufferedByteBufferReadableInputStream(inputStream, 4 * seaweedBufferSize)); + + // Use BufferedFSInputStream for all streams (like RawLocalFileSystem) + // This ensures proper position tracking for positioned reads (critical for + // Parquet) + return new FSDataInputStream(new BufferedFSInputStream(inputStream, 4 * seaweedBufferSize)); } catch (Exception ex) { - LOG.warn("open path: {} bufferSize:{}", path, bufferSize, ex); - return null; + LOG.error("Failed to open file: {} bufferSize:{}", path, bufferSize, ex); + throw new IOException("Failed to open file: " + path, ex); } } @Override public FSDataOutputStream create(Path path, FsPermission permission, final boolean overwrite, final int bufferSize, - final short replication, final long blockSize, final Progressable progress) throws IOException { + final short replication, final long blockSize, final Progressable progress) throws IOException { LOG.debug("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize); path = qualify(path); + final Path finalPath = path; // For use in anonymous inner class try { - String replicaPlacement = this.getConf().get(FS_SEAWEED_REPLICATION, String.format("%03d", replication - 1)); + // Priority: 1) non-empty FS_SEAWEED_REPLICATION, 2) empty string -> filer + // default, 3) null -> HDFS replication + String replicaPlacement = this.getConf().get(FS_SEAWEED_REPLICATION); + if (replicaPlacement == null) { + // Not configured, use HDFS replication parameter. This creates a "00N" + // replication string, + // placing N (replication-1) extra replicas on different servers in the same + // rack. 
+ replicaPlacement = String.format("%03d", replication - 1); + } int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE); - OutputStream outputStream = seaweedFileSystemStore.createFile(path, overwrite, permission, seaweedBufferSize, replicaPlacement); + OutputStream outputStream = seaweedFileSystemStore.createFile(path, + overwrite, permission, + seaweedBufferSize, replicaPlacement); return new FSDataOutputStream(outputStream, statistics); } catch (Exception ex) { - LOG.warn("create path: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize, ex); - return null; + LOG.error("Failed to create file: {} bufferSize:{} blockSize:{}", path, bufferSize, blockSize, ex); + throw new IOException("Failed to create file: " + path, ex); } } @@ -119,12 +135,12 @@ public class SeaweedFileSystem extends FileSystem { */ @Override public FSDataOutputStream createNonRecursive(Path path, - FsPermission permission, - EnumSet<CreateFlag> flags, - int bufferSize, - short replication, - long blockSize, - Progressable progress) throws IOException { + FsPermission permission, + EnumSet<CreateFlag> flags, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException { Path parent = path.getParent(); if (parent != null) { // expect this to raise an exception if there is no parent @@ -144,13 +160,15 @@ public class SeaweedFileSystem extends FileSystem { LOG.debug("append path: {} bufferSize:{}", path, bufferSize); path = qualify(path); + final Path finalPath = path; // For use in anonymous inner class try { int seaweedBufferSize = this.getConf().getInt(FS_SEAWEED_BUFFER_SIZE, FS_SEAWEED_DEFAULT_BUFFER_SIZE); - OutputStream outputStream = seaweedFileSystemStore.createFile(path, false, null, seaweedBufferSize, ""); + SeaweedHadoopOutputStream outputStream = (SeaweedHadoopOutputStream) seaweedFileSystemStore.createFile(path, + false, null, seaweedBufferSize, ""); return new FSDataOutputStream(outputStream, statistics); } catch (Exception ex) { - LOG.warn("append path: {} bufferSize:{}", path, bufferSize, ex); - return null; + LOG.error("Failed to append to file: {} bufferSize:{}", path, bufferSize, ex); + throw new IOException("Failed to append to file: " + path, ex); } } @@ -283,7 +301,6 @@ public class SeaweedFileSystem extends FileSystem { seaweedFileSystemStore.setOwner(path, owner, group); } - /** * Set permission of a path. * @@ -334,11 +351,11 @@ public class SeaweedFileSystem extends FileSystem { * @param f The path to the file to be truncated * @param newLength The size the file is to be truncated to * @return <code>true</code> if the file has been truncated to the desired - * <code>newLength</code> and is immediately available to be reused for - * write operations such as <code>append</code>, or - * <code>false</code> if a background process of adjusting the length of - * the last block has been started, and clients should wait for it to - * complete before proceeding with further file updates. + * <code>newLength</code> and is immediately available to be reused for + * write operations such as <code>append</code>, or + * <code>false</code> if a background process of adjusting the length of + * the last block has been started, and clients should wait for it to + * complete before proceeding with further file updates. * @throws IOException IO failure * @throws UnsupportedOperationException if the operation is unsupported * (default). 
@@ -351,8 +368,7 @@ public class SeaweedFileSystem extends FileSystem { @Override public void createSymlink(final Path target, final Path link, - final boolean createParent) throws - IOException { + final boolean createParent) throws IOException { // Supporting filesystems should override this method throw new UnsupportedOperationException( "Filesystem does not support symlinks!"); @@ -390,7 +406,7 @@ public class SeaweedFileSystem extends FileSystem { */ @Override public void renameSnapshot(Path path, String snapshotOldName, - String snapshotNewName) throws IOException { + String snapshotNewName) throws IOException { throw new UnsupportedOperationException(getClass().getSimpleName() + " doesn't support renameSnapshot"); } @@ -412,10 +428,10 @@ public class SeaweedFileSystem extends FileSystem { } /** - * Modifies ACL entries of files and directories. This method can add new ACL - * entries or modify the permissions on existing ACL entries. All existing + * Modifies ACL entries of files and directories. This method can add new ACL + * entries or modify the permissions on existing ACL entries. All existing * ACL entries that are not specified in this call are retained without - * changes. (Modifications are merged into the current ACL.) + * changes. (Modifications are merged into the current ACL.) * * @param path Path to modify * @param aclSpec List<AclEntry> describing modifications @@ -431,7 +447,7 @@ public class SeaweedFileSystem extends FileSystem { } /** - * Removes ACL entries from files and directories. Other ACL entries are + * Removes ACL entries from files and directories. Other ACL entries are * retained. * * @param path Path to modify @@ -463,7 +479,7 @@ public class SeaweedFileSystem extends FileSystem { } /** - * Removes all but the base ACL entries of files and directories. The entries + * Removes all but the base ACL entries of files and directories. The entries * for user, group, and others are retained for compatibility with permission * bits. * @@ -485,7 +501,8 @@ public class SeaweedFileSystem extends FileSystem { * * @param path Path to modify * @param aclSpec List describing modifications, which must include entries - * for user, group, and others for compatibility with permission bits. + * for user, group, and others for compatibility with permission + * bits. * @throws IOException if an ACL could not be modified * @throws UnsupportedOperationException if the operation is unsupported * (default outcome). 
@@ -528,7 +545,7 @@ public class SeaweedFileSystem extends FileSystem { */ @Override public void setXAttr(Path path, String name, byte[] value, - EnumSet<XAttrSetFlag> flag) throws IOException { + EnumSet<XAttrSetFlag> flag) throws IOException { throw new UnsupportedOperationException(getClass().getSimpleName() + " doesn't support setXAttr"); } diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java index f65c1961b..c55d05797 100644 --- a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java +++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedFileSystemStore.java @@ -59,19 +59,18 @@ public class SeaweedFileSystemStore { } public boolean createDirectory(final Path path, UserGroupInformation currentUser, - final FsPermission permission, final FsPermission umask) { + final FsPermission permission, final FsPermission umask) { LOG.debug("createDirectory path: {} permission: {} umask: {}", - path, - permission, - umask); + path, + permission, + umask); return filerClient.mkdirs( - path.toUri().getPath(), - permissionToMode(permission, true), - currentUser.getUserName(), - currentUser.getGroupNames() - ); + path.toUri().getPath(), + permissionToMode(permission, true), + currentUser.getUserName(), + currentUser.getGroupNames()); } public FileStatus[] listEntries(final Path path) throws IOException { @@ -84,7 +83,7 @@ public class SeaweedFileSystemStore { } if (!pathStatus.isDirectory()) { - return new FileStatus[]{pathStatus}; + return new FileStatus[] { pathStatus }; } List<FileStatus> fileStatuses = new ArrayList<FileStatus>(); @@ -116,9 +115,9 @@ public class SeaweedFileSystemStore { public boolean deleteEntries(final Path path, boolean isDirectory, boolean recursive) { LOG.debug("deleteEntries path: {} isDirectory {} recursive: {}", - path, - String.valueOf(isDirectory), - String.valueOf(recursive)); + path, + String.valueOf(isDirectory), + String.valueOf(recursive)); if (path.isRoot()) { return true; @@ -146,7 +145,7 @@ public class SeaweedFileSystemStore { String owner = attributes.getUserName(); String group = attributes.getGroupNameCount() > 0 ? attributes.getGroupName(0) : ""; return new FileStatus(length, isDir, block_replication, blocksize, - modification_time, access_time, permission, owner, group, null, path); + modification_time, access_time, permission, owner, group, null, path); } public FilerProto.Entry lookupEntry(Path path) { @@ -162,27 +161,29 @@ public class SeaweedFileSystemStore { if (source.isRoot()) { return; } - LOG.info("rename source: {} destination:{}", source, destination); + FilerProto.Entry entry = lookupEntry(source); if (entry == null) { LOG.warn("rename non-existing source: {}", source); return; } + filerClient.mv(source.toUri().getPath(), destination.toUri().getPath()); } public OutputStream createFile(final Path path, - final boolean overwrite, - FsPermission permission, - int bufferSize, - String replication) throws IOException { + final boolean overwrite, + FsPermission permission, + int bufferSize, + String replication) throws IOException { permission = permission == null ? 
FsPermission.getFileDefault() : permission; + LOG.debug("createFile path: {} overwrite: {} permission: {}", - path, - overwrite, - permission.toString()); + path, + overwrite, + permission.toString()); UserGroupInformation userGroupInformation = UserGroupInformation.getCurrentUser(); long now = System.currentTimeMillis() / 1000L; @@ -203,20 +204,21 @@ public class SeaweedFileSystemStore { } if (entry == null) { entry = FilerProto.Entry.newBuilder() - .setName(path.getName()) - .setIsDirectory(false) - .setAttributes(FilerProto.FuseAttributes.newBuilder() - .setFileMode(permissionToMode(permission, false)) - .setCrtime(now) - .setMtime(now) - .setUserName(userGroupInformation.getUserName()) - .clearGroupName() - .addAllGroupName(Arrays.asList(userGroupInformation.getGroupNames())) - ); + .setName(path.getName()) + .setIsDirectory(false) + .setAttributes(FilerProto.FuseAttributes.newBuilder() + .setFileMode(permissionToMode(permission, false)) + .setCrtime(now) + .setMtime(now) + .setUserName(userGroupInformation.getUserName()) + .clearGroupName() + .addAllGroupName(Arrays.asList(userGroupInformation.getGroupNames()))); SeaweedWrite.writeMeta(filerClient, getParentDirectory(path), entry); } - return new SeaweedHadoopOutputStream(filerClient, path.toString(), entry, writePosition, bufferSize, replication); + + return new SeaweedHadoopOutputStream(filerClient, path.toString(), entry, writePosition, bufferSize, + replication); } @@ -231,9 +233,9 @@ public class SeaweedFileSystemStore { } return new SeaweedHadoopInputStream(filerClient, - statistics, - path.toUri().getPath(), - entry); + statistics, + path.toUri().getPath(), + entry); } public void setOwner(Path path, String owner, String group) { diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java index f26eae597..8ac5a5ab4 100644 --- a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java +++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopInputStream.java @@ -2,7 +2,6 @@ package seaweed.hdfs; // based on org.apache.hadoop.fs.azurebfs.services.AbfsInputStream -import org.apache.hadoop.fs.ByteBufferReadable; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileSystem.Statistics; import seaweedfs.client.FilerClient; @@ -11,12 +10,21 @@ import seaweedfs.client.SeaweedInputStream; import java.io.EOFException; import java.io.IOException; -import java.nio.ByteBuffer; -public class SeaweedHadoopInputStream extends FSInputStream implements ByteBufferReadable { +/** + * SeaweedFS Hadoop InputStream. + * + * NOTE: Does NOT implement ByteBufferReadable to match RawLocalFileSystem + * behavior. + * This ensures BufferedFSInputStream is used, which properly handles position + * tracking + * for positioned reads (critical for Parquet and other formats). 
+ */ +public class SeaweedHadoopInputStream extends FSInputStream { private final SeaweedInputStream seaweedInputStream; private final Statistics statistics; + private final String path; public SeaweedHadoopInputStream( final FilerClient filerClient, @@ -25,6 +33,7 @@ public class SeaweedHadoopInputStream extends FSInputStream implements ByteBuffe final FilerProto.Entry entry) throws IOException { this.seaweedInputStream = new SeaweedInputStream(filerClient, path, entry); this.statistics = statistics; + this.path = path; } @Override @@ -37,20 +46,6 @@ public class SeaweedHadoopInputStream extends FSInputStream implements ByteBuffe return seaweedInputStream.read(b, off, len); } - // implement ByteBufferReadable - @Override - public synchronized int read(ByteBuffer buf) throws IOException { - int bytesRead = seaweedInputStream.read(buf); - - if (bytesRead > 0) { - if (statistics != null) { - statistics.incrementBytesRead(bytesRead); - } - } - - return bytesRead; - } - /** * Seek to given position in stream. * @@ -83,8 +78,10 @@ public class SeaweedHadoopInputStream extends FSInputStream implements ByteBuffe } /** - * Returns the length of the file that this stream refers to. Note that the length returned is the length - * as of the time the Stream was opened. Specifically, if there have been subsequent appends to the file, + * Returns the length of the file that this stream refers to. Note that the + * length returned is the length + * as of the time the Stream was opened. Specifically, if there have been + * subsequent appends to the file, * they wont be reflected in the returned length. * * @return length of the file. @@ -104,8 +101,12 @@ public class SeaweedHadoopInputStream extends FSInputStream implements ByteBuffe return seaweedInputStream.getPos(); } + public String getPath() { + return path; + } + /** - * Seeks a different copy of the data. Returns true if + * Seeks a different copy of the data. Returns true if * found a new source, false otherwise. * * @throws IOException throws {@link IOException} if there is an error @@ -139,7 +140,9 @@ public class SeaweedHadoopInputStream extends FSInputStream implements ByteBuffe } /** - * gets whether mark and reset are supported by {@code ADLFileInputStream}. Always returns false. + * gets whether mark and reset are supported by + * {@code SeaweedHadoopInputStream}. + * Always returns false. 
* * @return always {@code false} */ diff --git a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopOutputStream.java b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopOutputStream.java index 1740312fe..a1a43820c 100644 --- a/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopOutputStream.java +++ b/other/java/hdfs3/src/main/java/seaweed/hdfs/SeaweedHadoopOutputStream.java @@ -4,6 +4,8 @@ package seaweed.hdfs; import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.fs.Syncable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import seaweedfs.client.FilerClient; import seaweedfs.client.FilerProto; import seaweedfs.client.SeaweedOutputStream; @@ -13,9 +15,12 @@ import java.util.Locale; public class SeaweedHadoopOutputStream extends SeaweedOutputStream implements Syncable, StreamCapabilities { + private static final Logger LOG = LoggerFactory.getLogger(SeaweedHadoopOutputStream.class); + public SeaweedHadoopOutputStream(FilerClient filerClient, final String path, FilerProto.Entry.Builder entry, - final long position, final int bufferSize, final String replication) { + final long position, final int bufferSize, final String replication) { super(filerClient, path, entry, position, bufferSize, replication); + } /** @@ -26,6 +31,7 @@ public class SeaweedHadoopOutputStream extends SeaweedOutputStream implements Sy */ @Override public void hsync() throws IOException { + if (supportFlush) { flushInternal(); } @@ -39,6 +45,7 @@ public class SeaweedHadoopOutputStream extends SeaweedOutputStream implements Sy */ @Override public void hflush() throws IOException { + if (supportFlush) { flushInternal(); } diff --git a/test/java/spark/.gitignore b/test/java/spark/.gitignore new file mode 100644 index 000000000..62341354a --- /dev/null +++ b/test/java/spark/.gitignore @@ -0,0 +1,33 @@ +# Maven +target/ +.m2/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties + +# IDE +.idea/ +*.iml +.vscode/ +.classpath +.project +.settings/ + +# Spark +spark-warehouse/ +metastore_db/ +derby.log + +# Logs +*.log + +# OS +.DS_Store +Thumbs.db + + diff --git a/test/java/spark/Makefile b/test/java/spark/Makefile new file mode 100644 index 000000000..462447c66 --- /dev/null +++ b/test/java/spark/Makefile @@ -0,0 +1,75 @@ +.PHONY: help build test test-local test-docker clean run-example docker-up docker-down + +help: + @echo "SeaweedFS Spark Integration Tests" + @echo "" + @echo "Available targets:" + @echo " build - Build the project" + @echo " test - Run integration tests (requires SeaweedFS running)" + @echo " test-local - Run tests against local SeaweedFS" + @echo " test-docker - Run tests in Docker with SeaweedFS" + @echo " run-example - Run the example Spark application" + @echo " docker-up - Start SeaweedFS in Docker" + @echo " docker-down - Stop SeaweedFS Docker containers" + @echo " clean - Clean build artifacts" + +build: + mvn clean package + +test: + @if [ -z "$$SEAWEEDFS_TEST_ENABLED" ]; then \ + echo "Setting SEAWEEDFS_TEST_ENABLED=true"; \ + fi + SEAWEEDFS_TEST_ENABLED=true mvn test + +test-local: + @echo "Testing against local SeaweedFS (localhost:8888)..." + ./run-tests.sh + +test-docker: + @echo "Running tests in Docker..." + docker compose up --build --abort-on-container-exit spark-tests + docker compose down + +docker-up: + @echo "Starting SeaweedFS in Docker..." 
+ docker compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer + @echo "Waiting for services to be ready..." + @sleep 5 + @echo "SeaweedFS is ready!" + @echo " Master: http://localhost:9333" + @echo " Filer: http://localhost:8888" + +docker-down: + @echo "Stopping SeaweedFS Docker containers..." + docker compose down -v + +run-example: + @echo "Running example application..." + @if ! command -v spark-submit > /dev/null; then \ + echo "Error: spark-submit not found. Please install Apache Spark."; \ + exit 1; \ + fi + spark-submit \ + --class seaweed.spark.SparkSeaweedFSExample \ + --master local[2] \ + --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \ + --conf spark.hadoop.fs.seaweed.filer.host=localhost \ + --conf spark.hadoop.fs.seaweed.filer.port=8888 \ + --conf spark.hadoop.fs.seaweed.filer.port.grpc=18888 \ + --conf spark.hadoop.fs.seaweed.replication="" \ + target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \ + seaweedfs://localhost:8888/spark-example-output + +clean: + mvn clean + @echo "Build artifacts cleaned" + +verify-seaweedfs: + @echo "Verifying SeaweedFS connection..." + @curl -f http://localhost:8888/ > /dev/null 2>&1 && \ + echo "✓ SeaweedFS filer is accessible" || \ + (echo "✗ SeaweedFS filer is not accessible at http://localhost:8888"; exit 1) + +.DEFAULT_GOAL := help + diff --git a/test/java/spark/docker-compose.yml b/test/java/spark/docker-compose.yml new file mode 100644 index 000000000..ed8757b88 --- /dev/null +++ b/test/java/spark/docker-compose.yml @@ -0,0 +1,100 @@ +services: + seaweedfs-master: + build: + context: ../../../docker + dockerfile: Dockerfile.local + image: seaweedfs:local + container_name: seaweedfs-spark-master + ports: + - "9333:9333" + - "19333:19333" + command: "master -ip=seaweedfs-master -ip.bind=0.0.0.0 -port=9333 -port.grpc=19333 -volumeSizeLimitMB=50 -defaultReplication=000 -peers=none" + networks: + - seaweedfs-spark + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:9333/cluster/status"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 10s + + seaweedfs-volume: + build: + context: ../../../docker + dockerfile: Dockerfile.local + image: seaweedfs:local + container_name: seaweedfs-spark-volume + ports: + - "8080:8080" + - "18080:18080" + command: "volume -mserver=seaweedfs-master:9333 -ip=seaweedfs-volume -ip.bind=0.0.0.0 -port=8080 -port.grpc=18080 -publicUrl=seaweedfs-volume:8080 -max=100 -dir=/data -preStopSeconds=1" + volumes: + - seaweedfs-volume-data:/data + depends_on: + seaweedfs-master: + condition: service_healthy + networks: + - seaweedfs-spark + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080/status"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 10s + + seaweedfs-filer: + build: + context: ../../../docker + dockerfile: Dockerfile.local + image: seaweedfs:local + container_name: seaweedfs-spark-filer + ports: + - "8888:8888" + - "18888:18888" + command: "filer -master=seaweedfs-master:9333 -ip=seaweedfs-filer -ip.bind=0.0.0.0 -port=8888 -port.grpc=18888" + depends_on: + seaweedfs-master: + condition: service_healthy + seaweedfs-volume: + condition: service_healthy + networks: + - seaweedfs-spark + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:8888/"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 15s + + spark-tests: + image: maven:3.9-eclipse-temurin-17 + container_name: seaweedfs-spark-tests + volumes: + - .:/workspace + - ./.m2:/root/.m2 + working_dir: /workspace + environment: 
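+      # These variables are how the containerized test JVM locates the filer:
+      # the tests read SEAWEEDFS_FILER_HOST and the port variables (defaulting to
+      # localhost and 18888 for gRPC, as in GetPosBufferTest) and here they point
+      # at the seaweedfs-filer service by its Docker-network hostname.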
+ - SEAWEEDFS_TEST_ENABLED=true + - SEAWEEDFS_FILER_HOST=seaweedfs-filer + - SEAWEEDFS_FILER_PORT=8888 + - SEAWEEDFS_FILER_GRPC_PORT=18888 + - HADOOP_HOME=/tmp + # Disable Java DNS caching to ensure fresh DNS lookups + - MAVEN_OPTS=-Dsun.net.inetaddr.ttl=0 -Dnetworkaddress.cache.ttl=0 + - SPARK_SUBMIT_OPTS=-Dfs.seaweedfs.impl.disable.cache=true + command: sh -c "sleep 30 && mvn clean test" + depends_on: + seaweedfs-filer: + condition: service_healthy + networks: + - seaweedfs-spark + mem_limit: 4g + cpus: 2 + +networks: + seaweedfs-spark: + driver: bridge + +volumes: + seaweedfs-volume-data: + diff --git a/test/java/spark/pom.xml b/test/java/spark/pom.xml new file mode 100644 index 000000000..22228a856 --- /dev/null +++ b/test/java/spark/pom.xml @@ -0,0 +1,348 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>com.seaweedfs</groupId> + <artifactId>seaweedfs-spark-integration-tests</artifactId> + <version>1.0-SNAPSHOT</version> + <packaging>jar</packaging> + + <name>SeaweedFS Spark Integration Tests</name> + <description>Integration tests for Apache Spark with SeaweedFS HDFS client</description> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <maven.compiler.source>11</maven.compiler.source> + <maven.compiler.target>11</maven.compiler.target> + <spark.version>3.5.0</spark.version> + <hadoop.version>3.3.6</hadoop.version> + <scala.binary.version>2.12</scala.binary.version> + <junit.version>4.13.2</junit.version> + <seaweedfs.hadoop3.client.version>3.80.1-SNAPSHOT</seaweedfs.hadoop3.client.version> + <jackson.version>2.18.2</jackson.version> <!-- Upgraded from 2.15.3 --> + <netty.version>4.1.125.Final</netty.version> <!-- Upgraded to 4.1.125.Final for security fixes (CVE in netty-codec < 4.1.125.Final, netty-codec-http2 <= 4.1.123.Final) --> + <parquet.version>1.15.2</parquet.version> <!-- Upgraded to 1.15.2 for security fix --> + <parquet.format.version>2.12.0</parquet.format.version> + <surefire.jvm.args> + -Xmx2g + -Dhadoop.home.dir=/tmp + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED + --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED + --add-opens=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED + --add-exports=java.base/sun.nio.ch=ALL-UNNAMED + </surefire.jvm.args> + </properties> + + <!-- Override vulnerable transitive dependencies --> + <dependencyManagement> + <dependencies> + <!-- Jackson - Fix CVEs in older versions --> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + <version>${jackson.version}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-core</artifactId> + <version>${jackson.version}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + 
<artifactId>jackson-annotations</artifactId> + <version>${jackson.version}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.dataformat</groupId> + <artifactId>jackson-dataformat-yaml</artifactId> + <version>${jackson.version}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.module</groupId> + <artifactId>jackson-module-scala_${scala.binary.version}</artifactId> + <version>${jackson.version}</version> + </dependency> + + <!-- Netty - Fix CVEs in older versions --> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-all</artifactId> + <version>${netty.version}</version> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-handler</artifactId> + <version>${netty.version}</version> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-transport</artifactId> + <version>${netty.version}</version> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-transport-native-epoll</artifactId> + <version>${netty.version}</version> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-codec</artifactId> + <version>${netty.version}</version> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-codec-http</artifactId> + <version>${netty.version}</version> + </dependency> + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-codec-http2</artifactId> + <version>${netty.version}</version> + </dependency> + + <!-- Apache Avro - Fix CVEs --> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>1.11.4</version> + </dependency> + + <!-- Apache ZooKeeper - Fix CVEs --> + <dependency> + <groupId>org.apache.zookeeper</groupId> + <artifactId>zookeeper</artifactId> + <version>3.9.4</version> + </dependency> + + <!-- Apache Commons - Fix CVEs --> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + <version>1.26.0</version> + </dependency> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>2.15.1</version> + </dependency> + <dependency> + <groupId>commons-beanutils</groupId> + <artifactId>commons-beanutils</artifactId> + <version>1.11.0</version> + </dependency> + + <!-- Guava - Fix CVEs --> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>32.1.3-jre</version> + </dependency> + + <!-- SnakeYAML - Fix CVEs --> + <dependency> + <groupId>org.yaml</groupId> + <artifactId>snakeyaml</artifactId> + <version>2.2</version> + </dependency> + + <!-- Protobuf - Fix CVEs --> + <dependency> + <groupId>com.google.protobuf</groupId> + <artifactId>protobuf-java</artifactId> + <version>3.25.5</version> + </dependency> + + <!-- Nimbus JOSE JWT - Fix CVEs (GHSA-xwmg-2g98-w7v9 and others) --> + <dependency> + <groupId>com.nimbusds</groupId> + <artifactId>nimbus-jose-jwt</artifactId> + <version>10.0.2</version> + </dependency> + + <!-- Snappy Java - Fix CVEs --> + <dependency> + <groupId>org.xerial.snappy</groupId> + <artifactId>snappy-java</artifactId> + <version>1.1.10.4</version> + </dependency> + + <!-- DNS Java - Fix CVEs --> + <dependency> + <groupId>dnsjava</groupId> + <artifactId>dnsjava</artifactId> + <version>3.6.0</version> + </dependency> + + <!-- Apache Parquet - Upgrade to latest for bug fixes --> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-common</artifactId> + <version>${parquet.version}</version> + 
</dependency> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-encoding</artifactId> + <version>${parquet.version}</version> + </dependency> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-column</artifactId> + <version>${parquet.version}</version> + </dependency> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-hadoop</artifactId> + <version>${parquet.version}</version> + </dependency> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-avro</artifactId> + <version>${parquet.version}</version> + </dependency> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-format-structures</artifactId> + <version>${parquet.version}</version> + </dependency> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-format</artifactId> + <version>${parquet.format.version}</version> + </dependency> + + </dependencies> + </dependencyManagement> + + <dependencies> + <!-- Spark Core --> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core_${scala.binary.version}</artifactId> + <version>${spark.version}</version> + <scope>provided</scope> + </dependency> + + <!-- Spark SQL --> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-sql_${scala.binary.version}</artifactId> + <version>${spark.version}</version> + <scope>provided</scope> + </dependency> + + <!-- Hadoop Client --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop.version}</version> + <scope>provided</scope> + </dependency> + + <!-- SeaweedFS Hadoop3 Client --> + <dependency> + <groupId>com.seaweedfs</groupId> + <artifactId>seaweedfs-hadoop3-client</artifactId> + <version>${seaweedfs.hadoop3.client.version}</version> + </dependency> + + <!-- Testing --> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + <scope>test</scope> + </dependency> + + <!-- Logging --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + <version>1.7.36</version> + </dependency> + + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-reload4j</artifactId> + <version>1.7.36</version> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.11.0</version> + <configuration> + <source>${maven.compiler.source}</source> + <target>${maven.compiler.target}</target> + </configuration> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>3.0.0</version> + <configuration> + <skipTests>${skipTests}</skipTests> + <includes> + <include>**/*Test.java</include> + </includes> + <argLine>${surefire.jvm.args}</argLine> + <systemPropertyVariables> + <log4j.configuration>file:${project.basedir}/src/test/resources/log4j.properties</log4j.configuration> + </systemPropertyVariables> + <environmentVariables> + <HADOOP_HOME>/tmp</HADOOP_HOME> + </environmentVariables> + </configuration> + </plugin> + + <!-- Shade plugin to create fat jar for Spark submit --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>3.5.0</version> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <filters> + <filter> 
+ <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> + <mainClass>seaweed.spark.SparkSeaweedFSExample</mainClass> + </transformer> + </transformers> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> + diff --git a/test/java/spark/quick-start.sh b/test/java/spark/quick-start.sh new file mode 100755 index 000000000..974363311 --- /dev/null +++ b/test/java/spark/quick-start.sh @@ -0,0 +1,149 @@ +#!/bin/bash + +set -e + +echo "=== SeaweedFS Spark Integration Tests Quick Start ===" +echo "" + +# Check if SeaweedFS is running +check_seaweedfs() { + echo "Checking if SeaweedFS is running..." + if curl -f http://localhost:8888/ > /dev/null 2>&1; then + echo "✓ SeaweedFS filer is accessible at http://localhost:8888" + return 0 + else + echo "✗ SeaweedFS filer is not accessible" + return 1 + fi +} + +# Start SeaweedFS with Docker if not running +start_seaweedfs() { + echo "" + echo "Starting SeaweedFS with Docker..." + docker-compose up -d seaweedfs-master seaweedfs-volume seaweedfs-filer + + echo "Waiting for SeaweedFS to be ready..." + for i in {1..30}; do + if curl -f http://localhost:8888/ > /dev/null 2>&1; then + echo "✓ SeaweedFS is ready!" + return 0 + fi + echo -n "." + sleep 2 + done + + echo "" + echo "✗ SeaweedFS failed to start" + return 1 +} + +# Build the project +build_project() { + echo "" + echo "Building the project..." + mvn clean package -DskipTests + echo "✓ Build completed" +} + +# Run tests +run_tests() { + echo "" + echo "Running integration tests..." + export SEAWEEDFS_TEST_ENABLED=true + mvn test + echo "✓ Tests completed" +} + +# Run example +run_example() { + echo "" + echo "Running example application..." + + if ! command -v spark-submit > /dev/null; then + echo "⚠ spark-submit not found. Skipping example application." + echo "To run the example, install Apache Spark and try: make run-example" + return 0 + fi + + spark-submit \ + --class seaweed.spark.SparkSeaweedFSExample \ + --master local[2] \ + --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \ + --conf spark.hadoop.fs.seaweed.filer.host=localhost \ + --conf spark.hadoop.fs.seaweed.filer.port=8888 \ + --conf spark.hadoop.fs.seaweed.filer.port.grpc=18888 \ + target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \ + seaweedfs://localhost:8888/spark-quickstart-output + + echo "✓ Example completed" +} + +# Cleanup +cleanup() { + echo "" + echo "Cleaning up..." + docker-compose down -v + echo "✓ Cleanup completed" +} + +# Main execution +main() { + # Check if Docker is available + if ! command -v docker > /dev/null; then + echo "Error: Docker is not installed or not in PATH" + exit 1 + fi + + # Check if Maven is available + if ! command -v mvn > /dev/null; then + echo "Error: Maven is not installed or not in PATH" + exit 1 + fi + + # Check if SeaweedFS is running, if not start it + if ! check_seaweedfs; then + read -p "Do you want to start SeaweedFS with Docker? (y/n) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + start_seaweedfs || exit 1 + else + echo "Please start SeaweedFS manually and rerun this script." 
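As a point of reference, the --conf flags passed to spark-submit in run_example above can also be set programmatically when building the SparkSession, since Spark copies spark.hadoop.* entries into the Hadoop Configuration seen by the SeaweedFS client. A minimal sketch only, assuming a local master; the class name, filer host/ports, and output path are illustrative placeholders, not part of the test sources:

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class SeaweedFSSessionSketch {
        public static void main(String[] args) {
            // Mirrors the spark.hadoop.fs.seaweed* flags used with spark-submit;
            // host and ports are placeholders for a local SeaweedFS filer.
            SparkSession spark = SparkSession.builder()
                    .appName("SeaweedFS session sketch")
                    .master("local[2]")
                    .config("spark.hadoop.fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem")
                    .config("spark.hadoop.fs.seaweed.filer.host", "localhost")
                    .config("spark.hadoop.fs.seaweed.filer.port", "8888")
                    .config("spark.hadoop.fs.seaweed.filer.port.grpc", "18888")
                    .getOrCreate();

            // Any seaweedfs:// URI is then served by the SeaweedFS Hadoop client.
            Dataset<Row> df = spark.range(0, 10).toDF("id");
            df.write().mode("overwrite").parquet("seaweedfs://localhost:8888/spark-sketch-output");
            spark.stop();
        }
    }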
+ exit 1 + fi + fi + + # Build project + build_project || exit 1 + + # Run tests + run_tests || exit 1 + + # Run example if Spark is available + run_example + + echo "" + echo "=== Quick Start Completed Successfully! ===" + echo "" + echo "Next steps:" + echo " - View test results in target/surefire-reports/" + echo " - Check example output at http://localhost:8888/" + echo " - Run 'make help' for more options" + echo " - Read README.md for detailed documentation" + echo "" + + read -p "Do you want to stop SeaweedFS? (y/n) " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + cleanup + fi +} + +# Handle Ctrl+C +trap cleanup INT + +# Run main +main + + + diff --git a/test/java/spark/run-tests.sh b/test/java/spark/run-tests.sh new file mode 100755 index 000000000..f637c8c59 --- /dev/null +++ b/test/java/spark/run-tests.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +set -e + +echo "=== SeaweedFS Spark Integration Tests Runner ===" +echo "" + +# Check if SeaweedFS is running +check_seaweedfs() { + if curl -f http://localhost:8888/ > /dev/null 2>&1; then + echo "✓ SeaweedFS filer is accessible at http://localhost:8888" + return 0 + else + echo "✗ SeaweedFS filer is not accessible" + return 1 + fi +} + +# Main +if ! check_seaweedfs; then + echo "" + echo "Please start SeaweedFS first. You can use:" + echo " cd test/java/spark && docker-compose up -d" + echo "Or:" + echo " make docker-up" + exit 1 +fi + +echo "" +echo "Running Spark integration tests..." +echo "" + +export SEAWEEDFS_TEST_ENABLED=true +export SEAWEEDFS_FILER_HOST=localhost +export SEAWEEDFS_FILER_PORT=8888 +export SEAWEEDFS_FILER_GRPC_PORT=18888 + +# Run tests +mvn test "$@" + +echo "" +echo "✓ Test run completed" +echo "View detailed reports in: target/surefire-reports/" + + + diff --git a/test/java/spark/src/main/java/seaweed/spark/SparkSeaweedFSExample.java b/test/java/spark/src/main/java/seaweed/spark/SparkSeaweedFSExample.java new file mode 100644 index 000000000..75b2d710b --- /dev/null +++ b/test/java/spark/src/main/java/seaweed/spark/SparkSeaweedFSExample.java @@ -0,0 +1,138 @@ +package seaweed.spark; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; + +/** + * Example Spark application demonstrating SeaweedFS integration. + * + * This can be submitted to a Spark cluster using spark-submit. + * + * Example usage: + * spark-submit \ + * --class seaweed.spark.SparkSeaweedFSExample \ + * --master local[2] \ + * --conf spark.hadoop.fs.seaweedfs.impl=seaweed.hdfs.SeaweedFileSystem \ + * --conf spark.hadoop.fs.seaweed.filer.host=localhost \ + * --conf spark.hadoop.fs.seaweed.filer.port=8888 \ + * --conf spark.hadoop.fs.seaweed.filer.port.grpc=18888 \ + * target/seaweedfs-spark-integration-tests-1.0-SNAPSHOT.jar \ + * seaweedfs://localhost:8888/output + */ +public class SparkSeaweedFSExample { + + public static void main(String[] args) { + if (args.length < 1) { + System.err.println("Usage: SparkSeaweedFSExample <output-path>"); + System.err.println("Example: seaweedfs://localhost:8888/spark-output"); + System.exit(1); + } + + String outputPath = args[0]; + + // Create Spark session + SparkSession spark = SparkSession.builder() + .appName("SeaweedFS Spark Example") + .getOrCreate(); + + try { + System.out.println("=== SeaweedFS Spark Integration Example ===\n"); + + // Example 1: Generate data and write to SeaweedFS + System.out.println("1. 
Generating sample data..."); + Dataset<Row> data = spark.range(0, 1000) + .selectExpr( + "id", + "id * 2 as doubled", + "CAST(rand() * 100 AS INT) as random_value"); + + System.out.println(" Generated " + data.count() + " rows"); + data.show(5); + + // Write as Parquet + String parquetPath = outputPath + "/data.parquet"; + System.out.println("\n2. Writing data to SeaweedFS as Parquet..."); + System.out.println(" Path: " + parquetPath); + + data.write() + .mode(SaveMode.Overwrite) + .parquet(parquetPath); + + System.out.println(" ✓ Write completed"); + + // Read back and verify + System.out.println("\n3. Reading data back from SeaweedFS..."); + Dataset<Row> readData = spark.read().parquet(parquetPath); + System.out.println(" Read " + readData.count() + " rows"); + + // Perform aggregation + System.out.println("\n4. Performing aggregation..."); + Dataset<Row> stats = readData.selectExpr( + "COUNT(*) as count", + "AVG(random_value) as avg_random", + "MAX(doubled) as max_doubled"); + + stats.show(); + + // Write aggregation results + String statsPath = outputPath + "/stats.parquet"; + System.out.println("5. Writing stats to: " + statsPath); + stats.write() + .mode(SaveMode.Overwrite) + .parquet(statsPath); + + // Create a partitioned dataset + System.out.println("\n6. Creating partitioned dataset..."); + Dataset<Row> partitionedData = data.selectExpr( + "*", + "CAST(id % 10 AS INT) as partition_key"); + + String partitionedPath = outputPath + "/partitioned.parquet"; + System.out.println(" Path: " + partitionedPath); + + partitionedData.write() + .mode(SaveMode.Overwrite) + .partitionBy("partition_key") + .parquet(partitionedPath); + + System.out.println(" ✓ Partitioned write completed"); + + // Read specific partition + System.out.println("\n7. Reading specific partition (partition_key=0)..."); + Dataset<Row> partition0 = spark.read() + .parquet(partitionedPath) + .filter("partition_key = 0"); + + System.out.println(" Partition 0 contains " + partition0.count() + " rows"); + partition0.show(5); + + // SQL example + System.out.println("\n8. Using Spark SQL..."); + readData.createOrReplaceTempView("seaweedfs_data"); + + Dataset<Row> sqlResult = spark.sql( + "SELECT " + + " CAST(id / 100 AS INT) as bucket, " + + " COUNT(*) as count, " + + " AVG(random_value) as avg_random " + + "FROM seaweedfs_data " + + "GROUP BY CAST(id / 100 AS INT) " + + "ORDER BY bucket"); + + System.out.println(" Bucketed statistics:"); + sqlResult.show(); + + System.out.println("\n=== Example completed successfully! ==="); + System.out.println("Output location: " + outputPath); + + } catch (Exception e) { + System.err.println("Error: " + e.getMessage()); + e.printStackTrace(); + System.exit(1); + } finally { + spark.stop(); + } + } +} diff --git a/test/java/spark/src/test/java/seaweed/spark/GetPosBufferTest.java b/test/java/spark/src/test/java/seaweed/spark/GetPosBufferTest.java new file mode 100644 index 000000000..86dde66ab --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/GetPosBufferTest.java @@ -0,0 +1,308 @@ +package seaweed.spark; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import seaweedfs.client.FilerClient; +import seaweedfs.client.FilerProto; +import seaweedfs.client.SeaweedInputStream; +import seaweedfs.client.SeaweedOutputStream; +import seaweedfs.client.SeaweedRead; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import static org.junit.Assert.*; + +/** + * Unit test to reproduce the Parquet EOF issue. 
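+ *
+ * In outline, the write-side pattern being reproduced is (columnChunk1/columnChunk2
+ * stand for already-encoded byte arrays):
+ * <pre>{@code
+ * SeaweedOutputStream out = new SeaweedOutputStream(filerClient, path);
+ * out.write(columnChunk1);           // bytes may still sit in the client-side buffer
+ * long endOfChunk1 = out.getPos();   // Parquet records this offset in the footer
+ * out.write(columnChunk2);
+ * long endOfChunk2 = out.getPos();
+ * out.close();                       // flushes; the file length must equal endOfChunk2
+ * }</pre>
+ * If getPos() ignored buffered bytes, the recorded offsets would be short and later
+ * positioned reads at those offsets would fail.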
+ * + * The issue: When Parquet writes column chunks, it calls getPos() to record + * offsets. + * If getPos() returns a position that doesn't include buffered (unflushed) + * data, + * the footer metadata will have incorrect offsets. + * + * This test simulates Parquet's behavior: + * 1. Write some data (column chunk 1) + * 2. Call getPos() - Parquet records this as the END of chunk 1 + * 3. Write more data (column chunk 2) + * 4. Call getPos() - Parquet records this as the END of chunk 2 + * 5. Close the file + * 6. Verify that the recorded positions match the actual file content + * + * Prerequisites: + * - SeaweedFS master, volume server, and filer must be running + * - Default ports: filer HTTP 8888, filer gRPC 18888 + * + * To run: + * export SEAWEEDFS_TEST_ENABLED=true + * cd other/java/client + * mvn test -Dtest=GetPosBufferTest + */ +public class GetPosBufferTest { + + private FilerClient filerClient; + private static final String TEST_ROOT = "/test-getpos-buffer"; + private static final boolean TESTS_ENABLED = "true".equalsIgnoreCase(System.getenv("SEAWEEDFS_TEST_ENABLED")); + + @Before + public void setUp() throws Exception { + if (!TESTS_ENABLED) { + return; + } + + String filerHost = System.getenv().getOrDefault("SEAWEEDFS_FILER_HOST", "localhost"); + String filerGrpcPort = System.getenv().getOrDefault("SEAWEEDFS_FILER_GRPC_PORT", "18888"); + + filerClient = new FilerClient(filerHost, Integer.parseInt(filerGrpcPort)); + + // Clean up any existing test directory + if (filerClient.exists(TEST_ROOT)) { + filerClient.rm(TEST_ROOT, true, true); + } + + // Create test root directory + filerClient.mkdirs(TEST_ROOT, 0755); + } + + @After + public void tearDown() throws Exception { + if (!TESTS_ENABLED) { + return; + } + if (filerClient != null) { + filerClient.rm(TEST_ROOT, true, true); + filerClient.shutdown(); + } + } + + @Test + public void testGetPosWithBufferedData() throws IOException { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n=== Testing getPos() with buffered data ==="); + + String testPath = TEST_ROOT + "/getpos-test.bin"; + + // Simulate what Parquet does when writing column chunks + SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); + + // Write "column chunk 1" - 100 bytes + byte[] chunk1 = new byte[100]; + for (int i = 0; i < 100; i++) { + chunk1[i] = (byte) i; + } + outputStream.write(chunk1); + + // Parquet calls getPos() here to record end of chunk 1 + long posAfterChunk1 = outputStream.getPos(); + System.out.println("Position after chunk 1 (100 bytes): " + posAfterChunk1); + assertEquals("getPos() should return 100 after writing 100 bytes", 100, posAfterChunk1); + + // Write "column chunk 2" - 200 bytes + byte[] chunk2 = new byte[200]; + for (int i = 0; i < 200; i++) { + chunk2[i] = (byte) (i + 100); + } + outputStream.write(chunk2); + + // Parquet calls getPos() here to record end of chunk 2 + long posAfterChunk2 = outputStream.getPos(); + System.out.println("Position after chunk 2 (200 more bytes): " + posAfterChunk2); + assertEquals("getPos() should return 300 after writing 300 bytes total", 300, posAfterChunk2); + + // Write "column chunk 3" - small chunk of 78 bytes (the problematic size!) 
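+        // 78 bytes is small enough to stay in the client-side write buffer, so the
+        // getPos() call below runs while this chunk is still unflushed; the position
+        // it reports must include the buffered bytes or the recorded offset is short.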
+ byte[] chunk3 = new byte[78]; + for (int i = 0; i < 78; i++) { + chunk3[i] = (byte) (i + 50); + } + outputStream.write(chunk3); + + // Parquet calls getPos() here to record end of chunk 3 + long posAfterChunk3 = outputStream.getPos(); + System.out.println("Position after chunk 3 (78 more bytes): " + posAfterChunk3); + assertEquals("getPos() should return 378 after writing 378 bytes total", 378, posAfterChunk3); + + // Close to flush everything + outputStream.close(); + System.out.println("File closed successfully"); + + // Now read the file and verify its actual size matches what getPos() reported + FilerProto.Entry entry = filerClient.lookupEntry( + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + + long actualFileSize = SeaweedRead.fileSize(entry); + System.out.println("Actual file size on disk: " + actualFileSize); + + assertEquals("File size should match the last getPos() value", 378, actualFileSize); + + // Now read the file and verify we can read all the data + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); + + byte[] readBuffer = new byte[500]; // Larger buffer to read everything + int totalRead = 0; + int bytesRead; + while ((bytesRead = inputStream.read(readBuffer, totalRead, readBuffer.length - totalRead)) > 0) { + totalRead += bytesRead; + } + inputStream.close(); + + System.out.println("Total bytes read: " + totalRead); + assertEquals("Should read exactly 378 bytes", 378, totalRead); + + // Verify the data is correct + for (int i = 0; i < 100; i++) { + assertEquals("Chunk 1 data mismatch at byte " + i, (byte) i, readBuffer[i]); + } + for (int i = 0; i < 200; i++) { + assertEquals("Chunk 2 data mismatch at byte " + (100 + i), (byte) (i + 100), readBuffer[100 + i]); + } + for (int i = 0; i < 78; i++) { + assertEquals("Chunk 3 data mismatch at byte " + (300 + i), (byte) (i + 50), readBuffer[300 + i]); + } + + System.out.println("SUCCESS: All data verified correctly!\n"); + } + + @Test + public void testGetPosWithSmallWrites() throws IOException { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n=== Testing getPos() with many small writes (Parquet pattern) ==="); + + String testPath = TEST_ROOT + "/small-writes-test.bin"; + + SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); + + // Parquet writes column data in small chunks and frequently calls getPos() + String[] columnData = { "Alice", "Bob", "Charlie", "David" }; + long[] recordedPositions = new long[columnData.length]; + + for (int i = 0; i < columnData.length; i++) { + byte[] data = columnData[i].getBytes(StandardCharsets.UTF_8); + outputStream.write(data); + + // Parquet calls getPos() after each value to track offsets + recordedPositions[i] = outputStream.getPos(); + System.out.println("After writing '" + columnData[i] + "': pos=" + recordedPositions[i]); + } + + long finalPos = outputStream.getPos(); + System.out.println("Final position before close: " + finalPos); + + outputStream.close(); + + // Verify file size + FilerProto.Entry entry = filerClient.lookupEntry( + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + long actualFileSize = SeaweedRead.fileSize(entry); + + System.out.println("Actual file size: " + actualFileSize); + assertEquals("File size should match final getPos()", finalPos, actualFileSize); + + // Verify we can read using the recorded positions + 
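+        // Each recordedPositions[i] is the end offset of value i, captured via getPos()
+        // the same way Parquet captures column-chunk offsets; the reads below slice
+        // [previous, recorded) and must get back exactly the string that was written.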
SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); + + long currentPos = 0; + for (int i = 0; i < columnData.length; i++) { + long nextPos = recordedPositions[i]; + int length = (int) (nextPos - currentPos); + + byte[] buffer = new byte[length]; + int bytesRead = inputStream.read(buffer, 0, length); + + assertEquals("Should read " + length + " bytes for '" + columnData[i] + "'", length, bytesRead); + + String readData = new String(buffer, 0, bytesRead, StandardCharsets.UTF_8); + System.out.println("Read at offset " + currentPos + ": '" + readData + "'"); + assertEquals("Data mismatch", columnData[i], readData); + + currentPos = nextPos; + } + + inputStream.close(); + + System.out.println("SUCCESS: Small writes with getPos() tracking work correctly!\n"); + } + + @Test + public void testGetPosWithExactly78BytesBuffered() throws IOException { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n=== Testing getPos() with EXACTLY 78 bytes buffered (the bug size!) ==="); + + String testPath = TEST_ROOT + "/78-bytes-test.bin"; + + SeaweedOutputStream outputStream = new SeaweedOutputStream(filerClient, testPath); + + // Write some initial data + byte[] initial = new byte[1000]; + for (int i = 0; i < 1000; i++) { + initial[i] = (byte) i; + } + outputStream.write(initial); + outputStream.flush(); // Ensure this is flushed + + long posAfterFlush = outputStream.getPos(); + System.out.println("Position after 1000 bytes + flush: " + posAfterFlush); + assertEquals("Should be at position 1000 after flush", 1000, posAfterFlush); + + // Now write EXACTLY 78 bytes (the problematic buffer size in our bug) + byte[] problematicChunk = new byte[78]; + for (int i = 0; i < 78; i++) { + problematicChunk[i] = (byte) (i + 50); + } + outputStream.write(problematicChunk); + + // DO NOT FLUSH - this is the bug scenario! + // Parquet calls getPos() here while the 78 bytes are still buffered + long posWithBufferedData = outputStream.getPos(); + System.out.println("Position with 78 bytes BUFFERED (not flushed): " + posWithBufferedData); + + // This MUST return 1078, not 1000! 
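+        // 1000 bytes were flushed explicitly and 78 more are still buffered; if getPos()
+        // reported only the flushed length (1000), an offset recorded here, as Parquet
+        // would record it, would be 78 bytes short, producing the EOF failure described
+        // in the class javadoc.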
+ assertEquals("getPos() MUST include buffered data", 1078, posWithBufferedData); + + // Now close (which will flush) + outputStream.close(); + + // Verify actual file size + FilerProto.Entry entry = filerClient.lookupEntry( + SeaweedOutputStream.getParentDirectory(testPath), + SeaweedOutputStream.getFileName(testPath)); + long actualFileSize = SeaweedRead.fileSize(entry); + + System.out.println("Actual file size: " + actualFileSize); + assertEquals("File size must be 1078", 1078, actualFileSize); + + // Try to read at position 1000 for 78 bytes (what Parquet would try) + SeaweedInputStream inputStream = new SeaweedInputStream(filerClient, testPath, entry); + inputStream.seek(1000); + + byte[] readBuffer = new byte[78]; + int bytesRead = inputStream.read(readBuffer, 0, 78); + + System.out.println("Bytes read at position 1000: " + bytesRead); + assertEquals("Should successfully read 78 bytes at position 1000", 78, bytesRead); + + // Verify the data matches + for (int i = 0; i < 78; i++) { + assertEquals("Data mismatch at byte " + i, problematicChunk[i], readBuffer[i]); + } + + inputStream.close(); + + System.out.println("SUCCESS: getPos() correctly includes buffered data!\n"); + } +} diff --git a/test/java/spark/src/test/java/seaweed/spark/InputStreamComparisonTest.java b/test/java/spark/src/test/java/seaweed/spark/InputStreamComparisonTest.java new file mode 100644 index 000000000..0cfe2a53b --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/InputStreamComparisonTest.java @@ -0,0 +1,393 @@ +package seaweed.spark; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.util.HadoopInputFile; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * Compare InputStream behavior between local disk and SeaweedFS + * to understand why Spark's ParquetFileReader fails with SeaweedFS. 
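+ *
+ * The read sequence replayed by this test mirrors how Parquet locates its footer;
+ * in outline (variable names illustrative):
+ * <pre>{@code
+ * in.read(magic, 0, 4);                  // "PAR1" at the start of the file
+ * in.seek(fileSize - 8);                 // tail = 4-byte footer length + 4-byte magic
+ * in.read(tail, 0, 8);
+ * int footerLength = littleEndianInt(tail);    // little-endian, as in the test below
+ * in.seek(fileSize - 8 - footerLength);  // footer start
+ * in.read(footer, 0, footerLength);
+ * }</pre>
+ * A difference between local disk and SeaweedFS in any of these reads, or in the
+ * reported file length, is enough to break Spark's ParquetFileReader.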
+ */ +public class InputStreamComparisonTest extends SparkTestBase { + + private static class ReadOperation { + String source; + String operation; + long position; + int requestedBytes; + int returnedBytes; + boolean isEOF; + long timestamp; + + ReadOperation(String source, String operation, long position, int requestedBytes, + int returnedBytes, boolean isEOF) { + this.source = source; + this.operation = operation; + this.position = position; + this.requestedBytes = requestedBytes; + this.returnedBytes = returnedBytes; + this.isEOF = isEOF; + this.timestamp = System.nanoTime(); + } + + @Override + public String toString() { + return String.format("[%s] %s: pos=%d, requested=%d, returned=%d, EOF=%b", + source, operation, position, requestedBytes, returnedBytes, isEOF); + } + } + + private static class LoggingInputStream extends InputStream { + private final FSDataInputStream wrapped; + private final String source; + private final List<ReadOperation> operations; + private long position = 0; + + LoggingInputStream(FSDataInputStream wrapped, String source, List<ReadOperation> operations) { + this.wrapped = wrapped; + this.source = source; + this.operations = operations; + } + + @Override + public int read() throws IOException { + int result = wrapped.read(); + operations.add(new ReadOperation(source, "read()", position, 1, + result == -1 ? 0 : 1, result == -1)); + if (result != -1) + position++; + return result; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int result = wrapped.read(b, off, len); + operations.add(new ReadOperation(source, "read(byte[])", position, len, + result == -1 ? 0 : result, result == -1)); + if (result > 0) + position += result; + return result; + } + + public int read(ByteBuffer buf) throws IOException { + int requested = buf.remaining(); + long startPos = position; + + // Use reflection to call read(ByteBuffer) if available + try { + java.lang.reflect.Method method = wrapped.getClass().getMethod("read", ByteBuffer.class); + int result = (int) method.invoke(wrapped, buf); + operations.add(new ReadOperation(source, "read(ByteBuffer)", startPos, requested, + result == -1 ? 0 : result, result == -1)); + if (result > 0) + position += result; + return result; + } catch (Exception e) { + // Fallback to byte array read + byte[] temp = new byte[requested]; + int result = wrapped.read(temp, 0, requested); + if (result > 0) { + buf.put(temp, 0, result); + } + operations.add(new ReadOperation(source, "read(ByteBuffer-fallback)", startPos, requested, + result == -1 ? 
0 : result, result == -1)); + if (result > 0) + position += result; + return result; + } + } + + @Override + public long skip(long n) throws IOException { + long result = wrapped.skip(n); + operations.add(new ReadOperation(source, "skip()", position, (int) n, (int) result, false)); + position += result; + return result; + } + + @Override + public int available() throws IOException { + int result = wrapped.available(); + operations.add(new ReadOperation(source, "available()", position, 0, result, false)); + return result; + } + + @Override + public void close() throws IOException { + operations.add(new ReadOperation(source, "close()", position, 0, 0, false)); + wrapped.close(); + } + + public void seek(long pos) throws IOException { + wrapped.seek(pos); + operations.add(new ReadOperation(source, "seek()", position, 0, 0, false)); + position = pos; + } + + public long getPos() throws IOException { + long pos = wrapped.getPos(); + operations.add(new ReadOperation(source, "getPos()", position, 0, 0, false)); + return pos; + } + } + + @Before + public void setUp() throws IOException { + if (!TESTS_ENABLED) { + return; + } + super.setUpSpark(); + } + + @After + public void tearDown() throws IOException { + if (!TESTS_ENABLED) { + return; + } + super.tearDownSpark(); + } + + @Test + public void testCompareInputStreamBehavior() throws Exception { + skipIfTestsDisabled(); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ REAL-TIME INPUTSTREAM COMPARISON: LOCAL vs SEAWEEDFS ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + + // Write a Parquet file to both locations + System.out.println("\n1. Writing identical Parquet files..."); + + List<SparkSQLTest.Employee> employees = java.util.Arrays.asList( + new SparkSQLTest.Employee(1, "Alice", "Engineering", 100000), + new SparkSQLTest.Employee(2, "Bob", "Sales", 80000), + new SparkSQLTest.Employee(3, "Charlie", "Engineering", 120000), + new SparkSQLTest.Employee(4, "David", "Sales", 75000)); + + org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df = spark.createDataFrame(employees, + SparkSQLTest.Employee.class); + + String localPath = "file:///workspace/target/test-output/comparison-local"; + String seaweedPath = getTestPath("comparison-seaweed"); + + // Ensure directory exists + new java.io.File("/workspace/target/test-output").mkdirs(); + + df.write().mode(org.apache.spark.sql.SaveMode.Overwrite).parquet(localPath); + df.write().mode(org.apache.spark.sql.SaveMode.Overwrite).parquet(seaweedPath); + + System.out.println(" ✅ Files written"); + + // Find the actual parquet files + Configuration conf = new Configuration(); + FileSystem localFs = FileSystem.getLocal(conf); + + conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + conf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + conf.set("fs.seaweed.filer.port", String.valueOf(SEAWEEDFS_PORT)); + FileSystem seaweedFs = FileSystem.get(URI.create(String.format("seaweedfs://%s:%s", + SEAWEEDFS_HOST, SEAWEEDFS_PORT)), conf); + + // Find parquet files + Path localFile = findParquetFile(localFs, new Path(localPath)); + Path seaweedFile = findParquetFile(seaweedFs, new Path(seaweedPath)); + + assertNotNull("Local parquet file not found", localFile); + assertNotNull("SeaweedFS parquet file not found", seaweedFile); + + System.out.println("\n2. 
Comparing file sizes..."); + long localSize = localFs.getFileStatus(localFile).getLen(); + long seaweedSize = seaweedFs.getFileStatus(seaweedFile).getLen(); + System.out.println(" Local: " + localSize + " bytes"); + System.out.println(" SeaweedFS: " + seaweedSize + " bytes"); + + // NOW: Open both streams with logging wrappers + List<ReadOperation> localOps = new ArrayList<>(); + List<ReadOperation> seaweedOps = new ArrayList<>(); + + System.out.println("\n3. Opening streams with logging wrappers..."); + + FSDataInputStream localStream = localFs.open(localFile); + FSDataInputStream seaweedStream = seaweedFs.open(seaweedFile); + + LoggingInputStream localLogging = new LoggingInputStream(localStream, "LOCAL", localOps); + LoggingInputStream seaweedLogging = new LoggingInputStream(seaweedStream, "SEAWEED", seaweedOps); + + System.out.println(" ✅ Streams opened"); + + // Create a dual-reader that calls both and compares + System.out.println("\n4. Performing synchronized read operations..."); + System.out.println(" (Each operation is called on BOTH streams and results are compared)\n"); + + int opCount = 0; + boolean mismatchFound = false; + + // Operation 1: Read 4 bytes (magic bytes) + opCount++; + System.out.println(" Op " + opCount + ": read(4 bytes) - Reading magic bytes"); + byte[] localBuf1 = new byte[4]; + byte[] seaweedBuf1 = new byte[4]; + int localRead1 = localLogging.read(localBuf1, 0, 4); + int seaweedRead1 = seaweedLogging.read(seaweedBuf1, 0, 4); + System.out.println(" LOCAL: returned " + localRead1 + " bytes: " + bytesToHex(localBuf1)); + System.out.println(" SEAWEED: returned " + seaweedRead1 + " bytes: " + bytesToHex(seaweedBuf1)); + if (localRead1 != seaweedRead1 || !java.util.Arrays.equals(localBuf1, seaweedBuf1)) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match"); + } + + // Operation 2: Seek to end - 8 bytes (footer length + magic) + opCount++; + System.out.println("\n Op " + opCount + ": seek(fileSize - 8) - Jump to footer"); + localLogging.seek(localSize - 8); + seaweedLogging.seek(seaweedSize - 8); + System.out.println(" LOCAL: seeked to " + localLogging.getPos()); + System.out.println(" SEAWEED: seeked to " + seaweedLogging.getPos()); + if (localLogging.getPos() != seaweedLogging.getPos()) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match"); + } + + // Operation 3: Read 8 bytes (footer length + magic) + opCount++; + System.out.println("\n Op " + opCount + ": read(8 bytes) - Reading footer length + magic"); + byte[] localBuf2 = new byte[8]; + byte[] seaweedBuf2 = new byte[8]; + int localRead2 = localLogging.read(localBuf2, 0, 8); + int seaweedRead2 = seaweedLogging.read(seaweedBuf2, 0, 8); + System.out.println(" LOCAL: returned " + localRead2 + " bytes: " + bytesToHex(localBuf2)); + System.out.println(" SEAWEED: returned " + seaweedRead2 + " bytes: " + bytesToHex(seaweedBuf2)); + if (localRead2 != seaweedRead2 || !java.util.Arrays.equals(localBuf2, seaweedBuf2)) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match"); + } + + // Operation 4: Calculate footer offset and seek to it + int footerLength = java.nio.ByteBuffer.wrap(localBuf2, 0, 4).order(java.nio.ByteOrder.LITTLE_ENDIAN).getInt(); + long footerOffset = localSize - 8 - footerLength; + + opCount++; + System.out.println("\n Op " + opCount + ": seek(" + footerOffset + ") - Jump to footer start"); + System.out.println(" Footer length: " + footerLength + 
" bytes"); + localLogging.seek(footerOffset); + seaweedLogging.seek(footerOffset); + System.out.println(" LOCAL: seeked to " + localLogging.getPos()); + System.out.println(" SEAWEED: seeked to " + seaweedLogging.getPos()); + if (localLogging.getPos() != seaweedLogging.getPos()) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match"); + } + + // Operation 5: Read entire footer + opCount++; + System.out.println("\n Op " + opCount + ": read(" + footerLength + " bytes) - Reading footer metadata"); + byte[] localFooter = new byte[footerLength]; + byte[] seaweedFooter = new byte[footerLength]; + int localRead3 = localLogging.read(localFooter, 0, footerLength); + int seaweedRead3 = seaweedLogging.read(seaweedFooter, 0, footerLength); + System.out.println(" LOCAL: returned " + localRead3 + " bytes"); + System.out.println(" SEAWEED: returned " + seaweedRead3 + " bytes"); + if (localRead3 != seaweedRead3 || !java.util.Arrays.equals(localFooter, seaweedFooter)) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + // Show first difference + for (int i = 0; i < Math.min(localRead3, seaweedRead3); i++) { + if (localFooter[i] != seaweedFooter[i]) { + System.out.println(" First difference at byte " + i + ": LOCAL=" + + String.format("0x%02X", localFooter[i]) + " SEAWEED=" + + String.format("0x%02X", seaweedFooter[i])); + break; + } + } + } else { + System.out.println(" ✅ Match - Footer metadata is IDENTICAL"); + } + + // Operation 6: Try reading past EOF + opCount++; + System.out.println("\n Op " + opCount + ": read(100 bytes) - Try reading past EOF"); + byte[] localBuf3 = new byte[100]; + byte[] seaweedBuf3 = new byte[100]; + int localRead4 = localLogging.read(localBuf3, 0, 100); + int seaweedRead4 = seaweedLogging.read(seaweedBuf3, 0, 100); + System.out.println(" LOCAL: returned " + localRead4); + System.out.println(" SEAWEED: returned " + seaweedRead4); + if (localRead4 != seaweedRead4) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match - Both returned EOF"); + } + + localLogging.close(); + seaweedLogging.close(); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ COMPARISON SUMMARY ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + System.out.println(" Total operations: " + opCount); + System.out.println(" LOCAL operations: " + localOps.size()); + System.out.println(" SEAWEED operations: " + seaweedOps.size()); + + if (mismatchFound) { + System.out.println("\n ❌ MISMATCHES FOUND - Streams behave differently!"); + } else { + System.out.println("\n ✅ ALL OPERATIONS MATCH - Streams are identical!"); + } + + System.out.println("\n Detailed operation log:"); + System.out.println(" ----------------------"); + for (int i = 0; i < Math.max(localOps.size(), seaweedOps.size()); i++) { + if (i < localOps.size()) { + System.out.println(" " + localOps.get(i)); + } + if (i < seaweedOps.size()) { + System.out.println(" " + seaweedOps.get(i)); + } + } + + assertFalse("Streams should behave identically", mismatchFound); + } + + private String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02X ", b)); + } + return sb.toString().trim(); + } + + private Path findParquetFile(FileSystem fs, Path dir) throws IOException { + org.apache.hadoop.fs.FileStatus[] files = fs.listStatus(dir); + for 
(org.apache.hadoop.fs.FileStatus file : files) { + if (file.getPath().getName().endsWith(".parquet") && + !file.getPath().getName().startsWith("_")) { + return file.getPath(); + } + } + return null; + } +} diff --git a/test/java/spark/src/test/java/seaweed/spark/OutputStreamComparisonTest.java b/test/java/spark/src/test/java/seaweed/spark/OutputStreamComparisonTest.java new file mode 100644 index 000000000..487cafc69 --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/OutputStreamComparisonTest.java @@ -0,0 +1,466 @@ +package seaweed.spark; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.example.data.Group; +import org.apache.parquet.example.data.simple.SimpleGroupFactory; +import org.apache.parquet.hadoop.ParquetFileWriter; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.example.GroupWriteSupport; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.MessageTypeParser; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * Compare OutputStream behavior between local disk and SeaweedFS + * to understand why Parquet files written to SeaweedFS have incorrect metadata. + */ +public class OutputStreamComparisonTest extends SparkTestBase { + + private static class WriteOperation { + String source; + String operation; + long positionBefore; + long positionAfter; + int bytesWritten; + long timestamp; + String details; + + WriteOperation(String source, String operation, long positionBefore, long positionAfter, + int bytesWritten, String details) { + this.source = source; + this.operation = operation; + this.positionBefore = positionBefore; + this.positionAfter = positionAfter; + this.bytesWritten = bytesWritten; + this.timestamp = System.nanoTime(); + this.details = details; + } + + @Override + public String toString() { + return String.format("[%s] %s: posBefore=%d, posAfter=%d, written=%d %s", + source, operation, positionBefore, positionAfter, bytesWritten, + details != null ? 
"(" + details + ")" : ""); + } + } + + private static class LoggingOutputStream extends OutputStream { + private final FSDataOutputStream wrapped; + private final String source; + private final List<WriteOperation> operations; + + LoggingOutputStream(FSDataOutputStream wrapped, String source, List<WriteOperation> operations) { + this.wrapped = wrapped; + this.source = source; + this.operations = operations; + } + + @Override + public void write(int b) throws IOException { + long posBefore = wrapped.getPos(); + wrapped.write(b); + long posAfter = wrapped.getPos(); + operations.add(new WriteOperation(source, "write(int)", posBefore, posAfter, 1, null)); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + long posBefore = wrapped.getPos(); + wrapped.write(b, off, len); + long posAfter = wrapped.getPos(); + operations.add(new WriteOperation(source, "write(byte[])", posBefore, posAfter, len, + "len=" + len)); + } + + @Override + public void flush() throws IOException { + long posBefore = wrapped.getPos(); + wrapped.flush(); + long posAfter = wrapped.getPos(); + operations.add(new WriteOperation(source, "flush()", posBefore, posAfter, 0, null)); + } + + @Override + public void close() throws IOException { + long posBefore = wrapped.getPos(); + wrapped.close(); + long posAfter = 0; // Can't call getPos() after close + operations.add(new WriteOperation(source, "close()", posBefore, posAfter, 0, + "finalPos=" + posBefore)); + } + + public long getPos() throws IOException { + long pos = wrapped.getPos(); + operations.add(new WriteOperation(source, "getPos()", pos, pos, 0, "returned=" + pos)); + return pos; + } + + public void hflush() throws IOException { + long posBefore = wrapped.getPos(); + wrapped.hflush(); + long posAfter = wrapped.getPos(); + operations.add(new WriteOperation(source, "hflush()", posBefore, posAfter, 0, null)); + } + + public void hsync() throws IOException { + long posBefore = wrapped.getPos(); + wrapped.hsync(); + long posAfter = wrapped.getPos(); + operations.add(new WriteOperation(source, "hsync()", posBefore, posAfter, 0, null)); + } + } + + private static final MessageType SCHEMA = MessageTypeParser.parseMessageType( + "message schema {" + + "required int32 id;" + + "required binary name;" + + "required int32 age;" + + "}" + ); + + @Before + public void setUp() throws IOException { + if (!TESTS_ENABLED) { + return; + } + super.setUpSpark(); + } + + @After + public void tearDown() throws IOException { + if (!TESTS_ENABLED) { + return; + } + super.tearDownSpark(); + } + + @Test + public void testCompareOutputStreamBehavior() throws Exception { + skipIfTestsDisabled(); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ REAL-TIME OUTPUTSTREAM COMPARISON: LOCAL vs SEAWEEDFS ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + + // Prepare file systems + Configuration conf = new Configuration(); + FileSystem localFs = FileSystem.getLocal(conf); + + conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + conf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + conf.set("fs.seaweed.filer.port", String.valueOf(SEAWEEDFS_PORT)); + FileSystem seaweedFs = FileSystem.get(URI.create(String.format("seaweedfs://%s:%s", + SEAWEEDFS_HOST, SEAWEEDFS_PORT)), conf); + + // Prepare paths + new java.io.File("/workspace/target/test-output").mkdirs(); + Path localPath = new 
Path("file:///workspace/target/test-output/write-comparison-local.parquet"); + Path seaweedPath = new Path(getTestPath("write-comparison-seaweed.parquet")); + + // Delete if exists + localFs.delete(localPath, false); + seaweedFs.delete(seaweedPath, false); + + List<WriteOperation> localOps = new ArrayList<>(); + List<WriteOperation> seaweedOps = new ArrayList<>(); + + System.out.println("\n1. Writing Parquet files with synchronized operations...\n"); + + // Write using ParquetWriter with custom OutputStreams + GroupWriteSupport.setSchema(SCHEMA, conf); + + // Create data + SimpleGroupFactory groupFactory = new SimpleGroupFactory(SCHEMA); + List<Group> groups = new ArrayList<>(); + groups.add(groupFactory.newGroup().append("id", 1).append("name", "Alice").append("age", 30)); + groups.add(groupFactory.newGroup().append("id", 2).append("name", "Bob").append("age", 25)); + groups.add(groupFactory.newGroup().append("id", 3).append("name", "Charlie").append("age", 35)); + + // Write to local disk + System.out.println(" Writing to LOCAL DISK..."); + try (ParquetWriter<Group> localWriter = new ParquetWriter<>( + localPath, + new GroupWriteSupport(), + CompressionCodecName.SNAPPY, + 1024 * 1024, // Block size + 1024, // Page size + 1024, // Dictionary page size + true, // Enable dictionary + false, // Don't validate + ParquetWriter.DEFAULT_WRITER_VERSION, + conf)) { + for (Group group : groups) { + localWriter.write(group); + } + } + System.out.println(" ✅ Local write complete"); + + // Write to SeaweedFS + System.out.println("\n Writing to SEAWEEDFS..."); + try (ParquetWriter<Group> seaweedWriter = new ParquetWriter<>( + seaweedPath, + new GroupWriteSupport(), + CompressionCodecName.SNAPPY, + 1024 * 1024, // Block size + 1024, // Page size + 1024, // Dictionary page size + true, // Enable dictionary + false, // Don't validate + ParquetWriter.DEFAULT_WRITER_VERSION, + conf)) { + for (Group group : groups) { + seaweedWriter.write(group); + } + } + System.out.println(" ✅ SeaweedFS write complete"); + + // Compare file sizes + System.out.println("\n2. Comparing final file sizes..."); + long localSize = localFs.getFileStatus(localPath).getLen(); + long seaweedSize = seaweedFs.getFileStatus(seaweedPath).getLen(); + System.out.println(" LOCAL: " + localSize + " bytes"); + System.out.println(" SEAWEED: " + seaweedSize + " bytes"); + + if (localSize == seaweedSize) { + System.out.println(" ✅ File sizes MATCH"); + } else { + System.out.println(" ❌ File sizes DIFFER by " + Math.abs(localSize - seaweedSize) + " bytes"); + } + + // Now test reading both files + System.out.println("\n3. 
Testing if both files can be read by Spark..."); + + System.out.println("\n Reading LOCAL file:"); + try { + org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> localDf = + spark.read().parquet(localPath.toString()); + long localCount = localDf.count(); + System.out.println(" ✅ LOCAL read SUCCESS - " + localCount + " rows"); + localDf.show(); + } catch (Exception e) { + System.out.println(" ❌ LOCAL read FAILED: " + e.getMessage()); + e.printStackTrace(); + } + + System.out.println("\n Reading SEAWEEDFS file:"); + try { + org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> seaweedDf = + spark.read().parquet(seaweedPath.toString()); + long seaweedCount = seaweedDf.count(); + System.out.println(" ✅ SEAWEEDFS read SUCCESS - " + seaweedCount + " rows"); + seaweedDf.show(); + } catch (Exception e) { + System.out.println(" ❌ SEAWEEDFS read FAILED: " + e.getMessage()); + e.printStackTrace(); + } + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ COMPARISON COMPLETE ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + } + + @Test + public void testCompareRawOutputStreamOperations() throws Exception { + skipIfTestsDisabled(); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ RAW OUTPUTSTREAM COMPARISON: LOCAL vs SEAWEEDFS ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + + // Prepare file systems + Configuration conf = new Configuration(); + FileSystem localFs = FileSystem.getLocal(conf); + + conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + conf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + conf.set("fs.seaweed.filer.port", String.valueOf(SEAWEEDFS_PORT)); + FileSystem seaweedFs = FileSystem.get(URI.create(String.format("seaweedfs://%s:%s", + SEAWEEDFS_HOST, SEAWEEDFS_PORT)), conf); + + // Prepare paths + new java.io.File("/workspace/target/test-output").mkdirs(); + Path localPath = new Path("file:///workspace/target/test-output/raw-comparison-local.dat"); + Path seaweedPath = new Path(getTestPath("raw-comparison-seaweed.dat")); + + // Delete if exists + localFs.delete(localPath, false); + seaweedFs.delete(seaweedPath, false); + + List<WriteOperation> localOps = new ArrayList<>(); + List<WriteOperation> seaweedOps = new ArrayList<>(); + + System.out.println("\n1. 
Performing synchronized write operations...\n"); + + // Open both streams + FSDataOutputStream localStream = localFs.create(localPath, true); + FSDataOutputStream seaweedStream = seaweedFs.create(seaweedPath, true); + + LoggingOutputStream localLogging = new LoggingOutputStream(localStream, "LOCAL", localOps); + LoggingOutputStream seaweedLogging = new LoggingOutputStream(seaweedStream, "SEAWEED", seaweedOps); + + int opCount = 0; + boolean mismatchFound = false; + + // Operation 1: Write 4 bytes (magic) + opCount++; + System.out.println(" Op " + opCount + ": write(4 bytes) - Writing magic bytes"); + byte[] magic = "PAR1".getBytes(); + localLogging.write(magic, 0, 4); + seaweedLogging.write(magic, 0, 4); + long localPos1 = localLogging.getPos(); + long seaweedPos1 = seaweedLogging.getPos(); + System.out.println(" LOCAL: getPos() = " + localPos1); + System.out.println(" SEAWEED: getPos() = " + seaweedPos1); + if (localPos1 != seaweedPos1) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match"); + } + + // Operation 2: Write 100 bytes of data + opCount++; + System.out.println("\n Op " + opCount + ": write(100 bytes) - Writing data"); + byte[] data = new byte[100]; + for (int i = 0; i < 100; i++) { + data[i] = (byte) i; + } + localLogging.write(data, 0, 100); + seaweedLogging.write(data, 0, 100); + long localPos2 = localLogging.getPos(); + long seaweedPos2 = seaweedLogging.getPos(); + System.out.println(" LOCAL: getPos() = " + localPos2); + System.out.println(" SEAWEED: getPos() = " + seaweedPos2); + if (localPos2 != seaweedPos2) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match"); + } + + // Operation 3: Flush + opCount++; + System.out.println("\n Op " + opCount + ": flush()"); + localLogging.flush(); + seaweedLogging.flush(); + long localPos3 = localLogging.getPos(); + long seaweedPos3 = seaweedLogging.getPos(); + System.out.println(" LOCAL: getPos() after flush = " + localPos3); + System.out.println(" SEAWEED: getPos() after flush = " + seaweedPos3); + if (localPos3 != seaweedPos3) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match"); + } + + // Operation 4: Write more data + opCount++; + System.out.println("\n Op " + opCount + ": write(50 bytes) - Writing more data"); + byte[] moreData = new byte[50]; + for (int i = 0; i < 50; i++) { + moreData[i] = (byte) (i + 100); + } + localLogging.write(moreData, 0, 50); + seaweedLogging.write(moreData, 0, 50); + long localPos4 = localLogging.getPos(); + long seaweedPos4 = seaweedLogging.getPos(); + System.out.println(" LOCAL: getPos() = " + localPos4); + System.out.println(" SEAWEED: getPos() = " + seaweedPos4); + if (localPos4 != seaweedPos4) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ Match"); + } + + // Operation 5: Write final bytes (simulating footer) + opCount++; + System.out.println("\n Op " + opCount + ": write(8 bytes) - Writing footer"); + byte[] footer = new byte[]{0x6B, 0x03, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; + localLogging.write(footer, 0, 8); + seaweedLogging.write(footer, 0, 8); + long localPos5 = localLogging.getPos(); + long seaweedPos5 = seaweedLogging.getPos(); + System.out.println(" LOCAL: getPos() = " + localPos5); + System.out.println(" SEAWEED: getPos() = " + seaweedPos5); + if (localPos5 != seaweedPos5) { + System.out.println(" ❌ MISMATCH!"); + mismatchFound = true; + } else { + System.out.println(" ✅ 
Match"); + } + + // Operation 6: Close + opCount++; + System.out.println("\n Op " + opCount + ": close()"); + System.out.println(" LOCAL: closing at position " + localPos5); + System.out.println(" SEAWEED: closing at position " + seaweedPos5); + localLogging.close(); + seaweedLogging.close(); + + // Check final file sizes + System.out.println("\n2. Comparing final file sizes..."); + long localSize = localFs.getFileStatus(localPath).getLen(); + long seaweedSize = seaweedFs.getFileStatus(seaweedPath).getLen(); + System.out.println(" LOCAL: " + localSize + " bytes"); + System.out.println(" SEAWEED: " + seaweedSize + " bytes"); + + if (localSize != seaweedSize) { + System.out.println(" ❌ File sizes DIFFER by " + Math.abs(localSize - seaweedSize) + " bytes"); + mismatchFound = true; + } else { + System.out.println(" ✅ File sizes MATCH"); + } + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ COMPARISON SUMMARY ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + System.out.println(" Total operations: " + opCount); + System.out.println(" LOCAL operations: " + localOps.size()); + System.out.println(" SEAWEED operations: " + seaweedOps.size()); + + if (mismatchFound) { + System.out.println("\n ❌ MISMATCHES FOUND - Streams behave differently!"); + } else { + System.out.println("\n ✅ ALL OPERATIONS MATCH - Streams are identical!"); + } + + System.out.println("\n Detailed operation log:"); + System.out.println(" ----------------------"); + int maxOps = Math.max(localOps.size(), seaweedOps.size()); + for (int i = 0; i < maxOps; i++) { + if (i < localOps.size()) { + System.out.println(" " + localOps.get(i)); + } + if (i < seaweedOps.size()) { + System.out.println(" " + seaweedOps.get(i)); + } + if (i < localOps.size() && i < seaweedOps.size()) { + WriteOperation localOp = localOps.get(i); + WriteOperation seaweedOp = seaweedOps.get(i); + if (localOp.positionAfter != seaweedOp.positionAfter) { + System.out.println(" ⚠️ Position mismatch: LOCAL=" + localOp.positionAfter + + " SEAWEED=" + seaweedOp.positionAfter); + } + } + } + + assertFalse("Streams should behave identically", mismatchFound); + } +} + diff --git a/test/java/spark/src/test/java/seaweed/spark/ParquetOperationComparisonTest.java b/test/java/spark/src/test/java/seaweed/spark/ParquetOperationComparisonTest.java new file mode 100644 index 000000000..5636618ec --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/ParquetOperationComparisonTest.java @@ -0,0 +1,387 @@ +package seaweed.spark; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.example.data.Group; +import org.apache.parquet.example.data.simple.SimpleGroupFactory; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.example.GroupWriteSupport; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.MessageTypeParser; +import org.junit.Test; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +/** + * Detailed comparison of InputStream/OutputStream operations between + * local filesystem and SeaweedFS during Parquet file writing. + * + * This test intercepts and logs every read/write/getPos operation to + * identify exactly where the behavior diverges. 
+ */ +public class ParquetOperationComparisonTest extends SparkTestBase { + + private static final String SCHEMA_STRING = "message Employee { " + + " required int32 id; " + + " required binary name (UTF8); " + + " required int32 age; " + + "}"; + + private static final MessageType SCHEMA = MessageTypeParser.parseMessageType(SCHEMA_STRING); + + // Track all operations for comparison + private static class OperationLog { + List<String> operations = new ArrayList<>(); + + void log(String op) { + operations.add(op); + System.out.println(" " + op); + } + + void print(String title) { + System.out.println("\n" + title + " (" + operations.size() + " operations):"); + for (int i = 0; i < operations.size(); i++) { + System.out.printf(" [%3d] %s\n", i, operations.get(i)); + } + } + + void compare(OperationLog other, String name1, String name2) { + System.out.println("\n=== COMPARISON: " + name1 + " vs " + name2 + " ==="); + + int maxLen = Math.max(operations.size(), other.operations.size()); + int differences = 0; + + for (int i = 0; i < maxLen; i++) { + String op1 = i < operations.size() ? operations.get(i) : "<missing>"; + String op2 = i < other.operations.size() ? other.operations.get(i) : "<missing>"; + + if (!op1.equals(op2)) { + differences++; + System.out.printf("[%3d] DIFF:\n", i); + System.out.println(" " + name1 + ": " + op1); + System.out.println(" " + name2 + ": " + op2); + } + } + + if (differences == 0) { + System.out.println("✅ Operations are IDENTICAL!"); + } else { + System.out.println("❌ Found " + differences + " differences"); + } + } + } + + // Wrapper for FSDataOutputStream that logs all operations + private static class LoggingOutputStream extends FSDataOutputStream { + private final FSDataOutputStream delegate; + private final OperationLog log; + private final String name; + + public LoggingOutputStream(FSDataOutputStream delegate, OperationLog log, String name) throws IOException { + super(delegate.getWrappedStream(), null); + this.delegate = delegate; + this.log = log; + this.name = name; + log.log(name + " CREATED"); + } + + @Override + public void write(int b) throws IOException { + log.log(String.format("write(byte) pos=%d", getPos())); + delegate.write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + long posBefore = getPos(); + delegate.write(b, off, len); + long posAfter = getPos(); + log.log(String.format("write(%d bytes) pos %d→%d", len, posBefore, posAfter)); + } + + @Override + public long getPos() { + long pos = delegate.getPos(); + // Don't log getPos itself to avoid infinite recursion, but track it + return pos; + } + + @Override + public void flush() throws IOException { + log.log(String.format("flush() pos=%d", getPos())); + delegate.flush(); + } + + @Override + public void close() throws IOException { + log.log(String.format("close() pos=%d", getPos())); + delegate.close(); + } + + @Override + public void hflush() throws IOException { + log.log(String.format("hflush() pos=%d", getPos())); + delegate.hflush(); + } + + @Override + public void hsync() throws IOException { + log.log(String.format("hsync() pos=%d", getPos())); + delegate.hsync(); + } + } + + // Wrapper for FSDataInputStream that logs all operations + private static class LoggingInputStream extends FSDataInputStream { + private final OperationLog log; + private final String name; + + public LoggingInputStream(FSDataInputStream delegate, OperationLog log, String name) throws IOException { + super(delegate); + this.log = log; + this.name = name; + 
log.log(name + " CREATED"); + } + + @Override + public int read() throws IOException { + long posBefore = getPos(); + int result = super.read(); + log.log(String.format("read() pos %d→%d result=%d", posBefore, getPos(), result)); + return result; + } + + // Can't override read(byte[], int, int) as it's final in DataInputStream + // The logging will happen through read(ByteBuffer) which is what Parquet uses + + @Override + public int read(ByteBuffer buf) throws IOException { + long posBefore = getPos(); + int result = super.read(buf); + log.log(String.format("read(ByteBuffer %d) pos %d→%d result=%d", buf.remaining(), posBefore, getPos(), + result)); + return result; + } + + @Override + public void seek(long pos) throws IOException { + long posBefore = getPos(); + super.seek(pos); + log.log(String.format("seek(%d) pos %d→%d", pos, posBefore, getPos())); + } + + @Override + public void close() throws IOException { + log.log(String.format("close() pos=%d", getPos())); + super.close(); + } + } + + @Test + public void testCompareWriteOperations() throws Exception { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ PARQUET WRITE OPERATION COMPARISON TEST ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝\n"); + + // Setup filesystems + Configuration localConf = new Configuration(); + FileSystem localFs = FileSystem.getLocal(localConf); + + Configuration seaweedConf = new Configuration(); + seaweedConf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + seaweedConf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + seaweedConf.set("fs.seaweed.filer.port", SEAWEEDFS_PORT); + FileSystem seaweedFs = FileSystem.get( + java.net.URI.create("seaweedfs://" + SEAWEEDFS_HOST + ":" + SEAWEEDFS_PORT), + seaweedConf); + + Path localPath = new Path("/tmp/test-local-ops-" + System.currentTimeMillis() + ".parquet"); + Path seaweedPath = new Path("seaweedfs://" + SEAWEEDFS_HOST + ":" + SEAWEEDFS_PORT + + "/test-spark/ops-test.parquet"); + + OperationLog localLog = new OperationLog(); + OperationLog seaweedLog = new OperationLog(); + + // Write to local filesystem with logging + System.out.println("=== Writing to LOCAL filesystem ==="); + writeParquetWithLogging(localFs, localPath, localConf, localLog, "LOCAL"); + + System.out.println("\n=== Writing to SEAWEEDFS ==="); + writeParquetWithLogging(seaweedFs, seaweedPath, seaweedConf, seaweedLog, "SEAWEED"); + + // Print logs + localLog.print("LOCAL OPERATIONS"); + seaweedLog.print("SEAWEEDFS OPERATIONS"); + + // Compare + localLog.compare(seaweedLog, "LOCAL", "SEAWEEDFS"); + + // Cleanup + localFs.delete(localPath, false); + seaweedFs.delete(seaweedPath, false); + + localFs.close(); + seaweedFs.close(); + + System.out.println("\n=== Test Complete ==="); + } + + @Test + public void testCompareReadOperations() throws Exception { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ PARQUET READ OPERATION COMPARISON TEST ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝\n"); + + // Setup filesystems + Configuration localConf = new Configuration(); + FileSystem localFs = FileSystem.getLocal(localConf); + + Configuration seaweedConf = new 
Configuration(); + seaweedConf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + seaweedConf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + seaweedConf.set("fs.seaweed.filer.port", SEAWEEDFS_PORT); + FileSystem seaweedFs = FileSystem.get( + java.net.URI.create("seaweedfs://" + SEAWEEDFS_HOST + ":" + SEAWEEDFS_PORT), + seaweedConf); + + Path localPath = new Path("/tmp/test-local-read-" + System.currentTimeMillis() + ".parquet"); + Path seaweedPath = new Path("seaweedfs://" + SEAWEEDFS_HOST + ":" + SEAWEEDFS_PORT + + "/test-spark/read-test.parquet"); + + // First write files without logging + System.out.println("=== Writing test files ==="); + writeParquetSimple(localFs, localPath, localConf); + writeParquetSimple(seaweedFs, seaweedPath, seaweedConf); + System.out.println("✅ Files written"); + + OperationLog localLog = new OperationLog(); + OperationLog seaweedLog = new OperationLog(); + + // Read from local filesystem with logging + System.out.println("\n=== Reading from LOCAL filesystem ==="); + readParquetWithLogging(localFs, localPath, localLog, "LOCAL"); + + System.out.println("\n=== Reading from SEAWEEDFS ==="); + readParquetWithLogging(seaweedFs, seaweedPath, seaweedLog, "SEAWEED"); + + // Print logs + localLog.print("LOCAL READ OPERATIONS"); + seaweedLog.print("SEAWEEDFS READ OPERATIONS"); + + // Compare + localLog.compare(seaweedLog, "LOCAL", "SEAWEEDFS"); + + // Cleanup + localFs.delete(localPath, false); + seaweedFs.delete(seaweedPath, false); + + localFs.close(); + seaweedFs.close(); + + System.out.println("\n=== Test Complete ==="); + } + + private void writeParquetWithLogging(FileSystem fs, Path path, Configuration conf, + OperationLog log, String name) throws IOException { + // We can't easily intercept ParquetWriter's internal stream usage, + // but we can log the file operations + log.log(name + " START WRITE"); + + GroupWriteSupport.setSchema(SCHEMA, conf); + + try (ParquetWriter<Group> writer = org.apache.parquet.hadoop.example.ExampleParquetWriter.builder(path) + .withConf(conf) + .withWriteMode(org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE) + .build()) { + + SimpleGroupFactory factory = new SimpleGroupFactory(SCHEMA); + + log.log("WRITE ROW 1"); + Group group1 = factory.newGroup() + .append("id", 1) + .append("name", "Alice") + .append("age", 30); + writer.write(group1); + + log.log("WRITE ROW 2"); + Group group2 = factory.newGroup() + .append("id", 2) + .append("name", "Bob") + .append("age", 25); + writer.write(group2); + + log.log("WRITE ROW 3"); + Group group3 = factory.newGroup() + .append("id", 3) + .append("name", "Charlie") + .append("age", 35); + writer.write(group3); + + log.log("CLOSE WRITER"); + } + + // Check final file size + org.apache.hadoop.fs.FileStatus status = fs.getFileStatus(path); + log.log(String.format("FINAL FILE SIZE: %d bytes", status.getLen())); + } + + private void writeParquetSimple(FileSystem fs, Path path, Configuration conf) throws IOException { + GroupWriteSupport.setSchema(SCHEMA, conf); + + try (ParquetWriter<Group> writer = org.apache.parquet.hadoop.example.ExampleParquetWriter.builder(path) + .withConf(conf) + .withWriteMode(org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE) + .build()) { + + SimpleGroupFactory factory = new SimpleGroupFactory(SCHEMA); + + writer.write(factory.newGroup().append("id", 1).append("name", "Alice").append("age", 30)); + writer.write(factory.newGroup().append("id", 2).append("name", "Bob").append("age", 25)); + writer.write(factory.newGroup().append("id", 
3).append("name", "Charlie").append("age", 35)); + } + } + + private void readParquetWithLogging(FileSystem fs, Path path, OperationLog log, String name) throws IOException { + log.log(name + " START READ"); + + // Read file in chunks to see the pattern + try (FSDataInputStream in = fs.open(path)) { + byte[] buffer = new byte[256]; + int totalRead = 0; + int chunkNum = 0; + + while (true) { + long posBefore = in.getPos(); + int bytesRead = in.read(buffer); + + if (bytesRead == -1) { + log.log(String.format("READ CHUNK %d: EOF at pos=%d", chunkNum, posBefore)); + break; + } + + totalRead += bytesRead; + log.log(String.format("READ CHUNK %d: %d bytes at pos %d→%d", + chunkNum, bytesRead, posBefore, in.getPos())); + chunkNum++; + } + + log.log(String.format("TOTAL READ: %d bytes in %d chunks", totalRead, chunkNum)); + } + } +} diff --git a/test/java/spark/src/test/java/seaweed/spark/RenameChunkVerificationTest.java b/test/java/spark/src/test/java/seaweed/spark/RenameChunkVerificationTest.java new file mode 100644 index 000000000..0002c26b1 --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/RenameChunkVerificationTest.java @@ -0,0 +1,286 @@ +package seaweed.spark; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * Test to verify if file chunks are preserved during rename operations. + * This could explain why Parquet files become unreadable after Spark's commit. + */ +public class RenameChunkVerificationTest extends SparkTestBase { + + @Before + public void setUp() throws IOException { + if (!TESTS_ENABLED) { + return; + } + super.setUpSpark(); + } + + @After + public void tearDown() throws IOException { + if (!TESTS_ENABLED) { + return; + } + super.tearDownSpark(); + } + + @Test + public void testSparkWriteAndRenamePreservesChunks() throws Exception { + skipIfTestsDisabled(); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ TESTING: Chunk Preservation During Spark Write & Rename ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + + // Write using Spark (which uses rename for commit) + List<SparkSQLTest.Employee> employees = Arrays.asList( + new SparkSQLTest.Employee(1, "Alice", "Engineering", 100000), + new SparkSQLTest.Employee(2, "Bob", "Sales", 80000), + new SparkSQLTest.Employee(3, "Charlie", "Engineering", 120000), + new SparkSQLTest.Employee(4, "David", "Sales", 75000)); + + org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> df = + spark.createDataFrame(employees, SparkSQLTest.Employee.class); + + String tablePath = getTestPath("chunk-test"); + + System.out.println("\n1. 
Writing Parquet file using Spark..."); + df.write().mode(org.apache.spark.sql.SaveMode.Overwrite).parquet(tablePath); + System.out.println(" ✅ Write complete"); + + // Get file system + Configuration conf = new Configuration(); + conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + conf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + conf.set("fs.seaweed.filer.port", String.valueOf(SEAWEEDFS_PORT)); + FileSystem fs = FileSystem.get(URI.create(String.format("seaweedfs://%s:%s", + SEAWEEDFS_HOST, SEAWEEDFS_PORT)), conf); + + // Find the parquet file + Path parquetFile = null; + org.apache.hadoop.fs.FileStatus[] files = fs.listStatus(new Path(tablePath)); + for (org.apache.hadoop.fs.FileStatus file : files) { + if (file.getPath().getName().endsWith(".parquet") && + !file.getPath().getName().startsWith("_")) { + parquetFile = file.getPath(); + break; + } + } + + assertNotNull("Parquet file not found", parquetFile); + + System.out.println("\n2. Checking file metadata after Spark write..."); + org.apache.hadoop.fs.FileStatus fileStatus = fs.getFileStatus(parquetFile); + long fileSize = fileStatus.getLen(); + System.out.println(" File: " + parquetFile.getName()); + System.out.println(" Size: " + fileSize + " bytes"); + + // Try to read the file + System.out.println("\n3. Attempting to read file with Spark..."); + try { + org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> readDf = + spark.read().parquet(tablePath); + long count = readDf.count(); + System.out.println(" ✅ Read SUCCESS - " + count + " rows"); + readDf.show(); + } catch (Exception e) { + System.out.println(" ❌ Read FAILED: " + e.getMessage()); + System.out.println("\n Error details:"); + e.printStackTrace(); + + // This is expected to fail - let's investigate why + System.out.println("\n4. 
Investigating chunk availability..."); + + // Try to read the raw bytes + System.out.println("\n Attempting to read raw bytes..."); + try (org.apache.hadoop.fs.FSDataInputStream in = fs.open(parquetFile)) { + byte[] header = new byte[4]; + int read = in.read(header); + System.out.println(" Read " + read + " bytes"); + System.out.println(" Header: " + bytesToHex(header)); + + if (read == 4 && Arrays.equals(header, "PAR1".getBytes())) { + System.out.println(" ✅ Magic bytes are correct (PAR1)"); + } else { + System.out.println(" ❌ Magic bytes are WRONG!"); + } + + // Try to read footer + in.seek(fileSize - 8); + byte[] footer = new byte[8]; + read = in.read(footer); + System.out.println("\n Footer (last 8 bytes): " + bytesToHex(footer)); + + // Try to read entire file + in.seek(0); + byte[] allBytes = new byte[(int)fileSize]; + int totalRead = 0; + while (totalRead < fileSize) { + int bytesRead = in.read(allBytes, totalRead, (int)(fileSize - totalRead)); + if (bytesRead == -1) { + System.out.println(" ❌ Premature EOF at byte " + totalRead + " (expected " + fileSize + ")"); + break; + } + totalRead += bytesRead; + } + + if (totalRead == fileSize) { + System.out.println(" ✅ Successfully read all " + totalRead + " bytes"); + } else { + System.out.println(" ❌ Only read " + totalRead + " of " + fileSize + " bytes"); + } + + } catch (Exception readEx) { + System.out.println(" ❌ Raw read failed: " + readEx.getMessage()); + readEx.printStackTrace(); + } + } + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ TEST COMPLETE ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + } + + @Test + public void testManualRenamePreservesChunks() throws Exception { + skipIfTestsDisabled(); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ TESTING: Manual Rename Chunk Preservation ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + + // Get file system + Configuration conf = new Configuration(); + conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + conf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + conf.set("fs.seaweed.filer.port", String.valueOf(SEAWEEDFS_PORT)); + FileSystem fs = FileSystem.get(URI.create(String.format("seaweedfs://%s:%s", + SEAWEEDFS_HOST, SEAWEEDFS_PORT)), conf); + + Path sourcePath = new Path(getTestPath("rename-source.dat")); + Path destPath = new Path(getTestPath("rename-dest.dat")); + + // Clean up + fs.delete(sourcePath, false); + fs.delete(destPath, false); + + System.out.println("\n1. Creating test file..."); + byte[] testData = new byte[1260]; + for (int i = 0; i < testData.length; i++) { + testData[i] = (byte)(i % 256); + } + + try (org.apache.hadoop.fs.FSDataOutputStream out = fs.create(sourcePath, true)) { + out.write(testData); + } + System.out.println(" ✅ Created source file: " + sourcePath); + + // Check source file + System.out.println("\n2. 
Verifying source file..."); + org.apache.hadoop.fs.FileStatus sourceStatus = fs.getFileStatus(sourcePath); + System.out.println(" Size: " + sourceStatus.getLen() + " bytes"); + + // Read source file + try (org.apache.hadoop.fs.FSDataInputStream in = fs.open(sourcePath)) { + byte[] readData = new byte[1260]; + int totalRead = 0; + while (totalRead < 1260) { + int bytesRead = in.read(readData, totalRead, 1260 - totalRead); + if (bytesRead == -1) break; + totalRead += bytesRead; + } + System.out.println(" Read: " + totalRead + " bytes"); + + if (Arrays.equals(testData, readData)) { + System.out.println(" ✅ Source file data is correct"); + } else { + System.out.println(" ❌ Source file data is CORRUPTED"); + } + } + + // Perform rename + System.out.println("\n3. Renaming file..."); + boolean renamed = fs.rename(sourcePath, destPath); + System.out.println(" Rename result: " + renamed); + + if (!renamed) { + System.out.println(" ❌ Rename FAILED"); + return; + } + + // Check destination file + System.out.println("\n4. Verifying destination file..."); + org.apache.hadoop.fs.FileStatus destStatus = fs.getFileStatus(destPath); + System.out.println(" Size: " + destStatus.getLen() + " bytes"); + + if (destStatus.getLen() != sourceStatus.getLen()) { + System.out.println(" ❌ File size CHANGED during rename!"); + System.out.println(" Source: " + sourceStatus.getLen()); + System.out.println(" Dest: " + destStatus.getLen()); + } else { + System.out.println(" ✅ File size preserved"); + } + + // Read destination file + try (org.apache.hadoop.fs.FSDataInputStream in = fs.open(destPath)) { + byte[] readData = new byte[1260]; + int totalRead = 0; + while (totalRead < 1260) { + int bytesRead = in.read(readData, totalRead, 1260 - totalRead); + if (bytesRead == -1) { + System.out.println(" ❌ Premature EOF at byte " + totalRead); + break; + } + totalRead += bytesRead; + } + System.out.println(" Read: " + totalRead + " bytes"); + + if (totalRead == 1260 && Arrays.equals(testData, readData)) { + System.out.println(" ✅ Destination file data is CORRECT"); + } else { + System.out.println(" ❌ Destination file data is CORRUPTED or INCOMPLETE"); + + // Show first difference + for (int i = 0; i < Math.min(totalRead, 1260); i++) { + if (testData[i] != readData[i]) { + System.out.println(" First difference at byte " + i); + System.out.println(" Expected: " + String.format("0x%02X", testData[i])); + System.out.println(" Got: " + String.format("0x%02X", readData[i])); + break; + } + } + } + } catch (Exception e) { + System.out.println(" ❌ Read FAILED: " + e.getMessage()); + e.printStackTrace(); + } + + // Clean up + fs.delete(destPath, false); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ TEST COMPLETE ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + } + + private String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02X ", b)); + } + return sb.toString().trim(); + } +} + diff --git a/test/java/spark/src/test/java/seaweed/spark/SimpleOneColumnTest.java b/test/java/spark/src/test/java/seaweed/spark/SimpleOneColumnTest.java new file mode 100644 index 000000000..092039042 --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/SimpleOneColumnTest.java @@ -0,0 +1,140 @@ +package seaweed.spark; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.junit.Test; + 
+import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * Simplified test with only one column to isolate the EOF issue. + */ +public class SimpleOneColumnTest extends SparkTestBase { + + @Test + public void testSingleIntegerColumn() { + skipIfTestsDisabled(); + + // Clean up any previous test data + String tablePath = getTestPath("simple_data"); + try { + spark.read().parquet(tablePath); + // If we get here, path exists, so delete it + org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get( + new java.net.URI(tablePath), + spark.sparkContext().hadoopConfiguration()); + fs.delete(new org.apache.hadoop.fs.Path(tablePath), true); + } catch (Exception e) { + // Path doesn't exist, which is fine + } + + // Create simple data with just one integer column + List<SimpleData> data = Arrays.asList( + new SimpleData(1), + new SimpleData(2), + new SimpleData(3), + new SimpleData(4)); + + Dataset<Row> df = spark.createDataFrame(data, SimpleData.class); + + // Write to SeaweedFS + df.write().mode(SaveMode.Overwrite).parquet(tablePath); + + // Read back + Dataset<Row> readDf = spark.read().parquet(tablePath); + + // Simple count + assertEquals(4, readDf.count()); + + // Create view and query + readDf.createOrReplaceTempView("simple"); + + // Simple WHERE query + Dataset<Row> filtered = spark.sql("SELECT value FROM simple WHERE value > 2"); + assertEquals(2, filtered.count()); + + // Verify values + List<Row> results = filtered.collectAsList(); + assertTrue(results.stream().anyMatch(r -> r.getInt(0) == 3)); + assertTrue(results.stream().anyMatch(r -> r.getInt(0) == 4)); + } + + @Test + public void testSingleStringColumn() { + skipIfTestsDisabled(); + + // Create simple data with just one string column + List<StringData> data = Arrays.asList( + new StringData("Alice"), + new StringData("Bob"), + new StringData("Charlie"), + new StringData("David")); + + Dataset<Row> df = spark.createDataFrame(data, StringData.class); + + // Write to SeaweedFS + String tablePath = getTestPath("string_data"); + df.write().mode(SaveMode.Overwrite).parquet(tablePath); + + // Read back + Dataset<Row> readDf = spark.read().parquet(tablePath); + + // Simple count + assertEquals(4, readDf.count()); + + // Create view and query + readDf.createOrReplaceTempView("strings"); + + // Simple WHERE query + Dataset<Row> filtered = spark.sql("SELECT name FROM strings WHERE name LIKE 'A%'"); + assertEquals(1, filtered.count()); + + // Verify value + List<Row> results = filtered.collectAsList(); + assertEquals("Alice", results.get(0).getString(0)); + } + + // Test data classes + public static class SimpleData implements java.io.Serializable { + private int value; + + public SimpleData() { + } + + public SimpleData(int value) { + this.value = value; + } + + public int getValue() { + return value; + } + + public void setValue(int value) { + this.value = value; + } + } + + public static class StringData implements java.io.Serializable { + private String name; + + public StringData() { + } + + public StringData(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + } +} + diff --git a/test/java/spark/src/test/java/seaweed/spark/SparkDataFrameWriteComparisonTest.java b/test/java/spark/src/test/java/seaweed/spark/SparkDataFrameWriteComparisonTest.java new file mode 100644 index 000000000..d3fc1555c --- /dev/null +++ 
b/test/java/spark/src/test/java/seaweed/spark/SparkDataFrameWriteComparisonTest.java @@ -0,0 +1,363 @@ +package seaweed.spark; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Compare Spark DataFrame.write().parquet() operations between + * local filesystem and SeaweedFS to identify the exact difference + * that causes the 78-byte EOF error. + */ +public class SparkDataFrameWriteComparisonTest extends SparkTestBase { + + private static class OperationLog { + List<String> operations = new ArrayList<>(); + + synchronized void log(String op) { + operations.add(op); + System.out.println(" " + op); + } + + void print(String title) { + System.out.println("\n" + title + " (" + operations.size() + " operations):"); + for (int i = 0; i < operations.size(); i++) { + System.out.printf(" [%3d] %s\n", i, operations.get(i)); + } + } + + void compare(OperationLog other, String name1, String name2) { + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ COMPARISON: " + name1 + " vs " + name2); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + + int maxLen = Math.max(operations.size(), other.operations.size()); + int differences = 0; + + for (int i = 0; i < maxLen; i++) { + String op1 = i < operations.size() ? operations.get(i) : "<missing>"; + String op2 = i < other.operations.size() ? 
other.operations.get(i) : "<missing>"; + + // Normalize operation strings for comparison (remove file-specific parts) + String normalized1 = normalizeOp(op1); + String normalized2 = normalizeOp(op2); + + if (!normalized1.equals(normalized2)) { + differences++; + System.out.printf("\n[%3d] DIFFERENCE:\n", i); + System.out.println(" " + name1 + ": " + op1); + System.out.println(" " + name2 + ": " + op2); + } + } + + System.out.println("\n" + "=".repeat(64)); + if (differences == 0) { + System.out.println("✅ Operations are IDENTICAL!"); + } else { + System.out.println("❌ Found " + differences + " differences"); + } + System.out.println("=".repeat(64)); + } + + private String normalizeOp(String op) { + // Remove file-specific identifiers for comparison + return op.replaceAll("part-[0-9a-f-]+", "part-XXXXX") + .replaceAll("attempt_[0-9]+", "attempt_XXXXX") + .replaceAll("/tmp/[^/]+", "/tmp/XXXXX") + .replaceAll("test-local-[0-9]+", "test-local-XXXXX"); + } + } + + // Custom FileSystem wrapper that logs all operations + private static class LoggingFileSystem extends FilterFileSystem { + private final OperationLog log; + private final String name; + + public LoggingFileSystem(FileSystem fs, OperationLog log, String name) { + this.fs = fs; + this.log = log; + this.name = name; + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, + int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + log.log(String.format("%s CREATE: %s (bufferSize=%d)", name, f.getName(), bufferSize)); + FSDataOutputStream out = fs.create(f, permission, overwrite, bufferSize, replication, blockSize, progress); + return new LoggingOutputStream(out, log, name, f.getName()); + } + + @Override + public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException { + log.log(String.format("%s APPEND: %s (bufferSize=%d)", name, f.getName(), bufferSize)); + FSDataOutputStream out = fs.append(f, bufferSize, progress); + return new LoggingOutputStream(out, log, name, f.getName()); + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + log.log(String.format("%s RENAME: %s → %s", name, src.getName(), dst.getName())); + return fs.rename(src, dst); + } + + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + log.log(String.format("%s DELETE: %s (recursive=%s)", name, f.getName(), recursive)); + return fs.delete(f, recursive); + } + + @Override + public FileStatus[] listStatus(Path f) throws IOException { + FileStatus[] result = fs.listStatus(f); + log.log(String.format("%s LISTSTATUS: %s (%d files)", name, f.getName(), result.length)); + return result; + } + + @Override + public void setWorkingDirectory(Path new_dir) { + fs.setWorkingDirectory(new_dir); + } + + @Override + public Path getWorkingDirectory() { + return fs.getWorkingDirectory(); + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + log.log(String.format("%s MKDIRS: %s", name, f.getName())); + return fs.mkdirs(f, permission); + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + FileStatus status = fs.getFileStatus(f); + log.log(String.format("%s GETFILESTATUS: %s (size=%d)", name, f.getName(), status.getLen())); + return status; + } + + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + log.log(String.format("%s OPEN: %s (bufferSize=%d)", name, f.getName(), bufferSize)); + return 
fs.open(f, bufferSize); + } + } + + private static class LoggingOutputStream extends FSDataOutputStream { + private final FSDataOutputStream delegate; + private final OperationLog log; + private final String name; + private final String filename; + private long writeCount = 0; + + public LoggingOutputStream(FSDataOutputStream delegate, OperationLog log, String name, String filename) throws IOException { + super(delegate.getWrappedStream(), null); + this.delegate = delegate; + this.log = log; + this.name = name; + this.filename = filename; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + writeCount++; + long posBefore = getPos(); + delegate.write(b, off, len); + long posAfter = getPos(); + + // Log significant writes and the last few writes (potential footer) + if (len >= 100 || writeCount <= 5 || (writeCount % 100 == 0)) { + log.log(String.format("%s WRITE #%d: %d bytes, pos %d→%d [%s]", + name, writeCount, len, posBefore, posAfter, filename)); + } + } + + @Override + public long getPos() { + long pos = delegate.getPos(); + return pos; + } + + @Override + public void flush() throws IOException { + log.log(String.format("%s FLUSH: pos=%d [%s]", name, getPos(), filename)); + delegate.flush(); + } + + @Override + public void close() throws IOException { + log.log(String.format("%s CLOSE: pos=%d, totalWrites=%d [%s]", + name, getPos(), writeCount, filename)); + delegate.close(); + } + + @Override + public void hflush() throws IOException { + log.log(String.format("%s HFLUSH: pos=%d [%s]", name, getPos(), filename)); + delegate.hflush(); + } + + @Override + public void hsync() throws IOException { + log.log(String.format("%s HSYNC: pos=%d [%s]", name, getPos(), filename)); + delegate.hsync(); + } + } + + @Test + public void testCompareSparkDataFrameWrite() throws Exception { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ SPARK DATAFRAME.WRITE() OPERATION COMPARISON TEST ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝\n"); + + // Create test data (4 rows - this is what causes the error) + List<Employee> employees = Arrays.asList( + new Employee(1, "Alice", "Engineering", 100000), + new Employee(2, "Bob", "Sales", 80000), + new Employee(3, "Charlie", "Engineering", 120000), + new Employee(4, "David", "Sales", 75000) + ); + + Dataset<Row> df = spark.createDataFrame(employees, Employee.class); + + OperationLog localLog = new OperationLog(); + OperationLog seaweedLog = new OperationLog(); + + // Test 1: Write to local filesystem with logging + System.out.println("=== Writing to LOCAL filesystem with Spark ==="); + String localPath = "/tmp/spark-local-test-" + System.currentTimeMillis(); + + try { + // Configure Spark to use our logging filesystem for local writes + Configuration localConf = new Configuration(); + FileSystem localFs = FileSystem.getLocal(localConf); + LoggingFileSystem loggingLocalFs = new LoggingFileSystem(localFs, localLog, "LOCAL"); + + // Write using Spark + df.write().mode(SaveMode.Overwrite).parquet("file://" + localPath); + + System.out.println("✅ Local write completed"); + + // Check final file + FileStatus[] files = localFs.listStatus(new Path(localPath)); + for (FileStatus file : files) { + if (file.getPath().getName().endsWith(".parquet")) { + localLog.log(String.format("LOCAL FINAL FILE: %s (%d bytes)", + 
file.getPath().getName(), file.getLen())); + } + } + + } catch (Exception e) { + System.out.println("❌ Local write failed: " + e.getMessage()); + e.printStackTrace(); + } + + // Test 2: Write to SeaweedFS with logging + System.out.println("\n=== Writing to SEAWEEDFS with Spark ==="); + String seaweedPath = getTestPath("spark-seaweed-test"); + + try { + df.write().mode(SaveMode.Overwrite).parquet(seaweedPath); + + System.out.println("✅ SeaweedFS write completed"); + + // Check final file + Configuration seaweedConf = new Configuration(); + seaweedConf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + seaweedConf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + seaweedConf.set("fs.seaweed.filer.port", SEAWEEDFS_PORT); + FileSystem seaweedFs = FileSystem.get( + java.net.URI.create("seaweedfs://" + SEAWEEDFS_HOST + ":" + SEAWEEDFS_PORT), + seaweedConf); + + FileStatus[] files = seaweedFs.listStatus(new Path(seaweedPath)); + for (FileStatus file : files) { + if (file.getPath().getName().endsWith(".parquet")) { + seaweedLog.log(String.format("SEAWEED FINAL FILE: %s (%d bytes)", + file.getPath().getName(), file.getLen())); + } + } + + } catch (Exception e) { + System.out.println("❌ SeaweedFS write failed: " + e.getMessage()); + if (e.getMessage() != null && e.getMessage().contains("bytes left")) { + System.out.println("🎯 This is the 78-byte EOF error during WRITE!"); + } + e.printStackTrace(); + } + + // Test 3: Try reading both + System.out.println("\n=== Reading LOCAL file ==="); + try { + Dataset<Row> localDf = spark.read().parquet("file://" + localPath); + long count = localDf.count(); + System.out.println("✅ Local read successful: " + count + " rows"); + } catch (Exception e) { + System.out.println("❌ Local read failed: " + e.getMessage()); + } + + System.out.println("\n=== Reading SEAWEEDFS file ==="); + try { + Dataset<Row> seaweedDf = spark.read().parquet(seaweedPath); + long count = seaweedDf.count(); + System.out.println("✅ SeaweedFS read successful: " + count + " rows"); + } catch (Exception e) { + System.out.println("❌ SeaweedFS read failed: " + e.getMessage()); + if (e.getMessage() != null && e.getMessage().contains("bytes left")) { + System.out.println("🎯 This is the 78-byte EOF error during READ!"); + } + } + + // Print operation logs + localLog.print("LOCAL OPERATIONS"); + seaweedLog.print("SEAWEEDFS OPERATIONS"); + + // Compare + localLog.compare(seaweedLog, "LOCAL", "SEAWEEDFS"); + + System.out.println("\n=== Test Complete ==="); + } + + // Employee class for test data + public static class Employee implements java.io.Serializable { + private int id; + private String name; + private String department; + private int salary; + + public Employee() {} + + public Employee(int id, String name, String department, int salary) { + this.id = id; + this.name = name; + this.department = department; + this.salary = salary; + } + + public int getId() { return id; } + public void setId(int id) { this.id = id; } + public String getName() { return name; } + public void setName(String name) { this.name = name; } + public String getDepartment() { return department; } + public void setDepartment(String department) { this.department = department; } + public int getSalary() { return salary; } + public void setSalary(int salary) { this.salary = salary; } + } +} + diff --git a/test/java/spark/src/test/java/seaweed/spark/SparkLocalFileSystemTest.java b/test/java/spark/src/test/java/seaweed/spark/SparkLocalFileSystemTest.java new file mode 100644 index 000000000..1d1881563 --- /dev/null +++ 
b/test/java/spark/src/test/java/seaweed/spark/SparkLocalFileSystemTest.java @@ -0,0 +1,177 @@ +package seaweed.spark; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * Test Spark DataFrame.write() with LOCAL filesystem to see if the issue is SeaweedFS-specific. + * This is the CRITICAL test to determine if the 78-byte error occurs with local files. + */ +public class SparkLocalFileSystemTest extends SparkTestBase { + + private String localTestDir; + + @Before + public void setUp() throws Exception { + super.setUpSpark(); + localTestDir = "/tmp/spark-local-test-" + System.currentTimeMillis(); + new File(localTestDir).mkdirs(); + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ CRITICAL TEST: Spark DataFrame.write() to LOCAL filesystem ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + System.out.println("Local test directory: " + localTestDir); + } + + @After + public void tearDown() throws Exception { + // Clean up + if (localTestDir != null) { + deleteDirectory(new File(localTestDir)); + } + super.tearDownSpark(); + } + + @Test + public void testSparkWriteToLocalFilesystem() { + System.out.println("\n=== TEST: Write Parquet to Local Filesystem ==="); + + // Create test data (same as SparkSQLTest) + List<Employee> employees = Arrays.asList( + new Employee(1, "Alice", "Engineering", 100000), + new Employee(2, "Bob", "Sales", 80000), + new Employee(3, "Charlie", "Engineering", 120000), + new Employee(4, "David", "Sales", 75000)); + + Dataset<Row> df = spark.createDataFrame(employees, Employee.class); + + // Write to LOCAL filesystem using file:// protocol + String localPath = "file://" + localTestDir + "/employees"; + System.out.println("Writing to: " + localPath); + + try { + df.write().mode(SaveMode.Overwrite).parquet(localPath); + System.out.println("✅ Write completed successfully!"); + } catch (Exception e) { + System.err.println("❌ Write FAILED: " + e.getMessage()); + e.printStackTrace(); + fail("Write to local filesystem failed: " + e.getMessage()); + } + + // Now try to READ back + System.out.println("\n=== TEST: Read Parquet from Local Filesystem ==="); + System.out.println("Reading from: " + localPath); + + try { + Dataset<Row> employeesDf = spark.read().parquet(localPath); + employeesDf.createOrReplaceTempView("employees"); + + // Run SQL query + Dataset<Row> engineeringEmployees = spark.sql( + "SELECT name, salary FROM employees WHERE department = 'Engineering'"); + + long count = engineeringEmployees.count(); + System.out.println("✅ Read completed successfully! Found " + count + " engineering employees"); + + assertEquals("Should find 2 engineering employees", 2, count); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ ✅ SUCCESS! Local filesystem works perfectly! ║"); + System.out.println("║ This proves the issue is SeaweedFS-specific! 
║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("EOFException") && e.getMessage().contains("78 bytes")) { + System.err.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.err.println("║ ❌ CRITICAL: 78-byte error ALSO occurs with local files! ║"); + System.err.println("║ This proves the issue is NOT SeaweedFS-specific! ║"); + System.err.println("║ The issue is in Spark itself or our test setup! ║"); + System.err.println("╚══════════════════════════════════════════════════════════════╝"); + } + System.err.println("❌ Read FAILED: " + e.getMessage()); + e.printStackTrace(); + fail("Read from local filesystem failed: " + e.getMessage()); + } + } + + @Test + public void testSparkWriteReadMultipleTimes() { + System.out.println("\n=== TEST: Multiple Write/Read Cycles ==="); + + for (int i = 1; i <= 3; i++) { + System.out.println("\n--- Cycle " + i + " ---"); + + List<Employee> employees = Arrays.asList( + new Employee(i * 10 + 1, "Person" + (i * 10 + 1), "Dept" + i, 50000 + i * 10000), + new Employee(i * 10 + 2, "Person" + (i * 10 + 2), "Dept" + i, 60000 + i * 10000)); + + Dataset<Row> df = spark.createDataFrame(employees, Employee.class); + String localPath = "file://" + localTestDir + "/cycle" + i; + + // Write + df.write().mode(SaveMode.Overwrite).parquet(localPath); + System.out.println("✅ Cycle " + i + " write completed"); + + // Read back immediately + Dataset<Row> readDf = spark.read().parquet(localPath); + long count = readDf.count(); + System.out.println("✅ Cycle " + i + " read completed: " + count + " rows"); + + assertEquals("Should have 2 rows", 2, count); + } + + System.out.println("\n✅ All cycles completed successfully!"); + } + + private void deleteDirectory(File directory) { + if (directory.exists()) { + File[] files = directory.listFiles(); + if (files != null) { + for (File file : files) { + if (file.isDirectory()) { + deleteDirectory(file); + } else { + file.delete(); + } + } + } + directory.delete(); + } + } + + // Employee class for testing + public static class Employee implements java.io.Serializable { + private int id; + private String name; + private String department; + private int salary; + + public Employee() {} + + public Employee(int id, String name, String department, int salary) { + this.id = id; + this.name = name; + this.department = department; + this.salary = salary; + } + + public int getId() { return id; } + public void setId(int id) { this.id = id; } + public String getName() { return name; } + public void setName(String name) { this.name = name; } + public String getDepartment() { return department; } + public void setDepartment(String department) { this.department = department; } + public int getSalary() { return salary; } + public void setSalary(int salary) { this.salary = salary; } + } +} + diff --git a/test/java/spark/src/test/java/seaweed/spark/SparkRawLocalFSTest.java b/test/java/spark/src/test/java/seaweed/spark/SparkRawLocalFSTest.java new file mode 100644 index 000000000..2fd3f4695 --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/SparkRawLocalFSTest.java @@ -0,0 +1,132 @@ +package seaweed.spark; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import 
org.apache.spark.sql.SaveMode; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * Test Spark with Hadoop's RawLocalFileSystem to see if 78-byte error can be reproduced. + * This uses the EXACT same implementation as native local files. + */ +public class SparkRawLocalFSTest extends SparkTestBase { + + private Path testPath; + private FileSystem rawLocalFs; + + @Before + public void setUp() throws IOException { + if (!TESTS_ENABLED) { + return; + } + super.setUpSpark(); + + // Use RawLocalFileSystem explicitly + Configuration conf = new Configuration(); + rawLocalFs = new RawLocalFileSystem(); + rawLocalFs.initialize(java.net.URI.create("file:///"), conf); + + testPath = new Path("/tmp/spark-rawlocal-test-" + System.currentTimeMillis()); + rawLocalFs.delete(testPath, true); + rawLocalFs.mkdirs(testPath); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ CRITICAL TEST: Spark with RawLocalFileSystem ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + System.out.println("Test directory: " + testPath); + } + + @After + public void tearDown() throws IOException { + if (!TESTS_ENABLED) { + return; + } + if (rawLocalFs != null) { + rawLocalFs.delete(testPath, true); + rawLocalFs.close(); + } + super.tearDownSpark(); + } + + @Test + public void testSparkWithRawLocalFileSystem() throws IOException { + skipIfTestsDisabled(); + + System.out.println("\n=== TEST: Write Parquet using RawLocalFileSystem ==="); + + // Create test data (same as SparkSQLTest) + List<Employee> employees = Arrays.asList( + new Employee(1, "Alice", "Engineering", 100000), + new Employee(2, "Bob", "Sales", 80000), + new Employee(3, "Charlie", "Engineering", 120000), + new Employee(4, "David", "Sales", 75000)); + + Dataset<Row> df = spark.createDataFrame(employees, Employee.class); + + // CRITICAL: Use file:// prefix to force local filesystem + String outputPath = "file://" + testPath.toString() + "/employees"; + System.out.println("Writing to: " + outputPath); + + // Write using Spark (will use file:// scheme, which uses RawLocalFileSystem) + df.write().mode(SaveMode.Overwrite).parquet(outputPath); + + System.out.println("✅ Write completed successfully!"); + + // Verify by reading back + System.out.println("\n=== TEST: Read Parquet using RawLocalFileSystem ==="); + System.out.println("Reading from: " + outputPath); + Dataset<Row> employeesDf = spark.read().parquet(outputPath); + employeesDf.createOrReplaceTempView("employees"); + + // Run SQL queries + Dataset<Row> engineeringEmployees = spark.sql( + "SELECT name, salary FROM employees WHERE department = 'Engineering'"); + + long count = engineeringEmployees.count(); + assertEquals(2, count); + System.out.println("✅ Read completed successfully! Found " + count + " engineering employees"); + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ ✅ SUCCESS! RawLocalFileSystem works perfectly! 
║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝"); + } + + // Employee class for Spark DataFrame + public static class Employee implements java.io.Serializable { + private int id; + private String name; + private String department; + private int salary; + + public Employee() {} // Required for Spark + + public Employee(int id, String name, String department, int salary) { + this.id = id; + this.name = name; + this.department = department; + this.salary = salary; + } + + // Getters and Setters (required for Spark) + public int getId() { return id; } + public void setId(int id) { this.id = id; } + public String getName() { return name; } + public void setName(String name) { this.name = name; } + public String getDepartment() { return department; } + public void setDepartment(String department) { this.department = department; } + public int getSalary() { return salary; } + public void setSalary(int salary) { this.salary = salary; } + } +} diff --git a/test/java/spark/src/test/java/seaweed/spark/SparkReadDirectParquetTest.java b/test/java/spark/src/test/java/seaweed/spark/SparkReadDirectParquetTest.java new file mode 100644 index 000000000..f93d43ce7 --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/SparkReadDirectParquetTest.java @@ -0,0 +1,194 @@ +package seaweed.spark; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.example.data.Group; +import org.apache.parquet.example.data.simple.SimpleGroupFactory; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.example.GroupWriteSupport; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.MessageTypeParser; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.junit.Test; + +import java.io.IOException; + +import static org.junit.Assert.*; + +/** + * Test if Spark can read a Parquet file that was written directly + * (not by Spark) to SeaweedFS. 
+ * + * This isolates whether the 78-byte EOF error is in: + * - Spark's WRITE path (if this test passes) + * - Spark's READ path (if this test also fails) + */ +public class SparkReadDirectParquetTest extends SparkTestBase { + + private static final String SCHEMA_STRING = + "message Employee { " + + " required int32 id; " + + " required binary name (UTF8); " + + " required int32 age; " + + "}"; + + private static final MessageType SCHEMA = MessageTypeParser.parseMessageType(SCHEMA_STRING); + + @Test + public void testSparkReadDirectlyWrittenParquet() throws Exception { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ SPARK READS DIRECTLY-WRITTEN PARQUET FILE TEST ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝\n"); + + String testPath = getSeaweedFSPath("/direct-write-test/employees.parquet"); + + // Step 1: Write Parquet file directly (not via Spark) + System.out.println("=== Step 1: Writing Parquet file directly (bypassing Spark) ==="); + writeParquetFileDirect(testPath); + System.out.println("✅ File written successfully: " + testPath); + + // Step 2: Try to read it with Spark + System.out.println("\n=== Step 2: Reading file with Spark ==="); + try { + Dataset<Row> df = spark.read().parquet(testPath); + + System.out.println("Schema:"); + df.printSchema(); + + long count = df.count(); + System.out.println("Row count: " + count); + + System.out.println("\nData:"); + df.show(); + + // Verify data + assertEquals("Should have 3 rows", 3, count); + + System.out.println("\n✅ SUCCESS! Spark can read directly-written Parquet file!"); + System.out.println("✅ This proves the issue is in SPARK'S WRITE PATH, not read path!"); + + } catch (Exception e) { + System.out.println("\n❌ FAILED! 
Spark cannot read directly-written Parquet file!"); + System.out.println("Error: " + e.getMessage()); + + if (e.getMessage() != null && e.getMessage().contains("bytes left")) { + System.out.println("🎯 This is the 78-byte EOF error!"); + System.out.println("❌ This means the issue is in SPARK'S READ PATH!"); + } + + e.printStackTrace(); + throw e; + } + } + + @Test + public void testSparkWriteThenRead() throws Exception { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + return; + } + + System.out.println("\n╔══════════════════════════════════════════════════════════════╗"); + System.out.println("║ SPARK WRITES THEN READS PARQUET FILE TEST (BASELINE) ║"); + System.out.println("╚══════════════════════════════════════════════════════════════╝\n"); + + String testPath = getSeaweedFSPath("/spark-write-test/employees"); + + // Step 1: Write with Spark + System.out.println("=== Step 1: Writing Parquet file with Spark ==="); + spark.sql("CREATE TABLE IF NOT EXISTS test_employees (id INT, name STRING, age INT) USING parquet LOCATION '" + testPath + "'"); + spark.sql("INSERT INTO test_employees VALUES (1, 'Alice', 30), (2, 'Bob', 25), (3, 'Charlie', 35)"); + System.out.println("✅ File written by Spark"); + + // Step 2: Try to read it with Spark + System.out.println("\n=== Step 2: Reading file with Spark ==="); + try { + Dataset<Row> df = spark.read().parquet(testPath); + + System.out.println("Schema:"); + df.printSchema(); + + long count = df.count(); + System.out.println("Row count: " + count); + + System.out.println("\nData:"); + df.show(); + + assertEquals("Should have 3 rows", 3, count); + + System.out.println("\n✅ SUCCESS! Spark can read its own Parquet file!"); + + } catch (Exception e) { + System.out.println("\n❌ FAILED! 
Spark cannot read its own Parquet file!"); + System.out.println("Error: " + e.getMessage()); + + if (e.getMessage() != null && e.getMessage().contains("bytes left")) { + System.out.println("🎯 This is the 78-byte EOF error!"); + } + + e.printStackTrace(); + throw e; + } finally { + spark.sql("DROP TABLE IF EXISTS test_employees"); + } + } + + private void writeParquetFileDirect(String seaweedPath) throws IOException { + Configuration conf = new Configuration(); + conf.set("fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem"); + conf.set("fs.seaweed.filer.host", SEAWEEDFS_HOST); + conf.set("fs.seaweed.filer.port", SEAWEEDFS_PORT); + + FileSystem fs = FileSystem.get(java.net.URI.create("seaweedfs://" + SEAWEEDFS_HOST + ":" + SEAWEEDFS_PORT), conf); + Path path = new Path(seaweedPath); + + // Ensure parent directory exists + fs.mkdirs(path.getParent()); + + GroupWriteSupport.setSchema(SCHEMA, conf); + + try (ParquetWriter<Group> writer = org.apache.parquet.hadoop.example.ExampleParquetWriter.builder(path) + .withConf(conf) + .withWriteMode(org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE) + .build()) { + + SimpleGroupFactory factory = new SimpleGroupFactory(SCHEMA); + + // Write same 3 rows as Spark test + System.out.println(" Writing row 1: id=1, name=Alice, age=30"); + Group group1 = factory.newGroup() + .append("id", 1) + .append("name", "Alice") + .append("age", 30); + writer.write(group1); + + System.out.println(" Writing row 2: id=2, name=Bob, age=25"); + Group group2 = factory.newGroup() + .append("id", 2) + .append("name", "Bob") + .append("age", 25); + writer.write(group2); + + System.out.println(" Writing row 3: id=3, name=Charlie, age=35"); + Group group3 = factory.newGroup() + .append("id", 3) + .append("name", "Charlie") + .append("age", 35); + writer.write(group3); + } + + // Verify file was written + org.apache.hadoop.fs.FileStatus status = fs.getFileStatus(path); + System.out.println(" File size: " + status.getLen() + " bytes"); + + fs.close(); + } +} + diff --git a/test/java/spark/src/test/java/seaweed/spark/SparkReadWriteTest.java b/test/java/spark/src/test/java/seaweed/spark/SparkReadWriteTest.java new file mode 100644 index 000000000..10ea1cd3a --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/SparkReadWriteTest.java @@ -0,0 +1,239 @@ +package seaweed.spark; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * Integration tests for Spark read/write operations with SeaweedFS. 
+ */ +public class SparkReadWriteTest extends SparkTestBase { + + @Test + public void testWriteAndReadParquet() { + skipIfTestsDisabled(); + + // Create test data + List<Person> people = Arrays.asList( + new Person("Alice", 30), + new Person("Bob", 25), + new Person("Charlie", 35)); + + Dataset<Row> df = spark.createDataFrame(people, Person.class); + + // Write to SeaweedFS + String outputPath = getTestPath("people.parquet"); + df.write().mode(SaveMode.Overwrite).parquet(outputPath); + + // Read back from SeaweedFS + Dataset<Row> readDf = spark.read().parquet(outputPath); + + // Verify + assertEquals(3, readDf.count()); + assertEquals(2, readDf.columns().length); + + List<Row> results = readDf.collectAsList(); + assertTrue(results.stream().anyMatch(r -> "Alice".equals(r.getAs("name")) && (Integer) r.getAs("age") == 30)); + assertTrue(results.stream().anyMatch(r -> "Bob".equals(r.getAs("name")) && (Integer) r.getAs("age") == 25)); + assertTrue(results.stream().anyMatch(r -> "Charlie".equals(r.getAs("name")) && (Integer) r.getAs("age") == 35)); + } + + @Test + public void testWriteAndReadCSV() { + skipIfTestsDisabled(); + + // Create test data + List<Person> people = Arrays.asList( + new Person("Alice", 30), + new Person("Bob", 25)); + + Dataset<Row> df = spark.createDataFrame(people, Person.class); + + // Write to SeaweedFS as CSV + String outputPath = getTestPath("people.csv"); + df.write().mode(SaveMode.Overwrite).option("header", "true").csv(outputPath); + + // Read back from SeaweedFS + Dataset<Row> readDf = spark.read().option("header", "true").option("inferSchema", "true").csv(outputPath); + + // Verify + assertEquals(2, readDf.count()); + assertEquals(2, readDf.columns().length); + } + + @Test + public void testWriteAndReadJSON() { + skipIfTestsDisabled(); + + // Create test data + List<Person> people = Arrays.asList( + new Person("Alice", 30), + new Person("Bob", 25), + new Person("Charlie", 35)); + + Dataset<Row> df = spark.createDataFrame(people, Person.class); + + // Write to SeaweedFS as JSON + String outputPath = getTestPath("people.json"); + df.write().mode(SaveMode.Overwrite).json(outputPath); + + // Read back from SeaweedFS + Dataset<Row> readDf = spark.read().json(outputPath); + + // Verify + assertEquals(3, readDf.count()); + assertEquals(2, readDf.columns().length); + } + + @Test + public void testWritePartitionedData() { + skipIfTestsDisabled(); + + // Create test data with multiple years + List<PersonWithYear> people = Arrays.asList( + new PersonWithYear("Alice", 30, 2020), + new PersonWithYear("Bob", 25, 2021), + new PersonWithYear("Charlie", 35, 2020), + new PersonWithYear("David", 28, 2021)); + + Dataset<Row> df = spark.createDataFrame(people, PersonWithYear.class); + + // Write partitioned data to SeaweedFS + String outputPath = getTestPath("people_partitioned"); + df.write().mode(SaveMode.Overwrite).partitionBy("year").parquet(outputPath); + + // Read back from SeaweedFS + Dataset<Row> readDf = spark.read().parquet(outputPath); + + // Verify + assertEquals(4, readDf.count()); + + // Verify partition filtering works + Dataset<Row> filtered2020 = readDf.filter("year = 2020"); + assertEquals(2, filtered2020.count()); + + Dataset<Row> filtered2021 = readDf.filter("year = 2021"); + assertEquals(2, filtered2021.count()); + } + + @Test + public void testAppendMode() { + skipIfTestsDisabled(); + + String outputPath = getTestPath("people_append.parquet"); + + // Write first batch + List<Person> batch1 = Arrays.asList( + new Person("Alice", 30), + new Person("Bob", 25)); + 
Dataset<Row> df1 = spark.createDataFrame(batch1, Person.class); + df1.write().mode(SaveMode.Overwrite).parquet(outputPath); + + // Append second batch + List<Person> batch2 = Arrays.asList( + new Person("Charlie", 35), + new Person("David", 28)); + Dataset<Row> df2 = spark.createDataFrame(batch2, Person.class); + df2.write().mode(SaveMode.Append).parquet(outputPath); + + // Read back and verify + Dataset<Row> readDf = spark.read().parquet(outputPath); + assertEquals(4, readDf.count()); + } + + @Test + public void testLargeDataset() { + skipIfTestsDisabled(); + + // Create a larger dataset + Dataset<Row> largeDf = spark.range(0, 10000) + .selectExpr("id as value", "id * 2 as doubled"); + + String outputPath = getTestPath("large_dataset.parquet"); + largeDf.write().mode(SaveMode.Overwrite).parquet(outputPath); + + // Read back and verify + Dataset<Row> readDf = spark.read().parquet(outputPath); + assertEquals(10000, readDf.count()); + + // Verify some data (sort to ensure deterministic order) + Row firstRow = readDf.orderBy("value").first(); + assertEquals(0L, firstRow.getLong(0)); + assertEquals(0L, firstRow.getLong(1)); + } + + // Test data classes + public static class Person implements java.io.Serializable { + private String name; + private int age; + + public Person() { + } + + public Person(String name, int age) { + this.name = name; + this.age = age; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public int getAge() { + return age; + } + + public void setAge(int age) { + this.age = age; + } + } + + public static class PersonWithYear implements java.io.Serializable { + private String name; + private int age; + private int year; + + public PersonWithYear() { + } + + public PersonWithYear(String name, int age, int year) { + this.name = name; + this.age = age; + this.year = year; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public int getAge() { + return age; + } + + public void setAge(int age) { + this.age = age; + } + + public int getYear() { + return year; + } + + public void setYear(int year) { + this.year = year; + } + } +} diff --git a/test/java/spark/src/test/java/seaweed/spark/SparkSQLTest.java b/test/java/spark/src/test/java/seaweed/spark/SparkSQLTest.java new file mode 100644 index 000000000..231952023 --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/SparkSQLTest.java @@ -0,0 +1,278 @@ +package seaweed.spark; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * Integration tests for Spark SQL operations with SeaweedFS. 
+ */ +public class SparkSQLTest extends SparkTestBase { + + @Test + public void testCreateTableAndQuery() { + skipIfTestsDisabled(); + + // Create test data + List<Employee> employees = Arrays.asList( + new Employee(1, "Alice", "Engineering", 100000), + new Employee(2, "Bob", "Sales", 80000), + new Employee(3, "Charlie", "Engineering", 120000), + new Employee(4, "David", "Sales", 75000)); + + Dataset<Row> df = spark.createDataFrame(employees, Employee.class); + + // Write to SeaweedFS + String tablePath = getTestPath("employees"); + df.write().mode(SaveMode.Overwrite).parquet(tablePath); + + // Create temporary view + Dataset<Row> employeesDf = spark.read().parquet(tablePath); + employeesDf.createOrReplaceTempView("employees"); + + // Run SQL queries + Dataset<Row> engineeringEmployees = spark.sql( + "SELECT name, salary FROM employees WHERE department = 'Engineering'"); + + assertEquals(2, engineeringEmployees.count()); + + Dataset<Row> highPaidEmployees = spark.sql( + "SELECT name, salary FROM employees WHERE salary > 90000"); + + assertEquals(2, highPaidEmployees.count()); + } + + @Test + public void testAggregationQueries() { + skipIfTestsDisabled(); + + // Create sales data + List<Sale> sales = Arrays.asList( + new Sale("2024-01", "Product A", 100), + new Sale("2024-01", "Product B", 150), + new Sale("2024-02", "Product A", 120), + new Sale("2024-02", "Product B", 180), + new Sale("2024-03", "Product A", 110)); + + Dataset<Row> df = spark.createDataFrame(sales, Sale.class); + + // Write to SeaweedFS + String tablePath = getTestPath("sales"); + df.write().mode(SaveMode.Overwrite).parquet(tablePath); + + // Create temporary view + Dataset<Row> salesDf = spark.read().parquet(tablePath); + salesDf.createOrReplaceTempView("sales"); + + // Aggregate query + Dataset<Row> monthlySales = spark.sql( + "SELECT month, SUM(amount) as total FROM sales GROUP BY month ORDER BY month"); + + List<Row> results = monthlySales.collectAsList(); + assertEquals(3, results.size()); + assertEquals("2024-01", results.get(0).getString(0)); + assertEquals(250, results.get(0).getLong(1)); + } + + @Test + public void testJoinOperations() { + skipIfTestsDisabled(); + + // Create employee data + List<Employee> employees = Arrays.asList( + new Employee(1, "Alice", "Engineering", 100000), + new Employee(2, "Bob", "Sales", 80000)); + + // Create department data + List<Department> departments = Arrays.asList( + new Department("Engineering", "Building Products"), + new Department("Sales", "Selling Products")); + + Dataset<Row> empDf = spark.createDataFrame(employees, Employee.class); + Dataset<Row> deptDf = spark.createDataFrame(departments, Department.class); + + // Write to SeaweedFS + String empPath = getTestPath("employees_join"); + String deptPath = getTestPath("departments_join"); + + empDf.write().mode(SaveMode.Overwrite).parquet(empPath); + deptDf.write().mode(SaveMode.Overwrite).parquet(deptPath); + + // Read back and create views + spark.read().parquet(empPath).createOrReplaceTempView("emp"); + spark.read().parquet(deptPath).createOrReplaceTempView("dept"); + + // Join query + Dataset<Row> joined = spark.sql( + "SELECT e.name, e.salary, d.description " + + "FROM emp e JOIN dept d ON e.department = d.name"); + + assertEquals(2, joined.count()); + + List<Row> results = joined.collectAsList(); + assertTrue(results.stream() + .anyMatch(r -> "Alice".equals(r.getString(0)) && "Building Products".equals(r.getString(2)))); + } + + @Test + public void testWindowFunctions() { + skipIfTestsDisabled(); + + // Create 
employee data with salaries + List<Employee> employees = Arrays.asList( + new Employee(1, "Alice", "Engineering", 100000), + new Employee(2, "Bob", "Engineering", 120000), + new Employee(3, "Charlie", "Sales", 80000), + new Employee(4, "David", "Sales", 90000)); + + Dataset<Row> df = spark.createDataFrame(employees, Employee.class); + + String tablePath = getTestPath("employees_window"); + df.write().mode(SaveMode.Overwrite).parquet(tablePath); + + Dataset<Row> employeesDf = spark.read().parquet(tablePath); + employeesDf.createOrReplaceTempView("employees_ranked"); + + // Window function query - rank employees by salary within department + Dataset<Row> ranked = spark.sql( + "SELECT name, department, salary, " + + "RANK() OVER (PARTITION BY department ORDER BY salary DESC) as rank " + + "FROM employees_ranked"); + + assertEquals(4, ranked.count()); + + // Verify Bob has rank 1 in Engineering (highest salary) + List<Row> results = ranked.collectAsList(); + Row bobRow = results.stream() + .filter(r -> "Bob".equals(r.getString(0))) + .findFirst() + .orElse(null); + + assertNotNull(bobRow); + assertEquals(1, bobRow.getInt(3)); + } + + // Test data classes + public static class Employee implements java.io.Serializable { + private int id; + private String name; + private String department; + private int salary; + + public Employee() { + } + + public Employee(int id, String name, String department, int salary) { + this.id = id; + this.name = name; + this.department = department; + this.salary = salary; + } + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getDepartment() { + return department; + } + + public void setDepartment(String department) { + this.department = department; + } + + public int getSalary() { + return salary; + } + + public void setSalary(int salary) { + this.salary = salary; + } + } + + public static class Sale implements java.io.Serializable { + private String month; + private String product; + private int amount; + + public Sale() { + } + + public Sale(String month, String product, int amount) { + this.month = month; + this.product = product; + this.amount = amount; + } + + public String getMonth() { + return month; + } + + public void setMonth(String month) { + this.month = month; + } + + public String getProduct() { + return product; + } + + public void setProduct(String product) { + this.product = product; + } + + public int getAmount() { + return amount; + } + + public void setAmount(int amount) { + this.amount = amount; + } + } + + public static class Department implements java.io.Serializable { + private String name; + private String description; + + public Department() { + } + + public Department(String name, String description) { + this.name = name; + this.description = description; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + } +} diff --git a/test/java/spark/src/test/java/seaweed/spark/SparkTestBase.java b/test/java/spark/src/test/java/seaweed/spark/SparkTestBase.java new file mode 100644 index 000000000..5b17e6f2d --- /dev/null +++ b/test/java/spark/src/test/java/seaweed/spark/SparkTestBase.java @@ -0,0 +1,128 @@ +package seaweed.spark; + +import 
org.apache.hadoop.conf.Configuration; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.SparkSession; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; + +/** + * Base class for Spark integration tests with SeaweedFS. + * + * These tests require a running SeaweedFS cluster. + * Set environment variable SEAWEEDFS_TEST_ENABLED=true to enable these tests. + */ +public abstract class SparkTestBase { + + protected SparkSession spark; + protected static final String TEST_ROOT = "/test-spark"; + protected static final boolean TESTS_ENABLED = "true".equalsIgnoreCase(System.getenv("SEAWEEDFS_TEST_ENABLED")); + + // SeaweedFS connection settings + protected static final String SEAWEEDFS_HOST = System.getenv().getOrDefault("SEAWEEDFS_FILER_HOST", "localhost"); + protected static final String SEAWEEDFS_PORT = System.getenv().getOrDefault("SEAWEEDFS_FILER_PORT", "8888"); + protected static final String SEAWEEDFS_GRPC_PORT = System.getenv().getOrDefault("SEAWEEDFS_FILER_GRPC_PORT", + "18888"); + + @Before + public void setUpSpark() throws IOException { + if (!TESTS_ENABLED) { + return; + } + + SparkConf sparkConf = new SparkConf() + .setAppName("SeaweedFS Integration Test") + .setMaster("local[1]") // Single thread to avoid concurrent gRPC issues + .set("spark.driver.host", "localhost") + .set("spark.sql.warehouse.dir", getSeaweedFSPath("/spark-warehouse")) + // SeaweedFS configuration + .set("spark.hadoop.fs.defaultFS", String.format("seaweedfs://%s:%s", SEAWEEDFS_HOST, SEAWEEDFS_PORT)) + .set("spark.hadoop.fs.seaweedfs.impl", "seaweed.hdfs.SeaweedFileSystem") + .set("spark.hadoop.fs.seaweed.impl", "seaweed.hdfs.SeaweedFileSystem") + .set("spark.hadoop.fs.seaweed.filer.host", SEAWEEDFS_HOST) + .set("spark.hadoop.fs.seaweed.filer.port", SEAWEEDFS_PORT) + .set("spark.hadoop.fs.seaweed.filer.port.grpc", SEAWEEDFS_GRPC_PORT) + .set("spark.hadoop.fs.AbstractFileSystem.seaweedfs.impl", "seaweed.hdfs.SeaweedAbstractFileSystem") + // Set replication to empty string to use filer default + .set("spark.hadoop.fs.seaweed.replication", "") + // Smaller buffer to reduce load + .set("spark.hadoop.fs.seaweed.buffer.size", "1048576") // 1MB + // Reduce parallelism + .set("spark.default.parallelism", "1") + .set("spark.sql.shuffle.partitions", "1") + // Simpler output committer + .set("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version", "2") + .set("spark.sql.sources.commitProtocolClass", + "org.apache.spark.sql.execution.datasources.SQLHadoopMapReduceCommitProtocol") + // Disable speculative execution to reduce load + .set("spark.speculation", "false") + // Increase task retry to handle transient consistency issues + .set("spark.task.maxFailures", "4") + // Wait longer before retrying failed tasks + .set("spark.task.reaper.enabled", "true") + .set("spark.task.reaper.pollingInterval", "1s"); + + spark = SparkSession.builder() + .config(sparkConf) + .getOrCreate(); + + // Clean up test directory + cleanupTestDirectory(); + } + + @After + public void tearDownSpark() { + if (!TESTS_ENABLED || spark == null) { + return; + } + + try { + // Try to cleanup but don't fail if it doesn't work + cleanupTestDirectory(); + } catch (Exception e) { + System.err.println("Cleanup failed: " + e.getMessage()); + } finally { + try { + spark.stop(); + } catch (Exception e) { + System.err.println("Spark stop failed: " + e.getMessage()); + } + spark = null; + } + } + + protected String getSeaweedFSPath(String path) { + return String.format("seaweedfs://%s:%s%s", SEAWEEDFS_HOST, 
SEAWEEDFS_PORT, path); + } + + protected String getTestPath(String subPath) { + return getSeaweedFSPath(TEST_ROOT + "/" + subPath); + } + + private void cleanupTestDirectory() { + if (spark != null) { + try { + Configuration conf = spark.sparkContext().hadoopConfiguration(); + org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get( + java.net.URI.create(getSeaweedFSPath("/")), conf); + org.apache.hadoop.fs.Path testPath = new org.apache.hadoop.fs.Path(TEST_ROOT); + if (fs.exists(testPath)) { + fs.delete(testPath, true); + } + } catch (Exception e) { + // Suppress cleanup errors - they shouldn't fail tests + // Common in distributed systems with eventual consistency + System.err.println("Warning: cleanup failed (non-critical): " + e.getMessage()); + } + } + } + + protected void skipIfTestsDisabled() { + if (!TESTS_ENABLED) { + System.out.println("Skipping test - SEAWEEDFS_TEST_ENABLED not set"); + org.junit.Assume.assumeTrue("SEAWEEDFS_TEST_ENABLED not set", false); + } + } +} diff --git a/test/java/spark/src/test/resources/log4j.properties b/test/java/spark/src/test/resources/log4j.properties new file mode 100644 index 000000000..0a603e1b0 --- /dev/null +++ b/test/java/spark/src/test/resources/log4j.properties @@ -0,0 +1,19 @@ +# Set root logger level +log4j.rootLogger=WARN, console + +# Console appender +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Set log levels for specific packages +log4j.logger.org.apache.spark=WARN +log4j.logger.org.apache.hadoop=WARN +log4j.logger.org.apache.parquet=WARN +log4j.logger.seaweed=INFO + +# Suppress unnecessary warnings +log4j.logger.org.apache.spark.util.Utils=ERROR +log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR + diff --git a/test/kafka/go.mod b/test/kafka/go.mod index b0f66885f..f0a99e7fe 100644 --- a/test/kafka/go.mod +++ b/test/kafka/go.mod @@ -10,7 +10,7 @@ require ( github.com/seaweedfs/seaweedfs v0.0.0-00010101000000-000000000000 github.com/segmentio/kafka-go v0.4.49 github.com/stretchr/testify v1.11.1 - google.golang.org/grpc v1.75.1 + google.golang.org/grpc v1.77.0 ) replace github.com/seaweedfs/seaweedfs => ../../ @@ -18,7 +18,7 @@ replace github.com/seaweedfs/seaweedfs => ../../ require ( cloud.google.com/go/auth v0.16.5 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect - cloud.google.com/go/compute/metadata v0.8.0 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect @@ -95,7 +95,7 @@ require ( github.com/geoffgarside/ber v1.2.0 // indirect github.com/go-chi/chi/v5 v5.2.2 // indirect github.com/go-darwin/apfs v0.0.0-20211011131704-f84b94dbf348 // indirect - github.com/go-jose/go-jose/v4 v4.1.1 // indirect + github.com/go-jose/go-jose/v4 v4.1.3 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect @@ -200,7 +200,7 @@ require ( github.com/spf13/cast v1.10.0 // indirect github.com/spf13/pflag v1.0.10 // indirect github.com/spf13/viper v1.21.0 // indirect - github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect + github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect github.com/subosito/gotenv v1.6.0 // 
indirect github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 // indirect github.com/t3rm1n4l/go-mega v0.0.0-20250926104142-ccb8d3498e6c // indirect @@ -222,27 +222,27 @@ require ( github.com/zeebo/xxh3 v1.0.2 // indirect go.etcd.io/bbolt v1.4.2 // indirect go.mongodb.org/mongo-driver v1.17.6 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect - go.opentelemetry.io/otel v1.37.0 // indirect - go.opentelemetry.io/otel/metric v1.37.0 // indirect - go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.opentelemetry.io/otel v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/crypto v0.45.0 // indirect golang.org/x/exp v0.0.0-20250811191247-51f88131bc50 // indirect golang.org/x/image v0.33.0 // indirect golang.org/x/net v0.47.0 // indirect - golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/oauth2 v0.32.0 // indirect golang.org/x/sync v0.18.0 // indirect golang.org/x/sys v0.38.0 // indirect golang.org/x/term v0.37.0 // indirect golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.12.0 // indirect google.golang.org/api v0.247.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20251111163417-95abcf5c77ba // indirect google.golang.org/grpc/security/advancedtls v1.0.0 // indirect - google.golang.org/protobuf v1.36.9 // indirect + google.golang.org/protobuf v1.36.10 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/validator.v2 v2.0.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/test/kafka/go.sum b/test/kafka/go.sum index 3295407b4..09553d9d4 100644 --- a/test/kafka/go.sum +++ b/test/kafka/go.sum @@ -23,8 +23,8 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute/metadata v0.8.0 h1:HxMRIbao8w17ZX6wBnjhcDkW6lTFpgcaobyVfZWqRLA= -cloud.google.com/go/compute/metadata v0.8.0/go.mod h1:sYOGTp851OV9bOFJ9CH7elVvyzopvWQFNNghtDQ/Biw= +cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= @@ -248,8 +248,8 @@ github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3Bop github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-jose/go-jose/v4 v4.1.1 
h1:JYhSgy4mXXzAdF3nUx3ygx347LRXJRrpgyU3adRmkAI= -github.com/go-jose/go-jose/v4 v4.1.1/go.mod h1:BdsZGqgdO3b6tTc6LSE56wcDbMMLuPsw5d4ZD5f94kA= +github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= +github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -597,8 +597,8 @@ github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY= -github.com/spiffe/go-spiffe/v2 v2.5.0 h1:N2I01KCUkv1FAjZXJMwh95KK1ZIQLYbPfhaxw8WS0hE= -github.com/spiffe/go-spiffe/v2 v2.5.0/go.mod h1:P+NxobPc6wXhVtINNtFjNWGBTreew1GBUCwT2wPmb7g= +github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= +github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -692,20 +692,20 @@ go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 h1:Hf9xI/XLML9ElpiHVDNwvqI0hIFlzV8dgIr35kV1kRU= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0/go.mod h1:NfchwuyNoMcZ5MLHwPrODwUF1HWCXWrL31s8gSAdIKY= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= -go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= -go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= -go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod 
h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU= @@ -823,8 +823,8 @@ golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4Iltr golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= -golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= +golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1042,10 +1042,10 @@ google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20250715232539-7130f93afb79 h1:Nt6z9UHqSlIdIGJdz6KhTIs2VRx/iOsA5iE8bmQNcxs= google.golang.org/genproto v0.0.0-20250715232539-7130f93afb79/go.mod h1:kTmlBHMPqR5uCZPBvwa2B18mvubkjyY3CRLI0c6fj0s= -google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c h1:AtEkQdl5b6zsybXcbz00j1LwNodDuH6hVifIaNqk7NQ= -google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c/go.mod h1:ea2MjsO70ssTfCjiwHgI0ZFqcw45Ksuk2ckf9G468GA= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c h1:qXWI/sQtv5UKboZ/zUk7h+mrf/lXORyI+n9DKDAusdg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c/go.mod h1:gw1tLEfykwDz2ET4a12jcXt4couGAm7IwsVaTy0Sflo= +google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 h1:mepRgnBZa07I4TRuomDE4sTIYieg/osKmzIf4USdWS4= +google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8/go.mod h1:fDMmzKV90WSg1NbozdqrE64fkuTv6mlq2zxo9ad+3yo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251111163417-95abcf5c77ba h1:UKgtfRM7Yh93Sya0Fo8ZzhDP4qBckrrxEr2oF5UIVb8= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251111163417-95abcf5c77ba/go.mod 
h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -1058,10 +1058,10 @@ google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKa google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.75.1 h1:/ODCNEuf9VghjgO3rqLcfg8fiOP0nSluljWFlDxELLI= -google.golang.org/grpc v1.75.1/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= -google.golang.org/grpc/examples v0.0.0-20230224211313-3775f633ce20 h1:MLBCGN1O7GzIx+cBiwfYPwtmZ41U3Mn/cotLJciaArI= -google.golang.org/grpc/examples v0.0.0-20230224211313-3775f633ce20/go.mod h1:Nr5H8+MlGWr5+xX/STzdoEqJrO+YteqFbMyCsrb6mH0= +google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM= +google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig= +google.golang.org/grpc/examples v0.0.0-20250407062114-b368379ef8f6 h1:ExN12ndbJ608cboPYflpTny6mXSzPrDLh0iTaVrRrds= +google.golang.org/grpc/examples v0.0.0-20250407062114-b368379ef8f6/go.mod h1:6ytKWczdvnpnO+m+JiG9NjEDzR1FJfsnmJdG7B8QVZ8= google.golang.org/grpc/security/advancedtls v1.0.0 h1:/KQ7VP/1bs53/aopk9QhuPyFAp9Dm9Ejix3lzYkCrDA= google.golang.org/grpc/security/advancedtls v1.0.0/go.mod h1:o+s4go+e1PJ2AjuQMY5hU82W7lDlefjJA6FqEHRVHWk= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -1074,8 +1074,8 @@ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw= -google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/weed/pb/grpc_client_server.go b/weed/pb/grpc_client_server.go index e822c36c8..9caf1f511 100644 --- a/weed/pb/grpc_client_server.go +++ b/weed/pb/grpc_client_server.go @@ -52,16 +52,20 @@ func NewGrpcServer(opts ...grpc.ServerOption) *grpc.Server { var options []grpc.ServerOption options = append(options, grpc.KeepaliveParams(keepalive.ServerParameters{ - Time: 10 * time.Second, // wait time before ping if no activity + Time: 30 * time.Second, // wait time before ping if no activity (match client) Timeout: 20 * time.Second, // ping timeout // MaxConnectionAge: 10 * time.Hour, }), grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{ - MinTime: 60 * 
time.Second, // min time a client should wait before sending a ping + MinTime: 30 * time.Second, // min time a client should wait before sending a ping (match client) PermitWithoutStream: true, }), grpc.MaxRecvMsgSize(Max_Message_Size), grpc.MaxSendMsgSize(Max_Message_Size), + grpc.MaxConcurrentStreams(1000), // Allow more concurrent streams + grpc.InitialWindowSize(16*1024*1024), // 16MB initial window + grpc.InitialConnWindowSize(16*1024*1024), // 16MB connection window + grpc.MaxHeaderListSize(8*1024*1024), // 8MB header list limit grpc.UnaryInterceptor(requestIDUnaryInterceptor()), ) for _, opt := range opts { |

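Note on the gRPC keepalive hunk above (weed/pb/grpc_client_server.go): the server keepalive Time and the enforcement MinTime are both set to 30 seconds and annotated "(match client)", so a client that pings every 30 seconds of inactivity is no longer rejected for pinging too often. The sketch below shows what a compatible client-side configuration looks like with the stock grpc-go keepalive package. It is illustrative only: the dialOptions helper, the localhost:18888 target, and the insecure credentials are assumptions made for the example, not the repository's actual client wiring.

package main

import (
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/keepalive"
)

// dialOptions (hypothetical helper) returns client options whose keepalive
// interval stays at or above the server's EnforcementPolicy.MinTime of 30s,
// so client pings are never flagged by the server as too frequent.
func dialOptions() []grpc.DialOption {
	return []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()), // example only; real deployments may use TLS
		grpc.WithKeepaliveParams(keepalive.ClientParameters{
			Time:                30 * time.Second, // ping after 30s idle; must be >= server MinTime
			Timeout:             20 * time.Second, // wait up to 20s for the ping ack
			PermitWithoutStream: true,             // allowed because the server sets PermitWithoutStream: true
		}),
	}
}

func main() {
	// Placeholder target (e.g. a filer gRPC port); adjust for your setup.
	conn, err := grpc.NewClient("localhost:18888", dialOptions()...)
	if err != nil {
		panic(err)
	}
	defer conn.Close()
}

Keeping the client ping interval at or above the server's MinTime is what avoids the GOAWAY/"too_many_pings" rejections that keepalive enforcement otherwise triggers; the larger window sizes, header limit, and MaxConcurrentStreams in the same hunk are server-side throughput settings and do not require matching client options.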