diff options
| author | Chris Lu <chrislusf@users.noreply.github.com> | 2025-11-19 13:49:22 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-11-19 13:49:22 -0800 |
| commit | 8be9e258fc7d1110421aaee451945668cafa23e7 (patch) | |
| tree | 945fc21ce75a9a223825efe9996fb63d8f6a2067 /test/s3/parquet/parquet_test_utils.py | |
| parent | ca84a8a7131e2be81ead697c472bf967548d97ec (diff) | |
| download | seaweedfs-8be9e258fc7d1110421aaee451945668cafa23e7.tar.xz seaweedfs-8be9e258fc7d1110421aaee451945668cafa23e7.zip | |
S3: Add tests for PyArrow with native S3 filesystem (#7508)
* PyArrow native S3 filesystem
* add sse-s3 tests
* update
* minor
* ENABLE_SSE_S3
* Update test_pyarrow_native_s3.py
* clean up
* refactoring
* Update test_pyarrow_native_s3.py
Diffstat (limited to 'test/s3/parquet/parquet_test_utils.py')
| -rw-r--r-- | test/s3/parquet/parquet_test_utils.py | 41 |
1 files changed, 41 insertions, 0 deletions
"""
Shared utility functions for PyArrow Parquet tests.

This module provides common test utilities used across multiple test scripts
to avoid code duplication and ensure consistency.
"""

import pyarrow as pa


def create_sample_table(num_rows: int = 5) -> pa.Table:
    """Build a small, deterministic PyArrow table for use in tests.

    Every column is derived from the row index, so two calls with the same
    ``num_rows`` produce equal tables.

    Args:
        num_rows: How many rows the table should have (default: 5).

    Returns:
        A PyArrow Table with four columns, in this order:
        - id: int64, the row index (0 to num_rows-1)
        - name: string, ``user_<index>`` (user_0, user_1, ...)
        - value: float64, the row index multiplied by 1.5
        - flag: bool, True for even indices and False for odd ones

    Example:
        >>> table = create_sample_table(3)
        >>> table.column_names
        ['id', 'name', 'value', 'flag']
        >>> table.num_rows
        3
    """
    # A range object can be iterated repeatedly, so one instance feeds
    # every column below.
    row_ids = range(num_rows)

    id_column = pa.array(row_ids, type=pa.int64())
    name_column = pa.array(
        [f"user_{row_id}" for row_id in row_ids],
        type=pa.string(),
    )
    value_column = pa.array(
        [float(row_id) * 1.5 for row_id in row_ids],
        type=pa.float64(),
    )
    flag_column = pa.array(
        [row_id % 2 == 0 for row_id in row_ids],
        type=pa.bool_(),
    )

    # Dict insertion order determines the column order of the table.
    columns = {
        "id": id_column,
        "name": name_column,
        "value": value_column,
        "flag": flag_column,
    }
    return pa.table(columns)
