S3: Add tests for PyArrow with native S3 filesystem (#7508)

* PyArrow native S3 filesystem * add sse-s3 tests * update * minor * ENABLE_SSE_S3 * Update test_pyarrow_native_s3.py * clean up * refactoring * Update test_pyarrow_native_s3.py
author: Chris Lu <chrislusf@users.noreply.github.com> 2025-11-19 13:49:22 -0800
committer: GitHub <noreply@github.com> 2025-11-19 13:49:22 -0800
commit: 8be9e258fc7d1110421aaee451945668cafa23e7 (patch)
tree: 945fc21ce75a9a223825efe9996fb63d8f6a2067 /test/s3/parquet/parquet_test_utils.py
parent: ca84a8a7131e2be81ead697c472bf967548d97ec (diff)
download: seaweedfs-8be9e258fc7d1110421aaee451945668cafa23e7.tar.xz
seaweedfs-8be9e258fc7d1110421aaee451945668cafa23e7.zip
1 files changed, 41 insertions, 0 deletions
diff --git a/test/s3/parquet/parquet_test_utils.py b/test/s3/parquet/parquet_test_utils.py
new file mode 100644
index 000000000..d7e4c43db
--- /dev/null
+++ b/test/s3/parquet/parquet_test_utils.py
@@ -0,0 +1,41 @@
+"""
+Shared utility functions for PyArrow Parquet tests.
+
+This module provides common test utilities used across multiple test scripts
+to avoid code duplication and ensure consistency.
+"""
+
+import pyarrow as pa
+
+
+def create_sample_table(num_rows: int = 5) -> pa.Table:
+    """Create a sample PyArrow table for testing.
+    
+    Args:
+        num_rows: Number of rows to generate (default: 5)
+    
+    Returns:
+        PyArrow Table with test data containing:
+        - id: int64 sequential IDs (0 to num_rows-1)
+        - name: string user names (user_0, user_1, ...)
+        - value: float64 values (id * 1.5)
+        - flag: bool, True for even ids and False for odd ids
+    
+    Example:
+        >>> print(create_sample_table(3).schema)
+        id: int64
+        name: string
+        value: double
+        flag: bool
+        >>> create_sample_table(3).num_rows
+        3
+    """
+    return pa.table(
+        {
+            "id": pa.array(range(num_rows), type=pa.int64()),
+            "name": pa.array([f"user_{i}" for i in range(num_rows)], type=pa.string()),
+            "value": pa.array([float(i) * 1.5 for i in range(num_rows)], type=pa.float64()),
+            "flag": pa.array([i % 2 == 0 for i in range(num_rows)], type=pa.bool_()),
+        }
+    )
+
author	Chris Lu <chrislusf@users.noreply.github.com>	2025-11-19 13:49:22 -0800
committer	GitHub <noreply@github.com>	2025-11-19 13:49:22 -0800
commit	8be9e258fc7d1110421aaee451945668cafa23e7 (patch)
tree	945fc21ce75a9a223825efe9996fb63d8f6a2067 /test/s3/parquet/parquet_test_utils.py
parent	ca84a8a7131e2be81ead697c472bf967548d97ec (diff)
download	seaweedfs-8be9e258fc7d1110421aaee451945668cafa23e7.tar.xz seaweedfs-8be9e258fc7d1110421aaee451945668cafa23e7.zip