Skip to content

Commit

Permalink
[SPARK-48250][PYTHON][CONNECT][TESTS] Enable array inference tests at…
Browse files Browse the repository at this point in the history
… test_parity_types.py

### What changes were proposed in this pull request?

This PR proposes to enable some of the array inference tests in `test_parity_types.py`.

### Why are the changes needed?

To improve test coverage for Spark Connect.

### Does this PR introduce _any_ user-facing change?

No, test-only.

### How was this patch tested?

The CI run for this PR should verify the newly enabled tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #46550 from HyukjinKwon/SPARK-48250.

Lead-authored-by: Hyukjin Kwon <gurwls223@apache.org>
Co-authored-by: Hyukjin Kwon <gurwls223@gmail.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
  • Loading branch information
HyukjinKwon and HyukjinKwon committed May 13, 2024
1 parent acc3753 commit 13b0d1a
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 13 deletions.
8 changes: 2 additions & 6 deletions python/pyspark/sql/tests/connect/test_parity_types.py
Expand Up @@ -39,12 +39,8 @@ def test_create_dataframe_schema_mismatch(self):
super().test_create_dataframe_schema_mismatch()

@unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
def test_infer_array_element_type_empty(self):
super().test_infer_array_element_type_empty()

@unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
def test_infer_array_element_type_with_struct(self):
super().test_infer_array_element_type_with_struct()
def test_infer_array_element_type_empty_rdd(self):
super().test_infer_array_element_type_empty_rdd()

@unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
def test_infer_array_merge_element_types_with_rdd(self):
Expand Down
14 changes: 7 additions & 7 deletions python/pyspark/sql/tests/test_types.py
Expand Up @@ -366,7 +366,7 @@ def test_infer_array_merge_element_types_with_rdd(self):
df = self.spark.createDataFrame(rdd)
self.assertEqual(Row(f1=[1, None], f2=[None, 2]), df.first())

def test_infer_array_element_type_empty(self):
def test_infer_array_element_type_empty_rdd(self):
# SPARK-39168: Test inferring array element type from all rows
ArrayRow = Row("f1")

Expand All @@ -379,6 +379,12 @@ def test_infer_array_element_type_empty(self):
self.assertEqual(Row(f1=[None]), rows[1])
self.assertEqual(Row(f1=[1]), rows[2])

def test_infer_array_element_type_empty(self):
# SPARK-39168: Test inferring array element type from all rows
ArrayRow = Row("f1")

data = [ArrayRow([]), ArrayRow([None]), ArrayRow([1])]

df = self.spark.createDataFrame(data)
rows = df.collect()
self.assertEqual(Row(f1=[]), rows[0])
Expand All @@ -392,12 +398,6 @@ def test_infer_array_element_type_with_struct(self):
with self.sql_conf({"spark.sql.pyspark.inferNestedDictAsStruct.enabled": True}):
data = [NestedRow([{"payment": 200.5}, {"name": "A"}])]

nestedRdd = self.sc.parallelize(data)
df = self.spark.createDataFrame(nestedRdd)
self.assertEqual(
Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first()
)

df = self.spark.createDataFrame(data)
self.assertEqual(
Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first()
Expand Down

0 comments on commit 13b0d1a

Please sign in to comment.