[SPARK-48250][PYTHON][CONNECT][TESTS] Enable array inference tests at…

… test_parity_types.py ### What changes were proposed in this pull request? This PR proposes to enable some array inference tests at test_parity_types.py ### Why are the changes needed? For better test coverage for Spark Connect. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? CI in this PR should verify them. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46550 from HyukjinKwon/SPARK-48250. Lead-authored-by: Hyukjin Kwon <gurwls223@apache.org> Co-authored-by: Hyukjin Kwon <gurwls223@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
apache · May 13, 2024 · 13b0d1a · 13b0d1a
1 parent acc3753
commit 13b0d1a
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 13 deletions.
diff --git a/python/pyspark/sql/tests/connect/test_parity_types.py b/python/pyspark/sql/tests/connect/test_parity_types.py
@@ -39,12 +39,8 @@ def test_create_dataframe_schema_mismatch(self):
         super().test_create_dataframe_schema_mismatch()
 
     @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
-    def test_infer_array_element_type_empty(self):
-        super().test_infer_array_element_type_empty()
-
-    @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
-    def test_infer_array_element_type_with_struct(self):
-        super().test_infer_array_element_type_with_struct()
+    def test_infer_array_element_type_empty_rdd(self):
+        super().test_infer_array_element_type_empty_rdd()
 
     @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
     def test_infer_array_merge_element_types_with_rdd(self):

diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
@@ -366,7 +366,7 @@ def test_infer_array_merge_element_types_with_rdd(self):
         df = self.spark.createDataFrame(rdd)
         self.assertEqual(Row(f1=[1, None], f2=[None, 2]), df.first())
 
-    def test_infer_array_element_type_empty(self):
+    def test_infer_array_element_type_empty_rdd(self):
         # SPARK-39168: Test inferring array element type from all rows
         ArrayRow = Row("f1")
 
@@ -379,6 +379,12 @@ def test_infer_array_element_type_empty(self):
         self.assertEqual(Row(f1=[None]), rows[1])
         self.assertEqual(Row(f1=[1]), rows[2])
 
+    def test_infer_array_element_type_empty(self):
+        # SPARK-39168: Test inferring array element type from all rows
+        ArrayRow = Row("f1")
+
+        data = [ArrayRow([]), ArrayRow([None]), ArrayRow([1])]
+
         df = self.spark.createDataFrame(data)
         rows = df.collect()
         self.assertEqual(Row(f1=[]), rows[0])
@@ -392,12 +398,6 @@ def test_infer_array_element_type_with_struct(self):
         with self.sql_conf({"spark.sql.pyspark.inferNestedDictAsStruct.enabled": True}):
             data = [NestedRow([{"payment": 200.5}, {"name": "A"}])]
 
-            nestedRdd = self.sc.parallelize(data)
-            df = self.spark.createDataFrame(nestedRdd)
-            self.assertEqual(
-                Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first()
-            )
-
             df = self.spark.createDataFrame(data)
             self.assertEqual(
                 Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first()