Skip to content

Commit

Permalink
[SPARK-48248][PYTHON] Fix nested array to respect legacy conf of inferArrayTypeFromFirstElement
Browse files Browse the repository at this point in the history

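This PR fixes a bug, introduced by #36545, where nested arrays do not respect `spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled`: the recursive `_infer_type` calls made while inferring an array's element type did not forward the `infer_array_from_first_element` flag.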

This change is needed so that users have a way to restore the original behaviour.

Yes, it fixes the regression when `spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled` is set to `True`.

A unit test was added.

No.

Closes #46548 from HyukjinKwon/SPARK-48248.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
(cherry picked from commit b2140d0)
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
  • Loading branch information
HyukjinKwon committed May 13, 2024
1 parent e9a1b42 commit ab511a7
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
7 changes: 7 additions & 0 deletions python/pyspark/sql/tests/test_types.py
Expand Up @@ -1275,6 +1275,13 @@ def test_yearmonth_interval_type(self):
schema3 = self.spark.sql("SELECT INTERVAL '8' MONTH AS interval").schema
self.assertEqual(schema3.fields[0].dataType, YearMonthIntervalType(1, 1))

def test_infer_array_element_type_with_struct(self):
# SPARK-48248: Nested array to respect legacy conf of inferArrayTypeFromFirstElement
# The single row holds one column whose value is the nested array [[1, "a"]].
# With the legacy conf enabled, the expected value below shows the inner
# array read back as [1, None]: the leading int 1 is kept and the string "a"
# comes back as None.
with self.sql_conf(
{"spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled": True}
):
self.assertEqual([[1, None]], self.spark.createDataFrame([[[[1, "a"]]]]).first()[0])


class DataTypeTests(unittest.TestCase):
# regression test for SPARK-6055
Expand Down
18 changes: 16 additions & 2 deletions python/pyspark/sql/types.py
Expand Up @@ -1606,13 +1606,27 @@ def _infer_type(
if len(obj) > 0:
if infer_array_from_first_element:
return ArrayType(
_infer_type(obj[0], infer_dict_as_struct, prefer_timestamp_ntz), True
_infer_type(
obj[0],
infer_dict_as_struct,
infer_array_from_first_element,
prefer_timestamp_ntz,
),
True,
)
else:
return ArrayType(
reduce(
_merge_type,
(_infer_type(v, infer_dict_as_struct, prefer_timestamp_ntz) for v in obj),
(
_infer_type(
v,
infer_dict_as_struct,
infer_array_from_first_element,
prefer_timestamp_ntz,
)
for v in obj
),
),
True,
)
Expand Down

0 comments on commit ab511a7

Please sign in to comment.