Skip to content

Commit

Permalink
[GLUTEN-1336][VL] add spark3.3 UT under connector and expression (#1685)
Browse files Browse the repository at this point in the history
* [GLUTEN-1336][VL] add spark3.3 UT under connector and expression
  • Loading branch information
yma11 committed Jun 2, 2023
1 parent ea267ed commit 87fe315
Show file tree
Hide file tree
Showing 34 changed files with 1,068 additions and 3 deletions.
Expand Up @@ -189,6 +189,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenDecimalExpressionSuite]
enableSuite[GlutenStringFunctionsSuite]
enableSuite[GlutenRegexpExpressionsSuite]
enableSuite[GlutenNullExpressionsSuite]
enableSuite[GlutenPredicateSuite]
enableSuite[GlutenMathExpressionsSuite]
enableSuite[GlutenMathFunctionsSuite]
Expand Down
Expand Up @@ -18,14 +18,147 @@
package io.glutenproject.utils.velox

import io.glutenproject.utils.BackendTestSettings
import org.apache.spark.sql.catalyst.expressions.GlutenMathExpressionsSuite
import org.apache.spark.sql.{GlutenBloomFilterAggregateQuerySuite, GlutenStringFunctionsSuite}
import org.apache.spark.sql.catalyst.expressions.{GlutenAnsiCastSuiteWithAnsiModeOff, GlutenAnsiCastSuiteWithAnsiModeOn, GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCastSuiteWithAnsiModeOn, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite, GlutenTryCastSuite}
import org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuite, GlutenDataSourceV2Suite, GlutenFileDataSourceV2FallBackSuite, GlutenLocalScanSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite}
import org.apache.spark.sql.{GlutenBloomFilterAggregateQuerySuite, GlutenJsonFunctionsSuite, GlutenStringFunctionsSuite}

/**
 * Test settings for the Velox backend.
 *
 * Each `enableSuite[...]` call registers one Gluten wrapper of a Spark test suite. The
 * `.exclude(...)` calls chained onto it name the test cases of that suite that are skipped
 * for this backend; the accompanying comments record the reason where it is known.
 */
class VeloxTestSettings extends BackendTestSettings {
  enableSuite[GlutenStringFunctionsSuite]
  enableSuite[GlutenBloomFilterAggregateQuerySuite]
    // fallback might_contain, the input argument binary is not same with vanilla spark
    .exclude("Test NULL inputs for might_contain")

  // Suites under org.apache.spark.sql.connector.
  enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
  enableSuite[GlutenDataSourceV2DataFrameSuite]
  enableSuite[GlutenDataSourceV2FunctionSuite]
  enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
  enableSuite[GlutenDataSourceV2SQLSuite]
  enableSuite[GlutenDataSourceV2Suite]
    // Gluten does not support the convert from spark columnar data
    // to velox columnar data.
    .exclude("columnar batch scan implementation")
    // Rewrite the following test in GlutenDataSourceV2Suite.
    .exclude("partitioning reporting")
  enableSuite[GlutenFileDataSourceV2FallBackSuite]
  enableSuite[GlutenLocalScanSuite]
  enableSuite[GlutenSupportsCatalogOptionsSuite]
  enableSuite[GlutenTableCapabilityCheckSuite]
  enableSuite[GlutenWriteDistributionAndOrderingSuite]

  // Suites under org.apache.spark.sql.catalyst.expressions (plus GlutenJsonFunctionsSuite).
  enableSuite[GlutenAnsiCastSuiteWithAnsiModeOff]
    .exclude(
      "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly.
    )
    .exclude("Fast fail for cast string type to decimal type in ansi mode")
    .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone")
    .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone")

  enableSuite[GlutenAnsiCastSuiteWithAnsiModeOn]
    .exclude(
      "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly.
    )
    .exclude("Fast fail for cast string type to decimal type in ansi mode")
    .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone")
    .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone")

  enableSuite[GlutenCastSuiteWithAnsiModeOn]
    .exclude(
      "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly.
    )
    .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone")
    .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone")
    .exclude("Fast fail for cast string type to decimal type in ansi mode")
  enableSuite[GlutenTryCastSuite]
    .exclude(
      // array/map/struct not supported yet.
      "cast from invalid string array to numeric array should throw NumberFormatException",
      "cast from array II",
      "cast from map II",
      "cast from struct II"
    )
    .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone")
    .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone")
    .exclude("Fast fail for cast string type to decimal type in ansi mode")
  enableSuite[GlutenArithmeticExpressionSuite]
    .exclude(
      "% (Remainder)" // Velox will throw exception when right is zero, need fallback
    )
  enableSuite[GlutenBitwiseExpressionsSuite]
  enableSuite[GlutenCastSuite]
    .exclude(
      "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly.
    )
    .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone")
    .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone")
    .exclude("cast from float")
    .exclude("cast from double")
    .exclude("from decimal")
    .exclude("cast string to date #2")
    .exclude("casting to fixed-precision decimals")
    .exclude("cast from date")
    .exclude("SPARK-32828: cast from a derived user-defined type to a base type")
    .exclude("Fast fail for cast string type to decimal type")
    .exclude("missing cases - from boolean")
  enableSuite[GlutenCollectionExpressionsSuite]
    .exclude("Map Concat")
    .exclude("Shuffle")
  enableSuite[GlutenComplexTypeSuite]
    .exclude("CreateMap")
    .exclude("MapFromArrays")
  enableSuite[GlutenConditionalExpressionSuite]
  enableSuite[GlutenDateExpressionsSuite]
    // Has exception in fallback execution when we use resultDF.collect in evaluation.
    .exclude("DATE_FROM_UNIX_DATE", "TIMESTAMP_MICROS")
    .exclude("DayOfYear")
    .exclude("Year")
    .exclude("Quarter")
    .exclude("Month")
    .exclude("Day / DayOfMonth")
    .exclude("DayOfWeek")
    .exclude("extract the seconds part with fraction from timestamps")
  enableSuite[GlutenDecimalExpressionSuite]
    .exclude("MakeDecimal")
  enableSuite[GlutenHashExpressionsSuite]
    .exclude("SPARK-30633: xxHash with different type seeds")
  enableSuite[GlutenIntervalExpressionsSuite]
    .exclude("seconds")
    .exclude("ANSI: extract days, hours, minutes and seconds")
  enableSuite[GlutenJsonFunctionsSuite]
    // Velox does not support single quotes in get_json_object function.
    .exclude("function get_json_object - support single quotes")
  enableSuite[GlutenLiteralExpressionSuite]
    .exclude("default")
    .exclude("decimal")
    // FIXME(yma11): ObjectType is not covered in RowEncoder/Serializer in vanilla spark
    .exclude("SPARK-37967: Literal.create support ObjectType")
  // Governed by an exclusion list only, like every other suite in this class.
  enableSuite[GlutenMathExpressionsSuite]
    .exclude("cos")
    .exclude("cosh")
    .exclude("toDegrees")
    .exclude("toRadians")
    .exclude("cbrt")
    .exclude("exp")
    .exclude("log10")
    .exclude("log2")
    .exclude("pow")
    .exclude("atan2")
  enableSuite[GlutenMiscExpressionsSuite]
  enableSuite[GlutenNondeterministicSuite]
    .exclude("MonotonicallyIncreasingID")
    .exclude("SparkPartitionID")
  enableSuite[GlutenNullExpressionsSuite]
  enableSuite[GlutenPredicateSuite]
    .exclude("BinaryComparison: lessThan")
    .exclude("BinaryComparison: LessThanOrEqual")
    .exclude("BinaryComparison: GreaterThan")
    .exclude("BinaryComparison: GreaterThanOrEqual")
    .exclude("SPARK-32764: compare special double/float values")
  enableSuite[GlutenRandomSuite]
    .exclude("random")
    .exclude("SPARK-9127 codegen with long seed")
  enableSuite[GlutenRegexpExpressionsSuite]
  enableSuite[GlutenSortOrderExpressionsSuite]
  enableSuite[GlutenStringExpressionsSuite]
    .exclude("Substring")
    .exclude("string for ascii")
    .exclude("replace")
}
@@ -0,0 +1,20 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql

/**
 * Runs the test cases inherited from `JsonFunctionsSuite` with `GlutenSQLTestsTrait` mixed in.
 * The class declares no tests of its own.
 */
class GlutenJsonFunctionsSuite extends JsonFunctionsSuite with GlutenSQLTestsTrait
@@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.{GlutenTestConstants, GlutenTestsTrait}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, StringType}

import java.time.LocalDateTime

/**
 * Runs the cases inherited from `AnsiCastSuiteBase` with `GlutenTestsTrait` mixed in, building
 * plain `Cast` expressions while `SQLConf.ANSI_ENABLED` is set to `true` for the suite.
 */
class GlutenCastSuiteWithAnsiModeOn extends AnsiCastSuiteBase with GlutenTestsTrait {

  /** Runs the inherited setup first, then switches `SQLConf.ANSI_ENABLED` on. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    SQLConf.get.setConf(SQLConf.ANSI_ENABLED, true)
  }

  /**
   * Runs the inherited teardown, then removes the `SQLConf.ANSI_ENABLED` override.
   * The override is removed even when the inherited teardown throws.
   */
  override def afterAll(): Unit = {
    try {
      super.afterAll()
    } finally {
      SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED)
    }
  }

  /**
   * Builds the expression under test.
   *
   * @param v          value to cast; an `Expression` is used as-is, anything else is wrapped
   *                   in a `Literal`
   * @param targetType type to cast to
   * @param timeZoneId optional time zone id passed through to `Cast`
   * @return a `Cast` over the resulting child expression
   */
  override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = {
    val child = v match {
      case expr: Expression => expr
      case value => Literal(value)
    }
    Cast(child, targetType, timeZoneId)
  }

  /** Hint text consumed by the inherited tests (usage is defined in `AnsiCastSuiteBase`). */
  override def setConfigurationHint: String =
    s"set ${SQLConf.ANSI_ENABLED.key} as false"
}

/**
 * Runs the cases inherited from `AnsiCastSuiteBase` with `GlutenTestsTrait` mixed in, building
 * `AnsiCast` expressions while `SQLConf.ANSI_ENABLED` is set to `true` for the suite.
 */
class GlutenAnsiCastSuiteWithAnsiModeOn extends AnsiCastSuiteBase with GlutenTestsTrait {

  /** Runs the inherited setup first, then switches `SQLConf.ANSI_ENABLED` on. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    SQLConf.get.setConf(SQLConf.ANSI_ENABLED, true)
  }

  /**
   * Runs the inherited teardown, then removes the `SQLConf.ANSI_ENABLED` override.
   * The override is removed even when the inherited teardown throws.
   */
  override def afterAll(): Unit = {
    try {
      super.afterAll()
    } finally {
      SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED)
    }
  }

  /**
   * Builds the expression under test.
   *
   * @param v          value to cast; an `Expression` is used as-is, anything else is wrapped
   *                   in a `Literal`
   * @param targetType type to cast to
   * @param timeZoneId optional time zone id passed through to `AnsiCast`
   * @return an `AnsiCast` over the resulting child expression
   */
  override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = {
    val child = v match {
      case expr: Expression => expr
      case value => Literal(value)
    }
    AnsiCast(child, targetType, timeZoneId)
  }

  /** Hint text consumed by the inherited tests (usage is defined in `AnsiCastSuiteBase`). */
  override def setConfigurationHint: String =
    s"set ${SQLConf.STORE_ASSIGNMENT_POLICY.key} as" +
      s" ${SQLConf.StoreAssignmentPolicy.LEGACY.toString}"
}

/**
 * Runs the cases inherited from `AnsiCastSuiteBase` with `GlutenTestsTrait` mixed in, building
 * `AnsiCast` expressions while `SQLConf.ANSI_ENABLED` is set to `false` for the suite.
 */
class GlutenAnsiCastSuiteWithAnsiModeOff extends AnsiCastSuiteBase with GlutenTestsTrait {

  /** Runs the inherited setup first, then switches `SQLConf.ANSI_ENABLED` off. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    SQLConf.get.setConf(SQLConf.ANSI_ENABLED, false)
  }

  /**
   * Runs the inherited teardown, then removes the `SQLConf.ANSI_ENABLED` override.
   * The override is removed even when the inherited teardown throws.
   */
  override def afterAll(): Unit = {
    try {
      super.afterAll()
    } finally {
      SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED)
    }
  }

  /**
   * Builds the expression under test.
   *
   * @param v          value to cast; an `Expression` is used as-is, anything else is wrapped
   *                   in a `Literal`
   * @param targetType type to cast to
   * @param timeZoneId optional time zone id passed through to `AnsiCast`
   * @return an `AnsiCast` over the resulting child expression
   */
  override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = {
    val child = v match {
      case expr: Expression => expr
      case value => Literal(value)
    }
    AnsiCast(child, targetType, timeZoneId)
  }

  /** Hint text consumed by the inherited tests (usage is defined in `AnsiCastSuiteBase`). */
  override def setConfigurationHint: String =
    s"set ${SQLConf.STORE_ASSIGNMENT_POLICY.key} as" +
      s" ${SQLConf.StoreAssignmentPolicy.LEGACY.toString}"
}

/**
 * Runs the cases inherited from `TryCastSuite` with `GlutenTestsTrait` mixed in, and adds a
 * Gluten-tagged version of the SPARK-35698 timestamp-to-string test.
 */
class GlutenTryCastSuite extends TryCastSuite with GlutenTestsTrait {

  // ISO-8601 local date-times used as inputs by the test below.
  private val specialTs = Seq(
    "0001-01-01T00:00:00", // the first timestamp of Common Era
    "1582-10-15T23:59:59", // the cutover date from Julian to Gregorian calendar
    "1970-01-01T00:00:00", // the epoch timestamp
    "9999-12-31T23:59:59" // the last supported timestamp according to SQL standard
  )

  test(GlutenTestConstants.GLUTEN_TEST +
    "SPARK-35698: cast timestamp without time zone to string") {
    for (isoText <- specialTs) {
      // The expected string is the input with the ISO 'T' separator replaced by a space.
      val expected = isoText.replace("T", " ")
      checkEvaluation(cast(LocalDateTime.parse(isoText), StringType), expected)
    }
  }
}
@@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.GlutenTestsTrait

/**
 * Runs the test cases inherited from `ArithmeticExpressionSuite` with `GlutenTestsTrait`
 * mixed in. The class declares no tests of its own.
 */
class GlutenArithmeticExpressionSuite extends ArithmeticExpressionSuite with GlutenTestsTrait
@@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.{GlutenTestConstants, GlutenTestsTrait}

/**
 * Runs the test cases inherited from `BitwiseExpressionsSuite` with `GlutenTestsTrait`
 * mixed in. The class declares no tests of its own.
 */
class GlutenBitwiseExpressionsSuite extends BitwiseExpressionsSuite with GlutenTestsTrait

0 comments on commit 87fe315

Please sign in to comment.