[SPARK-47645][BUILD][CORE][SQL][YARN] Make Spark build with `-release` instead of `-target`

### What changes were proposed in this pull request?
This PR makes the following changes to allow Spark to build with `-release` instead of `-target`:

1. Use `MethodHandle` lookups instead of direct calls to `sun.security.action.GetBooleanAction` and `sun.util.calendar.ZoneInfo`, because those classes are not exported APIs; for example:
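
A minimal sketch of the pattern, mirroring the `SerializationDebugger` change in the diff below (`Class.forName` stands in for Spark's `SparkClassUtils.classForName` so the snippet is self-contained):

```scala
import java.lang.invoke.MethodHandles
import java.security.{AccessController, PrivilegedAction}

// Resolve the non-exported JDK class at runtime so the compiler never sees
// a direct reference to sun.security.action.GetBooleanAction.
val lookup = MethodHandles.lookup()
val clazz = Class.forName("sun.security.action.GetBooleanAction")
val constructor = clazz.getConstructor(classOf[String])
val mh = lookup.unreflectConstructor(constructor)
val action = mh.invoke("sun.io.serialization.extendedDebugInfo")
  .asInstanceOf[PrivilegedAction[Boolean]]
val enableDebugging = !AccessController.doPrivileged(action).booleanValue()
```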

2. Use `Channels.newReader` instead of `StreamDecoder.forDecoder`, because `StreamDecoder` is likewise not an exported API; `Channels.newReader` simply delegates to it, as the JDK source shows:

```java
    public static Reader newReader(ReadableByteChannel ch,
                                   CharsetDecoder dec,
                                   int minBufferCap)
    {
        Objects.requireNonNull(ch, "ch");
        return StreamDecoder.forDecoder(ch, dec.reset(), minBufferCap);
    }
```
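
In `CreateJacksonParser` and `CreateXmlParser` the helper therefore returns a plain `java.io.Reader` built through the public API. A self-contained sketch of the updated helper (the same code as in the diff below, with imports added):

```scala
import java.io.{ByteArrayInputStream, Reader}
import java.nio.channels.Channels
import java.nio.charset.Charset

// Wrap a byte array in a Reader with an explicit charset; the decoding buffer
// is capped at 8192 bytes, matching the previous StreamDecoder-based code.
def getStreamDecoder(enc: String, in: Array[Byte], length: Int): Reader = {
  val bais = new ByteArrayInputStream(in, 0, length)
  val byteChannel = Channels.newChannel(bais)
  val decodingBufferSize = Math.min(length, 8192)
  val decoder = Charset.forName(enc).newDecoder()
  // Channels.newReader delegates to StreamDecoder.forDecoder, so behavior is unchanged.
  Channels.newReader(byteChannel, decoder, decodingBufferSize)
}
```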

3. Adjusted the import of `java.io._` in `yarn/Client.scala` to fix the compilation error:

```
Error: ] /home/runner/work/spark/spark/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala:20: object FileSystem is not a member of package java.io
```
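
The fix, shown in the `Client.scala` hunk below, replaces the wildcard import, which relied on hiding `java.io.FileSystem` (a class that is not part of the public API described by `-release`), with explicit imports:

```scala
// Before: the rename hid java.io.FileSystem, presumably to avoid clashing
// with Hadoop's FileSystem, but that class is invisible under -release.
// import java.io.{FileSystem => _, _}

// After: import only the members that are actually used.
import java.io.{File, FileFilter, FileNotFoundException, FileOutputStream,
  InterruptedIOException, IOException, OutputStreamWriter}
```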

4. Replaced `-target` with `-release` in `pom.xml` and `SparkBuild.scala`, and removed the `-source` option, since `-release` covers both; for example:
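
In `SparkBuild.scala`, for instance, the compiler options end up as follows (excerpted from the diff below; `javaVersion` is the build's existing Java version setting):

```scala
// javac: a single --release flag replaces the former -source/-target pair.
(Compile / javacOptions) ++= Seq(
  "-encoding", UTF_8.name(),
  "--release", javaVersion.value
),

// scalac: -release replaces the deprecated -target:<version>.
(Compile / scalacOptions) ++= Seq(
  "-release", javaVersion.value,
  "-sourcepath", (ThisBuild / baseDirectory).value.getAbsolutePath
),
```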

5. Upgraded `scala-maven-plugin` from 4.7.1 to 4.8.1 to fix the error `[ERROR] -release cannot be less than -target` when executing `build/mvn clean install -DskipTests -Djava.version=21`.

### Why are the changes needed?
Since Scala 2.13.9, the compiler option `-target` has been deprecated and `-release` is recommended instead:

- scala/scala#9982

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Pass GitHub Actions

### Was this patch authored or co-authored using generative AI tooling?
No

Closes apache#45716 from LuciferYang/scala-maven-plugin-491.

Authored-by: yangjie01 <yangjie01@baidu.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
LuciferYang authored and sweisdb committed Apr 1, 2024
1 parent 4f8b94f commit 3f56e1b
Showing 7 changed files with 40 additions and 29 deletions.
@@ -18,14 +18,16 @@
package org.apache.spark.serializer

import java.io._
+import java.lang.invoke.MethodHandles
import java.lang.reflect.{Field, Method}
-import java.security.AccessController
+import java.security.{AccessController, PrivilegedAction}

import scala.annotation.tailrec
import scala.collection.mutable
import scala.util.control.NonFatal

import org.apache.spark.internal.Logging
+import org.apache.spark.util.SparkClassUtils

private[spark] object SerializationDebugger extends Logging {

@@ -68,8 +70,13 @@ private[spark] object SerializationDebugger extends Logging {
}

private[serializer] var enableDebugging: Boolean = {
-!AccessController.doPrivileged(new sun.security.action.GetBooleanAction(
-"sun.io.serialization.extendedDebugInfo")).booleanValue()
+val lookup = MethodHandles.lookup()
+val clazz = SparkClassUtils.classForName("sun.security.action.GetBooleanAction")
+val constructor = clazz.getConstructor(classOf[String])
+val mh = lookup.unreflectConstructor(constructor)
+val action = mh.invoke("sun.io.serialization.extendedDebugInfo")
+.asInstanceOf[PrivilegedAction[Boolean]]
+!AccessController.doPrivileged(action).booleanValue()
}

private class SerializationDebugger {
16 changes: 6 additions & 10 deletions pom.xml
@@ -114,8 +114,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>17</java.version>
-<maven.compiler.source>${java.version}</maven.compiler.source>
-<maven.compiler.target>${java.version}</maven.compiler.target>
+<maven.compiler.release>${java.version}</maven.compiler.release>
<maven.version>3.9.6</maven.version>
<exec-maven-plugin.version>3.1.0</exec-maven-plugin.version>
<sbt.project.name>spark</sbt.project.name>
@@ -175,8 +174,7 @@
<scala.version>2.13.13</scala.version>
<scala.binary.version>2.13</scala.binary.version>
<scalatest-maven-plugin.version>2.2.0</scalatest-maven-plugin.version>
-<!-- don't upgrade scala-maven-plugin to version 4.7.2 or higher, see SPARK-45144 for details -->
-<scala-maven-plugin.version>4.7.1</scala-maven-plugin.version>
+<scala-maven-plugin.version>4.8.1</scala-maven-plugin.version>
<maven.scaladoc.skip>false</maven.scaladoc.skip>
<versions-maven-plugin.version>2.16.2</versions-maven-plugin.version>
<!-- for now, not running scalafmt as part of default verify pipeline -->
@@ -3060,7 +3058,8 @@
<arg>-deprecation</arg>
<arg>-feature</arg>
<arg>-explaintypes</arg>
-<arg>-target:17</arg>
+<arg>-release</arg>
+<arg>17</arg>
<arg>-Wconf:cat=deprecation:wv,any:e</arg>
<arg>-Wunused:imports</arg>
<arg>-Wconf:cat=scaladoc:wv</arg>
@@ -3092,9 +3091,7 @@
<jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
</jvmArgs>
<javacArgs>
-<javacArg>-source</javacArg>
-<javacArg>${java.version}</javacArg>
-<javacArg>-target</javacArg>
+<javacArg>--release</javacArg>
<javacArg>${java.version}</javacArg>
<javacArg>-Xlint:all,-serial,-path,-try</javacArg>
</javacArgs>
@@ -3105,8 +3102,7 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.12.1</version>
<configuration>
-<source>${java.version}</source>
-<target>${java.version}</target>
+<release>${java.version}</release>
<skipMain>true</skipMain> <!-- skip compile -->
<skip>true</skip> <!-- skip testCompile -->
</configuration>
5 changes: 2 additions & 3 deletions project/SparkBuild.scala
@@ -311,18 +311,17 @@ object SparkBuild extends PomBuild {

(Compile / javacOptions) ++= Seq(
"-encoding", UTF_8.name(),
-"-source", javaVersion.value
+"--release", javaVersion.value
),
// This -target and Xlint:unchecked options cannot be set in the Compile configuration scope since
// `javadoc` doesn't play nicely with them; see https://github.com/sbt/sbt/issues/355#issuecomment-3817629
// for additional discussion and explanation.
(Compile / compile / javacOptions) ++= Seq(
-"-target", javaVersion.value,
"-Xlint:unchecked"
),

(Compile / scalacOptions) ++= Seq(
-s"-target:${javaVersion.value}",
+"-release", javaVersion.value,
"-sourcepath", (ThisBuild / baseDirectory).value.getAbsolutePath // Required for relative source links in scaladoc
),

@@ -17,7 +17,7 @@

package org.apache.spark.deploy.yarn

-import java.io.{FileSystem => _, _}
+import java.io.{File, FileFilter, FileNotFoundException, FileOutputStream, InterruptedIOException, IOException, OutputStreamWriter}
import java.net.{InetAddress, UnknownHostException, URI, URL}
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
@@ -16,6 +16,7 @@
*/
package org.apache.spark.sql.catalyst.util

+import java.lang.invoke.{MethodHandles, MethodType}
import java.sql.{Date, Timestamp}
import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZonedDateTime, ZoneId, ZoneOffset}
import java.util.TimeZone
@@ -24,14 +25,13 @@ import java.util.regex.Pattern

import scala.util.control.NonFatal

-import sun.util.calendar.ZoneInfo
-
import org.apache.spark.QueryContext
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseGregorianToJulianMicros, rebaseJulianToGregorianDays, rebaseJulianToGregorianMicros}
import org.apache.spark.sql.errors.ExecutionErrors
import org.apache.spark.sql.types.{DateType, TimestampType}
import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.util.SparkClassUtils

trait SparkDateTimeUtils {

@@ -197,6 +197,15 @@ trait SparkDateTimeUtils {
rebaseJulianToGregorianDays(julianDays)
}

+private val zoneInfoClassName = "sun.util.calendar.ZoneInfo"
+private val getOffsetsByWallHandle = {
+val lookup = MethodHandles.lookup()
+val classType = SparkClassUtils.classForName(zoneInfoClassName)
+val methodName = "getOffsetsByWall"
+val methodType = MethodType.methodType(classOf[Int], classOf[Long], classOf[Array[Int]])
+lookup.findVirtual(classType, methodName, methodType)
+}

/**
* Converts days since the epoch 1970-01-01 in Proleptic Gregorian calendar to a local date
* at the default JVM time zone in the hybrid calendar (Julian + Gregorian). It rebases the given
@@ -215,8 +224,10 @@
val rebasedDays = rebaseGregorianToJulianDays(days)
val localMillis = Math.multiplyExact(rebasedDays, MILLIS_PER_DAY)
val timeZoneOffset = TimeZone.getDefault match {
-case zoneInfo: ZoneInfo => zoneInfo.getOffsetsByWall(localMillis, null)
-case timeZone: TimeZone => timeZone.getOffset(localMillis - timeZone.getRawOffset)
+case zoneInfo: TimeZone if zoneInfo.getClass.getName == zoneInfoClassName =>
+getOffsetsByWallHandle.invoke(zoneInfo, localMillis, null).asInstanceOf[Int]
+case timeZone: TimeZone =>
+timeZone.getOffset(localMillis - timeZone.getRawOffset)
}
new Date(localMillis - timeZoneOffset)
}
@@ -17,14 +17,13 @@

package org.apache.spark.sql.catalyst.json

-import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}
+import java.io.{ByteArrayInputStream, InputStream, InputStreamReader, Reader}
import java.nio.channels.Channels
import java.nio.charset.Charset
import java.nio.charset.StandardCharsets

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
-import sun.nio.cs.StreamDecoder

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String
@@ -58,13 +57,13 @@ object CreateJacksonParser extends Serializable {
// a reader with specific encoding.
// The method creates a reader for an array with given encoding and sets size of internal
// decoding buffer according to size of input array.
-private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = {
+private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): Reader = {
val bais = new ByteArrayInputStream(in, 0, length)
val byteChannel = Channels.newChannel(bais)
val decodingBufferSize = Math.min(length, 8192)
val decoder = Charset.forName(enc).newDecoder()

-StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize)
+Channels.newReader(byteChannel, decoder, decodingBufferSize)
}

def text(enc: String, jsonFactory: JsonFactory, record: Text): JsonParser = {
@@ -17,14 +17,13 @@

package org.apache.spark.sql.catalyst.xml

-import java.io.{ByteArrayInputStream, InputStream, InputStreamReader, StringReader}
+import java.io.{ByteArrayInputStream, InputStream, InputStreamReader, Reader, StringReader}
import java.nio.channels.Channels
import java.nio.charset.{Charset, StandardCharsets}
import javax.xml.stream.{EventFilter, XMLEventReader, XMLInputFactory, XMLStreamConstants}
import javax.xml.stream.events.XMLEvent

import org.apache.hadoop.io.Text
-import sun.nio.cs.StreamDecoder

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String
@@ -75,13 +74,13 @@ object CreateXmlParser extends Serializable {
// a reader with specific encoding.
// The method creates a reader for an array with given encoding and sets size of internal
// decoding buffer according to size of input array.
-private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = {
+private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): Reader = {
val bais = new ByteArrayInputStream(in, 0, length)
val byteChannel = Channels.newChannel(bais)
val decodingBufferSize = Math.min(length, 8192)
val decoder = Charset.forName(enc).newDecoder()

-StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize)
+Channels.newReader(byteChannel, decoder, decodingBufferSize)
}

def text(enc: String, xmlInputFactory: XMLInputFactory, record: Text): XMLEventReader = {
