Spark Version Adaptation (Part 2)
# Step 2: Refactor the do-component-build build script
The script is located at:
📁 bigtop-packages/src/common/spark/do-component-build
It is a key step in the Spark RPM build chain, controlling the source build, distribution-package generation, and installation into the Maven repository.
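The script is not normally run by hand; Bigtop's Gradle packaging tasks invoke it while building the spark package. As a rough orientation only (the task name below follows Bigtop's `<component>-rpm` convention and should be confirmed against your Bigtop checkout), the build is typically kicked off from the repository root with:
./gradlew spark-rpm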
# ✅ Original script before the modification:
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
. `dirname $0`/bigtop.bom
BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${HOME} \
-Drepo.maven.org=$IVY_MIRROR_PROP \
-Dreactor.repo=file://${HOME}/.m2/repository \
-Dhadoop.version=$HADOOP_VERSION \
-Dyarn.version=$HADOOP_VERSION \
-Pyarn -Phadoop-3.2 \
-Phive -Phive-thriftserver \
-Psparkr -Pkubernetes \
-Pscala-${SCALA_VERSION%.*} \
-Dguava.version=27.0-jre \
$SPARK_BUILD_OPTS"
# BIGTOP-3762
export MAVEN_OPTS="${MAVEN_OPTS:--Xss64m -Xmx4g -XX:ReservedCodeCacheSize=1g}"
./dev/make-distribution.sh --mvn mvn --r $BUILD_OPTS -DskipTests
SPARK_SKIP_TESTS=$([ "$SPARK_RUN_TESTS" = "true" ] && echo false || echo true)
# make-distribution.sh will only run "mvn clean package", so in order to get the
# Spark packages installed in the local Maven repository (or to run the tests),
# we need to run "mvn install" again. However, it will be relatively fast because
# we are not running the "clean" phase.
#
# This is also the point that we can run the tests if desired, since tests must
# be run after Spark has already been packaged.
# See http://spark.apache.org/docs/latest/building-spark.html#spark-tests-in-maven
mvn $BUILD_OPTS install -DskipTests=$SPARK_SKIP_TESTS
# ✅ Complete script after the modification:
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
. $(dirname $0)/bigtop.bom
# 1. Base BUILD_OPTS. Never embed "${SPARK_BUILD_OPTS:-}" directly here, to avoid introducing empty elements.
BUILD_OPTS=(
"-Dmaven.test.skip=true"
"-Dmaven.javadoc.skip=true"
"-Dmaven.scaladoc.skip=true"
"-DskipTests"
"-Dmaven.source.skip"
"-Dcyclonedx.skip=true"
"-Divy.home=${HOME}/.ivy2"
"-Dsbt.ivy.home=${HOME}/.ivy2"
"-Duser.home=${HOME}"
"-Drepo.maven.org=${IVY_MIRROR_PROP}"
"-Dreactor.repo=file://${HOME}/.m2/repository"
"-Dhadoop.version=${HADOOP_VERSION}"
"-Dyarn.version=${HADOOP_VERSION}"
"-Pyarn"
"-Phive"
"-Phive-thriftserver"
"-Psparkr"
"-Pkubernetes"
"-Pscala-${SCALA_VERSION%.*}"
"-Dguava.version=27.0-jre"
)
# 2. Split and append SPARK_BUILD_OPTS only when it is non-empty
if [[ -n "${SPARK_BUILD_OPTS-}" ]]; then
  read -ra _sparktmp <<<"$SPARK_BUILD_OPTS"
  BUILD_OPTS+=("${_sparktmp[@]}")
fi
# 3. Append the Hadoop profile according to SPARK_VERSION
if [[ "${SPARK_VERSION}" == 3.2* ]]; then
  BUILD_OPTS+=("-Phadoop-3.2")
elif [[ "${SPARK_VERSION}" == 3.5* ]]; then
  BUILD_OPTS+=("-Phadoop-3")
else
  echo "Warning: Unrecognized SPARK_VERSION='${SPARK_VERSION}', defaulting to -Phadoop-3.2" >&2
  BUILD_OPTS+=("-Phadoop-3.2")
fi
# 4. Filter out empty elements so that no '' argument is passed to Maven
_cleaned=()
for opt in "${BUILD_OPTS[@]}"; do
  [[ -n "$opt" ]] && _cleaned+=("$opt")
done
BUILD_OPTS=("${_cleaned[@]}")
unset _cleaned _sparktmp
# BIGTOP-3762
#export MAVEN_OPTS="${MAVEN_OPTS:--Xss64m -Xmx4g -XX:ReservedCodeCacheSize=1g}"
./dev/make-distribution.sh --mvn mvn --r "${BUILD_OPTS[@]}" -DskipTests
# Install into the local Maven repository
mvn install \
-Dmaven.compile.skip=true \
-Dmaven.test.skip=true \
-Dmaven.javadoc.skip=true \
-Dmaven.scaladoc.skip=true \
-Dmaven.source.skip \
-Dcyclonedx.skip=true \
-DskipTests
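To double-check exactly what reaches Maven, a temporary debugging line can be dropped in just before the make-distribution.sh call. This is an optional troubleshooting sketch, not part of the committed script; it prints each array element in brackets, so an accidental empty argument would show up as []:
printf 'BUILD_OPTS element: [%s]\n' "${BUILD_OPTS[@]}"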
# 📌 What was changed?
Summary of changes:
- Changed `BUILD_OPTS` from string concatenation to a Bash array declaration;
- Selected the Hadoop profile dynamically based on `SPARK_VERSION`;
- Supported passing extra arguments in through `SPARK_BUILD_OPTS`;
- Added filtering of empty arguments;
- Split out a standalone `mvn install` step with the full set of skip-related flags (see the optional artifact check after this list).
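One optional way to confirm that the standalone mvn install step actually populated the local repository is to list an installed Spark artifact afterwards. The path below is a hypothetical example that assumes Scala 2.12 and the default local repository location:
ls ~/.m2/repository/org/apache/spark/spark-core_2.12/${SPARK_VERSION}/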
# 🧩 Why the changes were made
Reasons:
- The original script failed to build Spark 3.5 because the hard-coded `-Phadoop-3.2` profile is incompatible with it;
- When `$SPARK_BUILD_OPTS` is empty, the string-concatenation approach can pass an empty '' argument to Maven and break the build (see the sketch after this list);
- The build stage and the install stage used inconsistent arguments, which affected the accuracy of the produced artifacts;
- The script could not be reused to switch between Spark versions.
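A minimal standalone sketch of the empty-argument issue mentioned above, using simplified option names rather than the real build options:
SPARK_BUILD_OPTS=""                     # simulate the unset/empty case
# Quoted expansion as an array element keeps the empty string as a real argument:
bad=("-Pyarn" "${SPARK_BUILD_OPTS:-}")
printf '[%s]\n' "${bad[@]}"             # prints [-Pyarn] and an empty []
# Conditional split-and-append only adds elements when there is real content:
good=("-Pyarn")
if [[ -n "${SPARK_BUILD_OPTS-}" ]]; then
  read -ra extra <<<"$SPARK_BUILD_OPTS"
  good+=("${extra[@]}")
fi
printf '[%s]\n' "${good[@]}"            # prints only [-Pyarn]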
# 🔍 Before vs. after
| Aspect | Before | After |
|---|---|---|
| Argument structure | String concatenation | Bash array, clearer logic |
| Hadoop profile | Hard-coded to -Phadoop-3.2 | Switched automatically by version |
| External variable support | Direct concatenation, error-prone | Parsed and merged into the array |
| Empty-argument handling | None | Empty values filtered out |
| Stage separation | make-distribution and install mixed together | Build and install stages clearly split |