Spark Version Adaptation (Part 2)
# Step 2: Refactor the do-component-build build script
The script is located at:
📁 bigtop-packages/src/common/spark/do-component-build
It is a key step in the Spark RPM build chain, controlling the source build, distribution-package generation, and installation into the Maven repository.
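The script is not normally run by hand; Bigtop's Gradle packaging tasks invoke it while building the spark package. As a rough orientation only (the task name below follows Bigtop's `<component>-rpm` convention and should be confirmed against your Bigtop checkout), the build is typically kicked off from the repository root with:
./gradlew spark-rpm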
# ✅ Original script before the modification:
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
. `dirname $0`/bigtop.bom
BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${HOME} \
-Drepo.maven.org=$IVY_MIRROR_PROP \
-Dreactor.repo=file://${HOME}/.m2/repository \
-Dhadoop.version=$HADOOP_VERSION \
-Dyarn.version=$HADOOP_VERSION \
-Pyarn -Phadoop-3.2 \
-Phive -Phive-thriftserver \
-Psparkr -Pkubernetes \
-Pscala-${SCALA_VERSION%.*} \
-Dguava.version=27.0-jre \
$SPARK_BUILD_OPTS"
# BIGTOP-3762
export MAVEN_OPTS="${MAVEN_OPTS:--Xss64m -Xmx4g -XX:ReservedCodeCacheSize=1g}"
./dev/make-distribution.sh --mvn mvn --r $BUILD_OPTS -DskipTests
SPARK_SKIP_TESTS=$([ "$SPARK_RUN_TESTS" = "true" ] && echo false || echo true)
# make-distribution.sh will only run "mvn clean package", so in order to get the
# Spark packages installed in the local Maven repository (or to run the tests),
# we need to run "mvn install" again. However, it will be relatively fast because
# we are not running the "clean" phase.
#
# This is also the point that we can run the tests if desired, since tests must
# be run after Spark has already been packaged.
# See http://spark.apache.org/docs/latest/building-spark.html#spark-tests-in-maven
mvn $BUILD_OPTS install -DskipTests=$SPARK_SKIP_TESTS
# ✅ Complete script after the modification:
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
. $(dirname $0)/bigtop.bom
# 1. Base BUILD_OPTS. Never embed "${SPARK_BUILD_OPTS:-}" directly here, to avoid introducing empty elements.
BUILD_OPTS=(
"-Dmaven.test.skip=true"
"-Dmaven.javadoc.skip=true"
"-Dmaven.scaladoc.skip=true"
"-DskipTests"
"-Dmaven.source.skip"
"-Dcyclonedx.skip=true"
"-Divy.home=${HOME}/.ivy2"
"-Dsbt.ivy.home=${HOME}/.ivy2"
"-Duser.home=${HOME}"
"-Drepo.maven.org=${IVY_MIRROR_PROP}"
"-Dreactor.repo=file://${HOME}/.m2/repository"
"-Dhadoop.version=${HADOOP_VERSION}"
"-Dyarn.version=${HADOOP_VERSION}"
"-Pyarn"
"-Phive"
"-Phive-thriftserver"
"-Psparkr"
"-Pkubernetes"
"-Pscala-${SCALA_VERSION%.*}"
"-Dguava.version=27.0-jre"
)
# 2. Split and append SPARK_BUILD_OPTS only when it is non-empty
if [[ -n "${SPARK_BUILD_OPTS-}" ]]; then
  read -ra _sparktmp <<<"$SPARK_BUILD_OPTS"
  BUILD_OPTS+=("${_sparktmp[@]}")
fi
# 3. Append the Hadoop profile according to SPARK_VERSION
if [[ "${SPARK_VERSION}" == 3.2* ]]; then
  BUILD_OPTS+=("-Phadoop-3.2")
elif [[ "${SPARK_VERSION}" == 3.5* ]]; then
  BUILD_OPTS+=("-Phadoop-3")
else
  echo "Warning: Unrecognized SPARK_VERSION='${SPARK_VERSION}', defaulting to -Phadoop-3.2" >&2
  BUILD_OPTS+=("-Phadoop-3.2")
fi
# 4. Filter out empty elements so that no '' argument is passed to Maven
_cleaned=()
for opt in "${BUILD_OPTS[@]}"; do
  [[ -n "$opt" ]] && _cleaned+=("$opt")
done
BUILD_OPTS=("${_cleaned[@]}")
unset _cleaned _sparktmp
# BIGTOP-3762
#export MAVEN_OPTS="${MAVEN_OPTS:--Xss64m -Xmx4g -XX:ReservedCodeCacheSize=1g}"
./dev/make-distribution.sh --mvn mvn --r "${BUILD_OPTS[@]}" -DskipTests
# Install into the local Maven repository
mvn install \
-Dmaven.compile.skip=true \
-Dmaven.test.skip=true \
-Dmaven.javadoc.skip=true \
-Dmaven.scaladoc.skip=true \
-Dmaven.source.skip \
-Dcyclonedx.skip=true \
-DskipTests
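To double-check exactly what reaches Maven, a temporary debugging line can be dropped in just before the make-distribution.sh call. This is an optional troubleshooting sketch, not part of the committed script; it prints each array element in brackets, so an accidental empty argument would show up as []:
printf 'BUILD_OPTS element: [%s]\n' "${BUILD_OPTS[@]}"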
# 📌 What was changed?
Summary of changes:
- Changed `BUILD_OPTS` from string concatenation to a Bash array declaration;
- Selected the Hadoop profile dynamically based on `SPARK_VERSION`;
- Supported passing extra arguments in through `SPARK_BUILD_OPTS`;
- Added filtering of empty arguments;
- Split out a standalone `mvn install` step with the full set of skip-related flags (see the optional artifact check after this list).
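One optional way to confirm that the standalone mvn install step actually populated the local repository is to list an installed Spark artifact afterwards. The path below is a hypothetical example that assumes Scala 2.12 and the default local repository location:
ls ~/.m2/repository/org/apache/spark/spark-core_2.12/${SPARK_VERSION}/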
# 🧩 Why the changes were made
Reasons:
- The original script failed to build Spark 3.5 because the hard-coded `-Phadoop-3.2` profile is incompatible with it;
- When `$SPARK_BUILD_OPTS` is empty, the string-concatenation approach can pass an empty '' argument to Maven and break the build (see the sketch after this list);
- The build stage and the install stage used inconsistent arguments, which affected the accuracy of the produced artifacts;
- The script could not be reused to switch between Spark versions.
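A minimal standalone sketch of the empty-argument issue mentioned above, using simplified option names rather than the real build options:
SPARK_BUILD_OPTS=""                     # simulate the unset/empty case
# Quoted expansion as an array element keeps the empty string as a real argument:
bad=("-Pyarn" "${SPARK_BUILD_OPTS:-}")
printf '[%s]\n' "${bad[@]}"             # prints [-Pyarn] and an empty []
# Conditional split-and-append only adds elements when there is real content:
good=("-Pyarn")
if [[ -n "${SPARK_BUILD_OPTS-}" ]]; then
  read -ra extra <<<"$SPARK_BUILD_OPTS"
  good+=("${extra[@]}")
fi
printf '[%s]\n' "${good[@]}"            # prints only [-Pyarn]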
# 🔍 Before vs. after
| Aspect | Before | After |
|---|---|---|
| Argument structure | String concatenation | Bash array, clearer logic |
| Hadoop profile | Hard-coded to -Phadoop-3.2 | Switched automatically by version |
| External variable support | Direct concatenation, error-prone | Parsed and merged into the array |
| Empty-argument handling | None | Empty values filtered out |
| Stage separation | make-distribution and install mixed together | Build and install stages clearly split |