部署脚本
1. 基础配置脚本(all_nodes_prepare.sh,所有节点执行)
#!/bin/bash
# all_nodes_prepare.sh - base preparation, run as root on every node.
#
# Steps: switch apt to the USTC mirrors, disable UFW, enable the memory cgroup
# on aarch64 boards, install base packages, configure time sync, disable swap,
# load kernel modules and sysctl settings, enable iscsid, and set the hostname
# and /etc/hosts entries from the node's first IP address.
#
# The script is written so that it can be re-run without duplicating entries.
set -e

ARCH=$(uname -m)

# Keep the first backup of a file; a re-run must not overwrite the pristine
# copy with the already-modified one. Missing files are ignored.
backup_once() {
  local file=$1
  if [[ -f "$file" && ! -e "${file}.bak" ]]; then
    cp -p -- "$file" "${file}.bak"
  fi
}

# Point /etc/apt/sources.list at the USTC Debian bookworm mirror.
write_debian_sources() {
  backup_once /etc/apt/sources.list
  cat > /etc/apt/sources.list << EOF
deb https://mirrors.ustc.edu.cn/debian/ bookworm main contrib non-free
deb https://mirrors.ustc.edu.cn/debian/ bookworm-updates main contrib non-free
deb https://mirrors.ustc.edu.cn/debian-security bookworm-security main contrib non-free
EOF
}

# Point the Raspberry Pi archive list at the USTC mirror.
write_raspi_sources() {
  backup_once /etc/apt/sources.list.d/raspi.list
  cat > /etc/apt/sources.list.d/raspi.list << EOF
deb http://mirrors.ustc.edu.cn/archive.raspberrypi.org/debian/ bookworm main
EOF
}

echo "===== 配置国内软件源 ====="
if [[ "$ARCH" == "x86_64" ]]; then
  write_debian_sources
elif [[ "$ARCH" == "aarch64" ]]; then
  write_debian_sources
  write_raspi_sources
fi

echo "===== 关闭UFW防火墙 ====="
# Best effort: ufw may not be installed.
ufw disable || true

if [[ "$ARCH" == "aarch64" ]]; then
  echo "===== 配置树莓派CGroup ====="
  if ! grep -q "cgroup_memory=1 cgroup_enable=memory" /boot/firmware/cmdline.txt; then
    # Append to the end of the last line of the kernel command line file.
    sed -i '$ s/$/ cgroup_memory=1 cgroup_enable=memory/' /boot/firmware/cmdline.txt
    echo "CGroup配置已添加,重启后生效!"
  fi
fi

echo "===== 更新系统并安装基础依赖 ====="
apt update && apt upgrade -y
apt install -y curl wget vim git nfs-common open-iscsi chrony \
  apt-transport-https ca-certificates software-properties-common

echo "===== 配置时间同步 ====="
timedatectl set-timezone Asia/Shanghai
systemctl enable --now chrony

echo "===== 关闭swap ====="
swapoff -a
# Comment out swap entries, skipping lines that are already comments so a
# re-run does not keep stacking '#' characters.
sed -i '/^[[:space:]]*#/!{/swap/s/^/#/}' /etc/fstab

echo "===== 加载内核模块 ====="
# The modules are loaded BEFORE the sysctl settings are applied: the
# net.bridge.* keys only exist once br_netfilter is loaded. They are also
# listed in modules-load.d so they come back after a reboot.
cat > /etc/modules-load.d/k8s.conf << EOF
br_netfilter
overlay
EOF
modprobe br_netfilter
modprobe overlay

echo "===== 配置内核参数 ====="
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
sysctl --system

echo "===== 配置open-iscsi ====="
systemctl enable --now iscsid

echo "===== 配置主机名和Hosts ====="
IP_ADDR=$(hostname -I | awk '{print $1}')
# Anchored, with escaped dots, so only 192.168.2.50-54 match.
WORKER_IP_RE='^192\.168\.2\.5[0-4]$'
if [[ "$IP_ADDR" == "192.168.2.40" ]]; then
  hostnamectl set-hostname k3s-master
elif [[ "$IP_ADDR" =~ $WORKER_IP_RE ]]; then
  # .50 -> k3s-worker-01 ... .54 -> k3s-worker-05
  NUM=${IP_ADDR##*.}
  hostnamectl set-hostname "k3s-worker-0$((NUM - 49))"
fi

# Add each cluster host only if the exact line is not present yet.
while read -r host_ip host_name; do
  grep -qxF "$host_ip $host_name" /etc/hosts || echo "$host_ip $host_name" >> /etc/hosts
done << EOF
192.168.2.40 k3s-master
192.168.2.50 k3s-worker-01
192.168.2.51 k3s-worker-02
192.168.2.52 k3s-worker-03
192.168.2.53 k3s-worker-04
192.168.2.54 k3s-worker-05
192.168.2.20 nfs-server
EOF

echo "===== 基础配置完成 ====="
echo "注意:树莓派需重启以生效CGroup配置!"
2. K3s Master部署脚本(master_deploy.sh,192.168.2.40执行)
#!/bin/bash
# master_deploy.sh - install the K3s server on 192.168.2.40 (run as root).
#
# Steps: write the registry mirror config, install the K3s server from the
# rancher.cn mirror, wait for the API, copy the kubeconfig, print the worker
# join command, mount the NFS share and create the Longhorn backup directory.
#
# pipefail is enabled so that a failed installer download is not hidden by
# the shell it is piped into.
set -eo pipefail

MASTER_IP="192.168.2.40"
NFS_SERVER="192.168.2.20"
NFS_PATH="/volume3/nfs-share"
IFACE="enp16s16"
NFS_MOUNT="/mnt/nfs-share"
# Upper bound, in seconds, for the API server to answer after installation.
WAIT_TIMEOUT=300

echo "===== 配置镜像加速 ====="
mkdir -p /etc/rancher/k3s
# Same mirror endpoints as the worker script.
cat > /etc/rancher/k3s/registries.yaml << 'EOF'
mirrors:
  docker.io:
    endpoint:
      - "https://docker.m.daocloud.io"
      - "https://docker.1ms.run"
EOF
echo "registries.yaml 创建完成"

echo "===== 安装K3s Master(国内镜像) ====="
curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_MIRROR=cn sh -s - server \
  --node-ip "$MASTER_IP" \
  --bind-address "$MASTER_IP" \
  --advertise-address "$MASTER_IP" \
  --disable traefik \
  --disable servicelb \
  --disable local-storage \
  --flannel-iface "$IFACE" \
  --cluster-cidr 10.42.0.0/16 \
  --service-cidr 10.43.0.0/16 \
  --kubelet-arg "cgroup-driver=systemd"

echo "===== 等待K3s Master启动 ====="
waited=0
until kubectl get nodes; do
  if ((waited >= WAIT_TIMEOUT)); then
    echo "❌ 等待K3s Master启动超时(${WAIT_TIMEOUT}秒),请检查:journalctl -u k3s"
    exit 1
  fi
  sleep 5
  waited=$((waited + 5))
done

echo "===== 配置kubectl ====="
mkdir -p ~/.kube
cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
chmod 600 ~/.kube/config

echo "===== 生成Worker加入信息 ====="
TOKEN=$(cat /var/lib/rancher/k3s/server/node-token)
echo "Worker节点加入命令:"
echo "curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_MIRROR=cn K3S_URL=https://$MASTER_IP:6443 K3S_TOKEN=$TOKEN sh -"

echo "===== 挂载NFS共享 ====="
mkdir -p "$NFS_MOUNT"
# Skip the mount and the fstab line when they are already in place, so the
# script can be re-run.
if ! mountpoint -q "$NFS_MOUNT"; then
  mount -t nfs "$NFS_SERVER:$NFS_PATH" "$NFS_MOUNT"
fi
FSTAB_LINE="$NFS_SERVER:$NFS_PATH $NFS_MOUNT nfs defaults 0 0"
if ! grep -qxF "$FSTAB_LINE" /etc/fstab; then
  echo "$FSTAB_LINE" >> /etc/fstab
fi

echo "===== 创建Longhorn备份目录 ====="
mkdir -p "$NFS_MOUNT/longhorn-backup"
chmod 777 "$NFS_MOUNT/longhorn-backup"

echo "===== K3s Master部署完成 ====="
echo "K3s配置文件路径:~/.kube/config"
echo "Token: $TOKEN"
echo "NFS挂载路径:/mnt/nfs-share"
echo "Longhorn备份路径:/mnt/nfs-share/longhorn-backup"
3. K3s Worker部署脚本(worker_deploy.sh,192.168.2.50~54执行)
#!/bin/bash
# worker_deploy.sh - idempotent K3s agent setup, run as root on each worker
# (192.168.2.50~54).
#
# Steps: write the registry mirror config, install and start the K3s agent,
# prepare an unused sd* disk as the Longhorn data directory, print a summary.
#
# Set TOKEN to the value of /var/lib/rancher/k3s/server/node-token from the
# master before the first run.
set -e

MASTER_IP="192.168.2.40"
TOKEN="替换实际token值"
FLANNEL_IFACE="eth0"
LONGHORN_MOUNT="/var/lib/longhorn"
INSTALL_LOG="/tmp/k3s-agent-install.log"
# Unit file written by the K3s installer when it runs in agent mode.
AGENT_UNIT_FILE="/etc/systemd/system/k3s-agent.service"

NODE_NAME=$(hostname | sed 's/[^a-zA-Z0-9\-]//g')
if [[ -z "$NODE_NAME" ]]; then
  echo "❌ 无法获取主机名,节点名不能为空!"
  exit 1
fi
echo "📌 自动识别节点名:$NODE_NAME"

# True when the given command is on PATH.
command_exists() {
  command -v "$1" > /dev/null 2>&1
}

# True when the given systemd unit is active.
service_running() {
  systemctl is-active --quiet "$1"
}

# True when the given directory is a mount point. mountpoint(1) is used
# instead of grepping `mount`, which would also match longer paths that
# merely contain the directory name.
is_mounted() {
  mountpoint -q "$1"
}

# True when the agent is already installed. The installer provides a `k3s`
# binary and a `k3s-agent` unit; there is no `k3s-agent` command to look for.
agent_installed() {
  command_exists k3s && [[ -f "$AGENT_UNIT_FILE" ]]
}

# Print the first sd* whole disk on which neither the disk nor any of its
# partitions is mounted; print nothing when there is none. Skipping disks
# with mounted partitions keeps a USB boot/root disk from being selected
# and formatted.
find_free_disk() {
  local disk
  while read -r disk; do
    if lsblk -nr -o MOUNTPOINT "/dev/$disk" | grep -q .; then
      continue
    fi
    echo "/dev/$disk"
    return 0
  done < <(lsblk -dn -o NAME,TYPE | awk '$2 == "disk" && $1 ~ /^sd[a-z]$/ {print $1}')
  return 0
}

echo -e "\n===== [Agent 本地] 配置镜像加速 ====="
mkdir -p /etc/rancher/k3s
REGISTRY_FILE="/etc/rancher/k3s/registries.yaml"
if [[ ! -f "$REGISTRY_FILE" ]] || ! grep -q "docker.m.daocloud.io" "$REGISTRY_FILE"; then
  cat > "$REGISTRY_FILE" << 'EOF'
mirrors:
  docker.io:
    endpoint:
      - "https://docker.m.daocloud.io"
      - "https://docker.1ms.run"
EOF
  echo "✅ 镜像加速配置已创建/更新"
else
  echo "✅ 镜像加速配置已存在且有效,跳过"
fi

echo -e "\n===== [Agent 本地] 安装/校验 K3s Agent ====="
WORKER_IP=$(ip addr show "$FLANNEL_IFACE" | grep -oP '(?<=inet\s)\d+(\.\d+){3}' | head -1)
if [[ -z "$WORKER_IP" ]]; then
  echo "❌ 未找到 $FLANNEL_IFACE 网卡的IP地址,请检查网卡配置!"
  exit 1
fi

if agent_installed; then
  echo "✅ K3s Agent 已安装,跳过安装步骤"
else
  if [[ -z "$TOKEN" || "$TOKEN" == "替换实际token值" ]]; then
    echo "❌ 请先将脚本中的 TOKEN 替换为 Master 节点的实际 node-token!"
    exit 1
  fi
  echo "🔧 开始安装 K3s Agent(国内镜像源)..."
  # Download first, then run: a failed download is detected instead of
  # feeding an empty script to the shell.
  installer=$(mktemp)
  trap 'rm -f -- "$installer"' EXIT
  if ! curl -sfL -o "$installer" https://rancher-mirror.rancher.cn/k3s/k3s-install.sh; then
    echo "❌ 下载 K3s 安装脚本失败,请检查网络!"
    exit 1
  fi
  # No shell tracing here: the trace would write the join token to the log.
  INSTALL_K3S_MIRROR=cn \
    K3S_URL="https://$MASTER_IP:6443" \
    K3S_TOKEN="$TOKEN" \
    sh "$installer" agent \
    --node-ip "$WORKER_IP" \
    --node-name "$NODE_NAME" \
    --flannel-iface "$FLANNEL_IFACE" \
    --kubelet-arg "cgroup-driver=systemd" 2>&1 | tee "$INSTALL_LOG"
  # The pipeline status is tee's; the installer's own status is checked here.
  install_rc=${PIPESTATUS[0]}
  if [[ "$install_rc" -ne 0 ]]; then
    echo "❌ K3s Agent 安装失败(退出码:$install_rc),请检查日志:$INSTALL_LOG"
    exit 1
  fi
  echo "✅ K3s Agent 安装完成(节点名:$NODE_NAME)"
fi

if service_running k3s-agent; then
  echo "✅ K3s Agent 服务已正常运行"
else
  echo "🔧 启动 K3s Agent 服务..."
  systemctl enable --now k3s-agent
  if service_running k3s-agent; then
    echo "✅ K3s Agent 服务启动成功"
  else
    echo "❌ K3s Agent 服务启动失败,请检查日志:journalctl -u k3s-agent"
    exit 1
  fi
fi

echo -e "\n===== [Agent 本地] 配置/校验 Longhorn 存储 ====="
# The mount state is checked first so that a re-run on a configured node
# reports "already mounted" rather than "no device found".
if is_mounted "$LONGHORN_MOUNT"; then
  echo "✅ $LONGHORN_MOUNT 已挂载,跳过挂载步骤"
  echo "✅ Longhorn 存储配置完成,挂载路径:$LONGHORN_MOUNT"
else
  SSD_DEVICE=$(find_free_disk)
  if [[ -z "$SSD_DEVICE" ]]; then
    echo "⚠️ 未找到USB SSD设备(sd开头),跳过Longhorn存储配置"
  else
    if [[ "$(blkid -s TYPE -o value "$SSD_DEVICE" 2> /dev/null)" != "ext4" ]]; then
      echo "🔧 格式化 SSD 设备 $SSD_DEVICE 为ext4(注意:会清空设备数据!)"
      mkfs.ext4 -F "$SSD_DEVICE"
    else
      echo "✅ $SSD_DEVICE 已为ext4格式,跳过格式化"
    fi

    mkdir -p "$LONGHORN_MOUNT"
    mount "$SSD_DEVICE" "$LONGHORN_MOUNT"
    echo "✅ SSD 已挂载到 $LONGHORN_MOUNT"

    # Prefer the filesystem UUID in fstab because sdX names can change
    # between boots; fall back to the device path if no UUID is reported.
    ssd_uuid=$(blkid -s UUID -o value "$SSD_DEVICE" 2> /dev/null || true)
    if [[ -n "$ssd_uuid" ]]; then
      fstab_source="UUID=$ssd_uuid"
    else
      fstab_source="$SSD_DEVICE"
    fi
    # Any active (non-comment) entry for this mount point counts as present.
    if ! grep -qE "^[^#[:space:]][^[:space:]]*[[:space:]]+${LONGHORN_MOUNT}[[:space:]]" /etc/fstab; then
      echo "$fstab_source $LONGHORN_MOUNT ext4 defaults 0 0" >> /etc/fstab
      echo "✅ 已添加到/etc/fstab,开机自动挂载"
    else
      echo "✅ /etc/fstab 中已存在挂载配置,跳过写入"
    fi

    if is_mounted "$LONGHORN_MOUNT"; then
      echo "✅ Longhorn 存储配置完成,挂载路径:$LONGHORN_MOUNT"
    else
      echo "❌ Longhorn 存储挂载失败,请手动检查!"
    fi
  fi
fi

echo -e "\n===== [Agent 本地] 部署状态汇总 ====="
if is_mounted "$LONGHORN_MOUNT"; then
  mount_state="已挂载"
else
  mount_state="未挂载"
fi
echo "📌 节点名:$NODE_NAME"
echo "📌 节点IP:$WORKER_IP"
echo "📌 K3s Agent 状态:$(systemctl is-active k3s-agent)"
echo "📌 Longhorn 存储挂载:$mount_state"
echo -e "\n✅ K3s Agent 幂等部署完成!"
echo "ℹ️ 节点标签请在 Master 节点执行以下命令添加:"
echo "kubectl label node $NODE_NAME node-type=raspberrypi --overwrite"
echo "kubectl label node $NODE_NAME storage=longhorn --overwrite"
配置国内源(v1.34+):
所有节点修改配置
# Copy the current containerd config to a template file, then edit the template.
# NOTE(review): newer K3s releases may expect a different template file name
# (config-v3.toml.tmpl) - confirm against the K3s docs for the installed version.
cp /var/lib/rancher/k3s/agent/etc/containerd/config.toml /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
vim /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
添加以下配置:
# Mirror endpoints for docker.io; same endpoints as the worker script's registries.yaml.
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
  endpoint = ["https://docker.m.daocloud.io", "https://docker.1ms.run"]
# Restart the K3s agent service on this node.
# NOTE(review): on the server node the unit is presumably named k3s rather than k3s-agent - confirm.
systemctl restart k3s-agent
pod重建:
# Delete every pod in the longhorn-system namespace so that they get rebuilt.
kubectl delete pods -n longhorn-system --all
树莓派UAS驱动兼容性问题
| 芯片型号 | 兼容性 | 备注 |
| --- | --- | --- |
| JMS578/JMS580 | ⭐⭐⭐⭐⭐ | 推荐,稳定性最好 |
| ASM1153E | ⭐⭐⭐⭐ | 良好,注意固件版本 |
| VL716 | ⭐⭐⭐ | 一般,可能有休眠问题 |
| NS1066/NS1068 | ⭐⭐ | 不推荐,兼容性问题多 |
常见芯片VID:PID参考:
| 芯片型号 | VID:PID |
| --- | --- |
| JMS578 | 152d:0578 |
| JMS580 | 152d:0580 |
| ASM1153E | 174c:55aa |
| VL716 | 2109:0716 |
| NS1066 | 2537:1066 |
# Show recent kernel log lines that mention USB devices or their vendor/product IDs.
journalctl -k | grep -iE "usb.*device|idvendor|idproduct" | tail -30
# Show the kernel messages for the device on USB port 2-1.
dmesg | grep "usb 2-1" | head -20
# Edit the kernel command line and add the parameters below to it.
# 152d:0578 is the JMS578 VID:PID; replace it with the ID of your own bridge chip.
# NOTE(review): the two parameters may have been meant as alternatives
# (disable UAS globally vs. a per-device quirk) - confirm before using both.
#   modprobe.blacklist=uas usb-storage.quirks=152d:0578:u
vim /boot/firmware/cmdline.txt

# Edit the firmware config and add the following line:
#   max_usb_current=1
vim /boot/firmware/config.txt
# Install a udev rule that sets power/control to "on" for USB devices when they are added.
echo 'ACTION=="add", SUBSYSTEM=="usb", ATTR{power/control}="on"' | sudo tee /etc/udev/rules.d/50-usb-power-save.rules
update-initramfs -u
udevadm control --reload-rules
reboot

# The remaining commands are presumably meant to be run after the reboot.
# List the USB tree with the driver bound to each device.
lsusb -t | grep Driver
# Sequential read test: reads 10000 MiB from /dev/sda with direct I/O (read-only).
dd if=/dev/sda of=/dev/null bs=1M count=10000 iflag=direct status=progress
批量打标(k3s master执行)
#!/bin/bash
# Label every non-master node (run on the K3s master).
#
# Each node gets node-type=raspberrypi and storage=longhorn, plus a
# memory=4g|8g label derived from `free -g` read over ssh as root.
# A node whose memory cannot be read is reported and skipped; the remaining
# nodes are still processed.
set -e

echo "📌 开始为所有树莓派Agent节点打标签..."

AGENT_NODE_NAMES=$(kubectl get nodes -o name | grep -v "master" | sed 's/node\///g')
if [[ -z "$AGENT_NODE_NAMES" ]]; then
  echo "⚠️ 未找到Agent节点,请先部署Agent并确保加入集群!"
  exit 1
fi

# Word splitting is intended here: one node name per word.
for node in $AGENT_NODE_NAMES; do
  kubectl label node "$node" node-type=raspberrypi --overwrite
  kubectl label node "$node" storage=longhorn --overwrite

  # `|| true` keeps one unreachable node from aborting the whole run under
  # set -e; the empty result is handled by the validation below.
  MEM_TOTAL=$(ssh "root@$node" "free -g | grep Mem | awk '{print \$2}'" || true)

  if ! [[ "$MEM_TOTAL" =~ ^[0-9]+$ ]]; then
    echo "⚠️ 节点 $node 内存识别异常(获取到 ${MEM_TOTAL:-空} GB),请手动检查!"
    continue
  fi

  # free -g rounds down, so an 8 GB board reports 7 and a 4 GB board reports 3.
  if [[ "$MEM_TOTAL" -ge 7 ]]; then
    MEM_LABEL="8g"
  elif [[ "$MEM_TOTAL" -ge 3 ]]; then
    MEM_LABEL="4g"
  else
    echo "⚠️ 节点 $node 内存识别异常(获取到 $MEM_TOTAL GB),请手动检查!"
    continue
  fi

  kubectl label node "$node" "memory=${MEM_LABEL}" --overwrite
  echo "✅ 节点 $node 标签已更新"
done

echo -e "\n📌 节点标签验证:"
kubectl get nodes -o custom-columns=NAME:.metadata.name,NODE_TYPE:.metadata.labels.node-type,MEMORY:.metadata.labels.memory,STORAGE:.metadata.labels.storage
4.Longhorn部署脚本(deploy_longhorn.sh,Master执行)
#!/bin/bash
# deploy_longhorn.sh - install Helm if needed and deploy or upgrade Longhorn
# (run on the master).
#
# Usage: ./deploy_longhorn.sh [NFS_SERVER]
#   NFS_SERVER  address of the NFS server used as backup target
#               (default 192.168.2.20)
set -euo pipefail

NFS_SERVER=${1:-"192.168.2.20"}
NFS_SHARE_PATH="/volume3/nfs-share/longhorn-backup"
HELM_VERSION="v3.15.2"
RETRY_MAX=3
RETRY_INTERVAL=5
TIMEOUT=300
# Longhorn expects NFS targets in the form nfs://<server>:/<path>; the ':'
# between host and path is required.
BACKUP_TARGET="nfs://${NFS_SERVER}:${NFS_SHARE_PATH}"

# Run a command up to RETRY_MAX times, RETRY_INTERVAL seconds apart.
# Returns 0 on the first success and 1 once all attempts have failed. It
# returns instead of exiting so callers can chain a fallback with `||`; a
# bare `retry ...` still stops the script through `set -e`.
retry() {
  local retries=$RETRY_MAX
  local count=0
  while [ "$count" -lt "$retries" ]; do
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] 执行命令:$* (尝试 $((count + 1))/$retries)"
    if "$@"; then
      return 0
    fi
    count=$((count + 1))
    echo "命令执行失败,${RETRY_INTERVAL}秒后重试..."
    sleep "$RETRY_INTERVAL"
  done
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] 命令重试$retries次失败:$*"
  return 1
}

# True when namespace $1 exists.
namespace_exists() {
  kubectl get namespace "$1" &> /dev/null
}

# True when Helm release $1 exists in namespace $2.
helm_release_exists() {
  helm status "$1" -n "$2" &> /dev/null
}

# True when at least one pod matches label selector $2 in namespace $1 and
# every matching pod reports Ready=True. Each pod is printed as
# "<name>=<status>" so a pod that has no Ready condition yet shows up as
# "<name>=" and counts as not ready.
pods_ready() {
  local ns="$1"
  local label="$2"
  local states
  states=$(kubectl get pods -n "$ns" -l "$label" \
    -o jsonpath='{range .items[*]}{.metadata.name}{"="}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}' \
    2> /dev/null) || return 1
  if [ -z "$states" ]; then
    return 1
  fi
  if grep -qv '=True$' <<< "$states"; then
    return 1
  fi
  return 0
}

echo "===== 1. 前置校验 ====="
if ! ping -c 1 -W 3 "${NFS_SERVER}" &> /dev/null; then
  echo "⚠️ 警告:NFS服务器 ${NFS_SERVER} 暂时无法访问,继续部署但备份功能可能失效!"
else
  echo "✅ NFS服务器 ${NFS_SERVER} 可达"
fi

echo -e "\n===== 2. 安装Helm ${HELM_VERSION} ====="
if command -v helm &> /dev/null; then
  CURRENT_HELM_VERSION=$(helm version --short | awk -F'v' '{print $2}' | awk -F'+' '{print $1}')
  echo "✅ Helm已安装,当前版本:${CURRENT_HELM_VERSION}(目标版本:${HELM_VERSION#v})"
else
  echo "📥 开始安装Helm..."
  # Pick the release archive that matches this machine.
  case "$(uname -m)" in
    x86_64) HELM_ARCH="amd64" ;;
    aarch64 | arm64) HELM_ARCH="arm64" ;;
    *)
      echo "❌ 不支持的CPU架构:$(uname -m)"
      exit 1
      ;;
  esac
  HELM_TARBALL="helm-${HELM_VERSION}-linux-${HELM_ARCH}.tar.gz"
  # Work in a private temp dir that is removed on every exit path.
  HELM_TMP=$(mktemp -d)
  trap 'rm -rf -- "$HELM_TMP"' EXIT
  retry wget "https://get.helm.sh/${HELM_TARBALL}" -O "${HELM_TMP}/helm.tar.gz" || {
    echo "🔄 官方源失败,尝试腾讯云备用源..."
    retry wget "https://mirrors.cloud.tencent.com/helm/${HELM_VERSION}/${HELM_TARBALL}" -O "${HELM_TMP}/helm.tar.gz"
  }
  tar -zxf "${HELM_TMP}/helm.tar.gz" -C "$HELM_TMP"
  mv -f "${HELM_TMP}/linux-${HELM_ARCH}/helm" /usr/local/bin/
  if ! command -v helm &> /dev/null; then
    echo "❌ Helm安装失败!"
    exit 1
  fi
  echo "✅ Helm ${HELM_VERSION} 安装成功"
fi

retry helm repo add longhorn https://charts.longhorn.io --force-update
# The stable repo is not used by anything below, so failing to add it only
# produces a warning.
retry helm repo add stable https://mirror.azure.cn/kubernetes/charts/ --force-update \
  || echo "⚠️ stable 仓库添加失败,已忽略(Longhorn 部署不依赖该仓库)"
retry helm repo update

echo -e "\n===== 3. 创建Longhorn命名空间 ====="
if namespace_exists "longhorn-system"; then
  echo "✅ 命名空间 longhorn-system 已存在,跳过创建"
else
  retry kubectl create namespace longhorn-system
  echo "✅ 命名空间 longhorn-system 创建成功"
fi

echo -e "\n===== 4. 部署Longhorn ====="
# One flag list shared by install and upgrade so the two cannot drift apart.
# NOTE(review): confirm the defaultSettings.* key names against the
# values.yaml of the chart version being installed.
LONGHORN_HELM_ARGS=(
  --namespace longhorn-system
  --set defaultSettings.defaultDataPath=/var/lib/longhorn
  --set defaultSettings.replicaCount=2
  --set defaultSettings.minimalAvailableReplicaCount=1
  --set csi.kubeletRootDir=/var/lib/kubelet
  --set persistence.defaultClass=true
  --set persistence.defaultClassReplicaCount=2
  --set "defaultSettings.backupTarget=${BACKUP_TARGET}"
  --set defaultSettings.backupTargetCredentialSecret=""
)
if helm_release_exists "longhorn" "longhorn-system"; then
  echo "⚠️ Longhorn已部署,执行升级(而非重新安装)以保证幂等..."
  retry helm upgrade longhorn longhorn/longhorn "${LONGHORN_HELM_ARGS[@]}"
else
  echo "📦 首次部署Longhorn..."
  retry helm install longhorn longhorn/longhorn "${LONGHORN_HELM_ARGS[@]}"
fi

echo -e "\n===== 5. 等待Longhorn组件启动(超时${TIMEOUT}秒) ====="
start_time=$(date +%s)
while true; do
  current_time=$(date +%s)
  elapsed=$((current_time - start_time))
  if [ "$elapsed" -ge "$TIMEOUT" ]; then
    echo "❌ 等待Pod就绪超时(${TIMEOUT}秒)!请手动检查:kubectl get pods -n longhorn-system"
    break
  fi
  if pods_ready "longhorn-system" "app=longhorn-manager" && pods_ready "longhorn-system" "app=longhorn-ui"; then
    echo "✅ 所有Longhorn Pod已就绪"
    break
  fi
  echo "⌛ 等待Pod就绪中(已耗时${elapsed}秒)... 当前状态:"
  # Status listing is informational only; a transient kubectl error must
  # not end the wait loop under set -e / pipefail.
  kubectl get pods -n longhorn-system -l "app in (longhorn-manager, longhorn-ui)" --no-headers \
    | awk '{print " " $1 " -> " $3}' || true
  sleep 10
done

echo -e "\n===== 6. 暴露Longhorn UI ====="
SVC_TYPE=$(kubectl get svc longhorn-frontend -n longhorn-system -o jsonpath='{.spec.type}' 2> /dev/null || echo "")
if [ "$SVC_TYPE" = "NodePort" ]; then
  echo "✅ Longhorn UI已为NodePort类型,跳过配置"
else
  retry kubectl patch svc longhorn-frontend -n longhorn-system -p '{"spec":{"type":"NodePort","ports":[{"port":80,"targetPort":8000,"nodePort":30000}]}}'
  echo "✅ Longhorn UI已暴露为NodePort: 30000"
fi

echo -e "\n===== 部署完成 ===== 🎉"
echo "📌 关键信息:"
echo " - Longhorn命名空间:longhorn-system"
echo " - UI访问地址:http://任意节点IP:30000"
echo " - 备份目标:${BACKUP_TARGET}"
echo " - 检查Pod状态:kubectl get pods -n longhorn-system"
echo " - 重新部署:直接再次运行本脚本即可(已做幂等处理)"
4.5 Longhorn 清理磁盘元数据
# Reset the Longhorn node object for the current host.
# The command order is kept exactly as given; only commands that had been
# fused together are separated and variable expansions are quoted.
NODE_NAME=$(hostname)

# Save the current node object, then force-delete it.
kubectl -n longhorn-system get nodes.longhorn.io "$NODE_NAME" -o yaml > /tmp/node.yaml
kubectl -n longhorn-system delete nodes.longhorn.io "$NODE_NAME" --force --grace-period=0

# Delete the longhorn-manager pod that runs on this node.
MANAGER_POD=$(kubectl -n longhorn-system get pods -o wide | grep longhorn-manager | grep "$NODE_NAME" | awk '{print $1}')
if [ -n "$MANAGER_POD" ]; then
  kubectl -n longhorn-system delete pod "$MANAGER_POD"
else
  echo "未找到节点 $NODE_NAME 上的 longhorn-manager Pod" >&2
fi

echo "等待 Longhorn 重新发现节点..."
sleep 90
kubectl -n longhorn-system patch nodes.longhorn.io "$NODE_NAME" --type=merge -p '{"spec":{"allowScheduling":true}}'
kubectl -n longhorn-system get nodes.longhorn.io

# NOTE(review): the commands below look like a separate, alternative procedure
# (mark the node NotReady in a copy, re-apply it, then delete the node object);
# confirm when it is meant to be used.
kubectl -n longhorn-system get nodes.longhorn.io "$NODE_NAME" -o yaml > /tmp/node.yaml
sed 's/type: Ready/type: NotReady/' /tmp/node.yaml > /tmp/node-fake.yaml
kubectl -n longhorn-system delete nodes.longhorn.io "$NODE_NAME" --force 2> /dev/null || true
kubectl apply -f /tmp/node-fake.yaml 2> /dev/null || true
sleep 2
kubectl -n longhorn-system delete nodes.longhorn.io "$NODE_NAME"
5. NFS存储集成脚本(deploy_nfs.sh,Master执行)
#!/bin/bash
# deploy_nfs.sh - register the NFS share as a static PV/PVC (run on the master).
#
# Creates or updates a no-provisioner StorageClass, one PersistentVolume that
# points at the NFS export and one PersistentVolumeClaim, then waits for the
# claim to become Bound. Manifests are piped straight to `kubectl apply`, so
# no temporary files are left behind on failure.
set -euo pipefail

NFS_SERVER="192.168.2.20"
NFS_PATH="/volume3/nfs-share"
STORAGE_CLASS_NAME="nfs-storage"
PV_NAME="nfs-pv-01"
PVC_NAME="nfs-pvc"
NAMESPACE="default"
STORAGE_SIZE="450Gi"

# Abort unless the NFS server answers ping and exports NFS_PATH.
pre_check() {
  echo "===== 执行前置检查 ====="
  if ! ping -c 2 -W 3 "${NFS_SERVER}" > /dev/null 2>&1; then
    echo "错误:NFS 服务器 ${NFS_SERVER} 无法 ping 通,请检查网络!"
    exit 1
  fi
  # The export list is captured first; piping showmount into `grep -q` under
  # pipefail could report a failure when grep exits early.
  local exports
  exports=$(showmount -e "${NFS_SERVER}" || true)
  if ! grep -q -- "${NFS_PATH}" <<< "$exports"; then
    echo "错误:NFS 服务器 ${NFS_SERVER} 未共享 ${NFS_PATH} 目录,请检查!"
    exit 1
  fi
  echo "===== 前置检查通过 ====="
}

# Create or update the StorageClass used for the statically provisioned PV.
create_storageclass() {
  echo -e "\n===== 开始创建/更新 NFS 存储类 ====="
  kubectl apply -f - << EOF
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ${STORAGE_CLASS_NAME}
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: Immediate
EOF
  echo "===== NFS 存储类 ${STORAGE_CLASS_NAME} 处理完成 ====="
}

# Create or update the PersistentVolume backed by the NFS export.
create_pv() {
  echo -e "\n===== 开始创建/更新 NFS PV ====="
  kubectl apply -f - << EOF
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ${PV_NAME}
spec:
  capacity:
    storage: ${STORAGE_SIZE}
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  storageClassName: ${STORAGE_CLASS_NAME}
  nfs:
    path: ${NFS_PATH}
    server: ${NFS_SERVER}
EOF
  echo "===== NFS PV ${PV_NAME} 处理完成 ====="
}

# Create or update the PersistentVolumeClaim in NAMESPACE.
create_pvc() {
  echo -e "\n===== 开始创建/更新 NFS PVC ====="
  kubectl apply -f - << EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ${PVC_NAME}
  namespace: ${NAMESPACE}
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: ${STORAGE_SIZE}
  storageClassName: ${STORAGE_CLASS_NAME}
EOF
  echo "===== NFS PVC ${PVC_NAME}(${NAMESPACE}) 处理完成 ====="
}

# Poll the claim until it is Bound: 10 attempts, 5 seconds apart.
# Returns 0 once bound; prints the claim description and exits 1 otherwise.
verify_pvc() {
  echo -e "\n===== 验证 NFS PVC 状态 ====="
  local retries=10
  local interval=5
  local pvc_status
  while [[ ${retries} -gt 0 ]]; do
    pvc_status=$(kubectl get pvc -n "${NAMESPACE}" "${PVC_NAME}" -o jsonpath='{.status.phase}' 2> /dev/null || echo "NotFound")
    if [[ ${pvc_status} == "Bound" ]]; then
      echo "✅ PVC ${PVC_NAME} 状态正常(Bound)"
      kubectl get pvc -n "${NAMESPACE}" "${PVC_NAME}"
      return 0
    elif [[ ${pvc_status} == "NotFound" ]]; then
      echo "❌ PVC ${PVC_NAME} 不存在,重试中..."
    else
      echo "⚠️ PVC ${PVC_NAME} 当前状态:${pvc_status},等待绑定(剩余重试次数:${retries})"
    fi
    retries=$((retries - 1))
    sleep "${interval}"
  done
  echo "❌ 超时!PVC ${PVC_NAME} 未绑定成功,请检查 PV/PVC 配置或 NFS 服务"
  kubectl describe pvc -n "${NAMESPACE}" "${PVC_NAME}"
  exit 1
}

# Print a sample Pod manifest that mounts the claim. The namespace is set on
# the Pod metadata: a persistentVolumeClaim volume has no namespace field and
# always refers to a claim in the Pod's own namespace.
print_example() {
  echo -e "\n===== NFS 存储集成完成 ====="
  echo "NFS PVC 信息:${PVC_NAME}(${NAMESPACE} 命名空间)"
  echo "使用示例(测试 Pod YAML):"
  cat << EOF
apiVersion: v1
kind: Pod
metadata:
  name: nfs-test-pod
  namespace: ${NAMESPACE}
spec:
  containers:
    - name: test-container
      image: busybox
      command: ["/bin/sh", "-c", "sleep 3600"]
      volumeMounts:
        - name: nfs-volume
          mountPath: /mnt/nfs
  volumes:
    - name: nfs-volume
      persistentVolumeClaim:
        claimName: ${PVC_NAME}
EOF
}

# Entry point: run every step in order.
main() {
  pre_check
  create_storageclass
  create_pv
  create_pvc
  verify_pvc
  print_example
  echo -e "\n===== 所有操作完成 ====="
}

main
部署执行步骤
1. 基础配置阶段
# Run the base preparation script on every node.
chmod +x all_nodes_prepare.sh
./all_nodes_prepare.sh

# Reboot so the cgroup kernel parameters take effect.
reboot

# After the reboot: confirm the cgroup parameters are on the kernel command line.
grep cgroup /boot/firmware/cmdline.txt
2. K3s集群部署阶段
# On the master (192.168.2.40):
chmod +x master_deploy.sh
./master_deploy.sh

# On each worker (192.168.2.50~54):
chmod +x worker_deploy.sh
./worker_deploy.sh

# Back on the master: label the worker nodes.
kubectl label node k3s-worker-01 node-type=raspberrypi storage=longhorn
kubectl label node k3s-worker-02 node-type=raspberrypi storage=longhorn
kubectl label node k3s-worker-03 node-type=raspberrypi storage=longhorn
kubectl label node k3s-worker-04 node-type=raspberrypi storage=longhorn
kubectl label node k3s-worker-05 node-type=raspberrypi storage=longhorn

# Check that all nodes have joined.
kubectl get nodes
3. 存储与工具部署阶段
# On the master: deploy Longhorn.
chmod +x deploy_longhorn.sh
./deploy_longhorn.sh

# On the master: register the NFS share as PV/PVC.
chmod +x deploy_nfs.sh
./deploy_nfs.sh
NFS挂载的实际使用示例
1. 作为应用存储使用
# Two nginx replicas serving files from the shared NFS claim (nfs-pvc).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-demo-app
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nfs-demo
  template:
    metadata:
      labels:
        app: nfs-demo
    spec:
      containers:
        - name: nginx
          image: nginx:alpine
          ports:
            - containerPort: 80
          volumeMounts:
            - name: nfs-data
              mountPath: /usr/share/nginx/html
      volumes:
        - name: nfs-data
          persistentVolumeClaim:
            claimName: nfs-pvc
执行部署:
# Create the Deployment, then expose it on a NodePort.
kubectl apply -f nfs-app.yaml
kubectl expose deployment nfs-demo-app --port=80 --type=NodePort
2. Longhorn备份使用NFS
访问Longhorn UI(http://任意节点IP:30000)
进入「Backup」页面,可看到已配置的NFS备份目标
对任意Volume创建备份,数据会自动存储到NFS的/volume3/nfs-share/longhorn-backup目录
总结
核心优化点:替换K3s国内镜像源、配置国内系统源、关闭UFW、开启树莓派CGroup,确保国内环境可正常部署。
NFS挂载意义:Master挂载NFS主要用于Longhorn备份、集群配置存储和应用静态数据共享,是集群存储架构的重要补充。
关键操作:树莓派执行基础配置后必须重启以生效CGroup,Worker节点需正确挂载SSD并标记节点标签,Longhorn需配置NFS作为备份目标。
所有脚本已适配国内网络环境和硬件架构,执行前需确保NFS服务端可访问、树莓派SSD已正确连接,部署完成后可通过Longhorn UI和hiclaw Dashboard管理集群存储和容器。