当我们部署私有 Harbor 镜像仓库且采用 Ceph S3 作为存储后端时,可能会经常遇到上传大容量镜像不断重试(Retrying)的问题。特别是在管理 AI 模型文件发布的场景下,当采用 OCI 方式封装模型文件(镜像单层超过 5GB)并上传到 Harbor 后,此现象尤为突出。其主要现象如下:
docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
x.x.x.x:80/library/centos latest adf05892850f 3 days ago 7.61GB
docker push x.x.x.x:80/library/centos
Using default tag: latest
The push refers to repository [x.x.x.x:80/library/centos]
9e607bb861a7: Retrying in 4 seconds
{
"@timestamp":"2022-04-15T17:24:25+08:00",
"@fields":{
"remote_addr":"x.x.x.x",
"remote_user":"",
"body_bytes_sent":"223",
"request_time":"0.057",
"status":"404",
"request":"PUT /xxx-harbor/docker/registry/v2/blobs/sha256/72/729ec3a6ada3a6d26faca9b4779a037231f1762f759ef34c08bdd61bf52cd704/data?partNumber=1&uploadId=2~EE0O6o35Ceuqg9ZII4MAT8_gnaEkr3n HTTP/1.1",
"request_method":"PUT",
"request_header":"{\"x-amz-copy-source-range\":\"bytes=0-10485759\",\"x-amz-content-sha256\":\"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\",\"accept-encoding\":\"gzip\",\"host\":\"x.x.x.x\",\"user-agent\":\"aws-sdk-go\\/1.15.11 (go1.17.7; linux; amd64)\",\"x-amz-date\":\"20220415T092425Z\",\"x-amz-copy-source\":\"xxx-harbor\\/docker\\/registry\\/v2\\/repositories\\/library\\/centos\\/_uploads\\/9731d0e9-5d78-4fa3-a42b-2cf8f08847f4\\/data\",\"content-length\":\"0\",\"authorization\":\"AWS4-HMAC-SHA256 Credential=7IRCELLC8J9BTBNQV87C\\/20220415\\/default\\/s3\\/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-copy-source;x-amz-copy-source-range;x-amz-date, Signature=7232dce2e274a01a9838760c502d78f5ce1ad12b653c2f27986fe585c7594eb3\"}"
}
}
Ceph S3 账户信息
账户: cloudsre
租户: legacy
bucket: harbor
Harbor 对接 Ceph S3 配置
storage:
s3:
region: default
bucket: harbor
regionendpoint: http://x.x.x.x
multipartcopythresholdsize: 5368709120
cache:
layerinfo: redis
maintenance:
uploadpurging:
enabled: false
delete:
enabled: true
redirect:
disable: false
Harbor 当前配置:multipartcopythresholdsize = 5G(默认为 32M,最大为 5G)。实测现象可归纳为:
- 当镜像层文件大小 > threshold 时,registry 会对大文件进行分片拷贝(UploadPartCopy),rgw 报错 404,push 镜像不断 retry;
- 当镜像层文件大小 < threshold 时,registry 将文件整体拷贝(CopyObject),push 镜像成功;
- 若改用 default 租户下的账户,则无论是否有大文件,push 镜像都成功。
结合日志中的请求头 "x-amz-copy-source":"xxx-harbor\/docker\/registry\/v2\/repositories\/library\/centos\/_uploads\/9731d0e9-5d78-4fa3-a42b-2cf8f08847f4\/data" 可以分析出:
整体拷贝时,ceph rgw 可以根据 x-amz-copy-source 自动识别源对象所属的租户(legacy)与账户(cloudsre)信息,即能正确找到需要拷贝的源对象;
而分片拷贝(UploadPartCopyInput)时,ceph rgw 不能根据 x-amz-copy-source 获取正确的租户、账户信息,于是回退到 "default" 租户下的 bucket:xxx-harbor 中查找源对象进行拷贝,因对象不存在而报 404。
阅读 rgw 相关代码得知,分片拷贝时正确、且可以显式指定租户的 bucket 路径写法为:legacy:xxx-harbor
package main
import (
"fmt"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)
var (
// legacy/cloudsre
accessKey = "your-accessKey"
secretKey = "your-secretKey"
endPoint = "http://x.x.x.x:7480"
)
func main() {
sess, _ := session.NewSession(&aws.Config{
Credentials: credentials.NewStaticCredentials(accessKey, secretKey, ""),
Endpoint: aws.String(endPoint),
Region: aws.String("default"),
DisableSSL: aws.Bool(true),
S3ForcePathStyle: aws.Bool(true),
})
svc := s3.New(sess)
res, err := svc.CreateMultipartUpload(&s3.CreateMultipartUploadInput{
Bucket: aws.String("xxx-harbor"),
Key: aws.String("cp"),
})
uploadID := res.UploadId
fmt.Println(*uploadID)
copy_input := &s3.UploadPartCopyInput{
Bucket: aws.String("xxx-harbor"),
CopySource: aws.String("xxx-harbor/x.c"),
CopySource: aws.String("legacy:xxx-harbor/x.c"),
Key: aws.String("cp"),
PartNumber: aws.Int64(1),
UploadId: uploadID,
}
result, err := svc.UploadPartCopy(copy_input)
fmt.Println(result, err)
c_part := []*s3.CompletedPart{}
c_part = append(c_part, &s3.CompletedPart{
ETag: result.CopyPartResult.ETag,
PartNumber: aws.Int64(int64(1)),
})
part := &s3.CompletedMultipartUpload{
Parts: c_part,
}
res1, err := svc.CompleteMultipartUpload(&s3.CompleteMultipartUploadInput{
Bucket: aws.String("test"),
Key: aws.String("cp"),
UploadId: uploadID,
MultipartUpload: part,
})
fmt.Println(res1, err)
}
storage:
s3:
region: default
+ bucket: legacy:harbor-prod
- bucket: harbor-prod
regionendpoint: http://x.x.x.x
+ chunksize: 10485760
+ multipartcopychunksize: 10485760
multipartcopythresholdsize: 5368709120
cache:
layerinfo: redis
maintenance:
uploadpurging:
enabled: false
delete:
enabled: true
redirect:
disable: false