#!/usr/bin/env bash
set -eo pipefail

# Sample image listing lines kept for reference (short and full-length IDs):
# hello-world   latest   ef872312fe1b   3 months ago   910 B
# hello-world   latest   ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9   3 months ago   910 B
# debian   latest   f6fab3b798be   10 weeks ago   85.1 MB
# debian   latest   f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd   10 weeks ago   85.1 MB

# check if essential commands are in our PATH
for cmd in curl jq; do
	# both stdout and stderr of the probe are discarded; only its exit status matters
	if ! command -v "$cmd" > /dev/null 2>&1; then
		echo >&2 "error: \"$cmd\" not found!"
		exit 1
	fi
done

# Print a two-line usage summary on stdout (callers redirect it as needed).
# Arguments: $1 - optional exit status; when given, the script exits with it
#                 after printing, otherwise the function simply returns.
usage() {
	echo "usage: $0 dir image[:tag][@digest] ..."
	echo "       $0 /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7"
	[ -z "${1:-}" ] || exit "$1"
}
dir="$1" # dir for building tar in
shift || usage 1 >&2

# both a non-empty target directory and at least one image reference are required
if [ "$#" -eq 0 ] || [ -z "$dir" ]; then
	usage 2 >&2
fi
mkdir -p "$dir"

# hacky workarounds for Bash 3 support (no associative arrays)
images=()
rm -f "$dir"/tags-*.tmp
manifestJsonEntries=()
doNotGenerateManifestJson=
# repositories[busybox]='"latest": "...", "ubuntu-14.04": "..."'

# bash v4 on Windows CI requires CRLF separator... and linux doesn't seem to care either way
newlineIFS=$'\n'
# BASH_VERSINFO[0] is the major version number of the running shell
major="${BASH_VERSINFO[0]}"
if [ "$major" -ge 4 ]; then
	newlineIFS=$'\r\n'
fi

registryBase='https://registry-1.docker.io'
authBase='https://auth.docker.io'
authService='registry.docker.io'

# https://github.com/moby/moby/issues/33700
# Download one blob of an image into a local file.
#
# The first request carries the bearer token and does not follow redirects;
# if the registry answers with a 3xx status, the "Location" target is fetched
# by a second request that carries no Authorization header.
#
# Globals:   registryBase (read)
# Arguments: $1 - bearer token
#            $2 - image repository (e.g. "library/debian")
#            $3 - blob digest
#            $4 - path of the file to write
#            $5... - extra arguments passed to both curl invocations
# Returns:   non-zero if a redirect response has no "Location" header or a
#            curl invocation fails
fetch_blob() {
	local token="$1"
	local image="$2"
	local digest="$3"
	local targetFile="$4"
	shift 4
	local curlArgs=("$@")

	local curlHeaders
	curlHeaders="$(
		curl -S "${curlArgs[@]}" \
			-H "Authorization: Bearer $token" \
			"$registryBase/v2/$image/blobs/$digest" \
			-o "$targetFile" \
			-D-
	)"
	curlHeaders="$(echo "$curlHeaders" | tr -d '\r')"

	# the status line is "HTTP/1.1 307 ..." for HTTP/1.x, but "HTTP/2 307" (no minor version) for HTTP/2
	if grep -qE '^HTTP/[0-9]+(\.[0-9]+)? 3' <<< "$curlHeaders"; then
		# whatever the redirect response body was, it is not the blob
		rm -f "$targetFile"

		local blobRedirect
		blobRedirect="$(echo "$curlHeaders" | awk -F ': ' 'tolower($1) == "location" { print $2; exit }')"
		if [ -z "$blobRedirect" ]; then
			echo >&2 "error: failed fetching '$image' blob '$digest'"
			echo "$curlHeaders" | head -1 >&2
			return 1
		fi

		curl -fSL "${curlArgs[@]}" \
			"$blobRedirect" \
			-o "$targetFile"
	fi
}

# handle 'application/vnd.docker.distribution.manifest.v2+json' manifest
#
# Downloads the image config and every layer named by the manifest into "$dir",
# writes a fake per-layer directory (VERSION, json, layer.tar) for each layer,
# and appends one entry for the image to the manifestJsonEntries array.
#
# Arguments: $1 - the manifest JSON document
# Globals:   token, image, tag, dir, imageIdentifier, authBase, authService (read)
#            imageId (written: fake ID of the top layer, read by the caller afterwards)
#            manifestJsonEntries (written: one entry appended)
# Exits the script on an empty layer list or an unknown layer media type.
handle_single_manifest_v2() {
	local manifestJson="$1"
	shift

	local configDigest
	configDigest="$(echo "$manifestJson" | jq --raw-output '.config.digest')"
	local configId="${configDigest#*:}" # strip off "sha256:"
	local configFile="$configId.json"
	fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s

	local layersFs
	layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.layers[]')"

	# split into lines with "read" (available in Bash 3, unlike "mapfile") and
	# drop a trailing carriage return so CRLF-terminated output is handled too
	local cr=$'\r'
	local layers=()
	local layerMeta
	while IFS= read -r layerMeta; do
		layerMeta="${layerMeta%"$cr"}"
		[ -z "$layerMeta" ] || layers=("${layers[@]}" "$layerMeta")
	done <<< "$layersFs"

	if [ "${#layers[@]}" -eq 0 ]; then
		echo >&2 "error: no layers found in manifest ($imageIdentifier)"
		exit 1
	fi

	echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."

	# this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers
	local starterJson='{
		"created": "0001-01-01T00:00:00Z",
		"container_config": {
			"Hostname": "",
			"Domainname": "",
			"User": "",
			"AttachStdin": false,
			"AttachStdout": false,
			"AttachStderr": false,
			"Tty": false,
			"OpenStdin": false,
			"StdinOnce": false,
			"Env": null,
			"Cmd": null,
			"Image": "",
			"Volumes": null,
			"WorkingDir": "",
			"Entrypoint": null,
			"OnBuild": null,
			"Labels": null
		}
	}'

	local layerId=
	local layerFiles=()
	local layerMediaType layerDigest parentId parentJson addJson layerTar layerToken
	for layerMeta in "${layers[@]}"; do
		layerMediaType="$(echo "$layerMeta" | jq --raw-output '.mediaType')"
		layerDigest="$(echo "$layerMeta" | jq --raw-output '.digest')"

		# save the previous layer's ID
		parentId="$layerId"
		# create a new fake layer ID based on this layer's digest and the previous layer's fake ID
		layerId="$(printf '%s\n%s\n' "$parentId" "$layerDigest" | sha256sum | cut -d' ' -f1)"
		# this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value)

		mkdir -p "$dir/$layerId"
		echo '1.0' > "$dir/$layerId/VERSION"

		if [ ! -s "$dir/$layerId/json" ]; then
			parentJson="$(printf ', parent: "%s"' "$parentId")"
			# the "parent" key is only added when there is a previous layer
			addJson="$(printf '{ id: "%s"%s }' "$layerId" "${parentId:+$parentJson}")"
			jq --null-input "$addJson + $starterJson" > "$dir/$layerId/json"
		fi

		case "$layerMediaType" in
			application/vnd.docker.image.rootfs.diff.tar.gzip)
				layerTar="$layerId/layer.tar"
				layerFiles=("${layerFiles[@]}" "$layerTar")
				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerTar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				# a fresh token is requested for every layer download
				layerToken="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
				fetch_blob "$layerToken" "$image" "$layerDigest" "$dir/$layerTar" --progress-bar
				;;

			*)
				echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'"
				exit 1
				;;
		esac
	done

	# set "$imageId" to the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons)
	# deliberately NOT "local": the caller reads it after this function returns
	imageId="$layerId"

	# munge the top layer image manifest to have the appropriate image configuration for older daemons
	local imageOldConfig
	imageOldConfig="$(jq --raw-output --compact-output '{ id: .id } + if .parent then { parent: .parent } else {} end' "$dir/$imageId/json")"
	jq --raw-output "$imageOldConfig + del(.history, .rootfs)" "$dir/$configFile" > "$dir/$imageId/json"

	# values are handed to jq as data (--arg / stdin) instead of being spliced into the jq program text
	local manifestJsonEntry
	manifestJsonEntry="$(
		printf '%s\n' "${layerFiles[@]}" \
			| jq --raw-input '.' \
			| jq --slurp --raw-output \
				--arg config "$configFile" \
				--arg repoTag "${image#library\/}:$tag" \
				'{ Config: $config, RepoTags: [ $repoTag ], Layers: . }'
	)"
	manifestJsonEntries=("${manifestJsonEntries[@]}" "$manifestJsonEntry")
}

# Print the CPU architecture name used to pick a manifest from a manifest list.
#
# Sources are tried in this order: the TARGETARCH environment variable,
# "go env GOARCH", "dpkg --print-architecture", "uname -m"; if none is
# available, a warning is printed on stderr and "amd64" is used.
#
# Outputs: the architecture name on stdout
# Returns: 0 on success; exits 1 on mips when only "uname" is available
get_target_arch() {
	if [ -n "${TARGETARCH:-}" ]; then
		echo "${TARGETARCH}"
		return 0
	fi

	if command -v go > /dev/null 2>&1; then
		go env GOARCH
		return 0
	fi

	local debArch uArch

	if command -v dpkg > /dev/null 2>&1; then
		debArch="$(dpkg --print-architecture)"
		case "${debArch}" in
			armel | armhf)
				echo "arm"
				return 0
				;;
			*64el)
				# rewrite a trailing "el" as "le" (e.g. "mips64el" becomes "mips64le")
				echo "${debArch%el}le"
				return 0
				;;
			*)
				echo "${debArch}"
				return 0
				;;
		esac
	fi

	if command -v uname > /dev/null 2>&1; then
		uArch="$(uname -m)"
		case "${uArch}" in
			x86_64)
				echo amd64
				return 0
				;;
			arm | armv[0-9]*)
				echo arm
				return 0
				;;
			aarch64)
				echo arm64
				return 0
				;;
			mips*)
				echo >&2 "I see you are running on mips but I don't know how to determine endianness yet, so I cannot select a correct arch to fetch."
				echo >&2 "Consider installing \"go\" on the system which I can use to determine the correct arch or specify it explictly by setting TARGETARCH"
				exit 1
				;;
			*)
				echo "${uArch}"
				return 0
				;;
		esac
	fi

	# default value
	echo >&2 "Unable to determine CPU arch, falling back to amd64. You can specify a target arch by setting TARGETARCH"
	echo amd64
}

# Process every remaining command-line argument as an image reference of the
# form image[:tag][@digest]: fetch its manifest, download its layers into
# "$dir", and record a '"tag": "imageId"' pair in "$dir/tags-<image>.tmp".
while [ $# -gt 0 ]; do
	imageTag="$1"
	shift
	image="${imageTag%%[:@]*}"
	# whatever follows the repository name: "", ":tag", "@digest" or ":tag@digest"
	imageTag="${imageTag:${#image}}"
	digest=''
	case "$imageTag" in
		*@*)
			digest="${imageTag#*@}"
			imageTag="${imageTag%%@*}"
			;;
	esac
	tag="${imageTag#:}"
	tag="${tag:-latest}"
	# without an explicit digest, the tag is used as the manifest reference
	if [ -z "$digest" ]; then
		digest="$tag"
	fi

	# add prefix library if passed official image
	if [[ "$image" != *"/"* ]]; then
		image="library/$image"
	fi

	imageFile="${image//\//_}" # "/" can't be in filenames :)

	token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"

	manifestJson="$(
		curl -fsSL \
			-H "Authorization: Bearer $token" \
			-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
			-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
			-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
			"$registryBase/v2/$image/manifests/$digest"
	)"
	if [ "${manifestJson:0:1}" != '{' ]; then
		echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:"
		echo >&2 "  $manifestJson"
		exit 1
	fi

	imageIdentifier="$image:$tag@$digest"

	schemaVersion="$(echo "$manifestJson" | jq --raw-output '.schemaVersion')"
	case "$schemaVersion" in
		2)
			mediaType="$(echo "$manifestJson" | jq --raw-output '.mediaType')"

			case "$mediaType" in
				application/vnd.docker.distribution.manifest.v2+json)
					handle_single_manifest_v2 "$manifestJson"
					;;
				application/vnd.docker.distribution.manifest.list.v2+json)
					layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.manifests[]')"
					IFS="$newlineIFS"
					mapfile -t layers <<< "$layersFs"
					unset IFS

					found=""
					targetArch="$(get_target_arch)"
					# parse first level multi-arch manifest
					for i in "${!layers[@]}"; do
						layerMeta="${layers[$i]}"
						maniArch="$(echo "$layerMeta" | jq --raw-output '.platform.architecture')"
						if [ "$maniArch" = "${targetArch}" ]; then
							digest="$(echo "$layerMeta" | jq --raw-output '.digest')"
							# get second level single manifest
							submanifestJson="$(
								curl -fsSL \
									-H "Authorization: Bearer $token" \
									-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
									-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
									-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
									"$registryBase/v2/$image/manifests/$digest"
							)"
							handle_single_manifest_v2 "$submanifestJson"
							found="found"
							break
						fi
					done
					if [ -z "$found" ]; then
						# report the architecture that was asked for, not the last one inspected
						echo >&2 "error: manifest for $targetArch is not found"
						exit 1
					fi
					;;
				*)
					echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'"
					exit 1
					;;
			esac
			;;

		1)
			if [ -z "$doNotGenerateManifestJson" ]; then
				echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'"
				echo >&2 "  this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)"
				echo >&2
				doNotGenerateManifestJson=1
			fi

			layersFs="$(echo "$manifestJson" | jq --raw-output '.fsLayers | .[] | .blobSum')"
			IFS="$newlineIFS"
			mapfile -t layers <<< "$layersFs"
			unset IFS

			history="$(echo "$manifestJson" | jq '.history | [.[] | .v1Compatibility]')"
			imageId="$(echo "$history" | jq --raw-output '.[0]' | jq --raw-output '.id')"

			echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
			for i in "${!layers[@]}"; do
				imageJson="$(echo "$history" | jq --raw-output ".[${i}]")"
				layerId="$(echo "$imageJson" | jq --raw-output '.id')"
				imageLayer="${layers[$i]}"

				mkdir -p "$dir/$layerId"
				echo '1.0' > "$dir/$layerId/VERSION"

				echo "$imageJson" > "$dir/$layerId/json"

				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerId/layer.tar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
				fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress-bar
			done
			;;

		*)
			echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'"
			exit 1
			;;
	esac

	echo

	# the first tag of an image registers the image; later tags are comma-separated onto the same file
	if [ -s "$dir/tags-$imageFile.tmp" ]; then
		echo -n ', ' >> "$dir/tags-$imageFile.tmp"
	else
		images=("${images[@]}" "$image")
	fi
	echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp"
done
# write the old-style "repositories" index from the per-image tags-*.tmp files:
# { "<image>": { "<tag>": "<id>", ... }, ... }
printf '{' > "$dir/repositories"
firstImage=1
for image in "${images[@]}"; do
	imageFile="${image//\//_}" # "/" can't be in filenames :)
	image="${image#library\/}"

	# every entry except the first is preceded by a comma
	[ "$firstImage" ] || printf ',' >> "$dir/repositories"
	firstImage=
	printf '\n\t' >> "$dir/repositories"
	printf '"%s": { %s }' "$image" "$(cat "$dir/tags-$imageFile.tmp")" >> "$dir/repositories"
done
printf '\n}\n' >> "$dir/repositories"

rm -f "$dir"/tags-*.tmp

if [ -z "$doNotGenerateManifestJson" ] && [ "${#manifestJsonEntries[@]}" -gt 0 ]; then
	# each entry is a JSON document; --slurp collects them into a single array
	printf '%s\n' "${manifestJsonEntries[@]}" | jq --slurp --raw-output '.' > "$dir/manifest.json"
else
	rm -f "$dir/manifest.json"
fi

echo "Download of images into '$dir' complete."
echo "Use something like the following to load the result into a Docker daemon:"
echo "  tar -cC '$dir' . | docker load"