diff --git a/libnetwork/cmd/ssd/Dockerfile b/libnetwork/cmd/ssd/Dockerfile
new file mode 100755
index 0000000000..0542247920
--- /dev/null
+++ b/libnetwork/cmd/ssd/Dockerfile
@@ -0,0 +1,34 @@
+FROM alpine:3.7
+ENV PACKAGES="\
+    musl \
+    linux-headers \
+    build-base \
+    util-linux \
+    bash \
+    git \
+    ca-certificates \
+    python2 \
+    python2-dev \
+    py-setuptools \
+    iproute2 \
+    curl \
+    strace \
+    drill \
+    ipvsadm \
+    iperf \
+    ethtool \
+"
+
+RUN echo \
+  && apk add --no-cache $PACKAGES \
+  && if [[ ! -e /usr/bin/python ]]; then ln -sf /usr/bin/python2.7 /usr/bin/python; fi \
+  && if [[ ! -e /usr/bin/python-config ]]; then ln -sf /usr/bin/python2.7-config /usr/bin/python-config; fi \
+  && if [[ ! -e /usr/bin/easy_install ]]; then ln -sf /usr/bin/easy_install-2.7 /usr/bin/easy_install; fi \
+  && easy_install pip \
+  && pip install --upgrade pip \
+  && if [[ ! -e /usr/bin/pip ]]; then ln -sf /usr/bin/pip2.7 /usr/bin/pip; fi \
+  && echo
+
+ADD ssd.py /
+RUN pip install git+https://github.com/docker/docker-py.git
+ENTRYPOINT [ "python", "/ssd.py"]
diff --git a/libnetwork/cmd/ssd/README.md b/libnetwork/cmd/ssd/README.md
new file mode 100755
index 0000000000..a0a0048da5
--- /dev/null
+++ b/libnetwork/cmd/ssd/README.md
@@ -0,0 +1,47 @@
+# Docker Swarm Service Driller (ssd)
+
+ssd is a troubleshooting utility for Docker swarm networks.
+
+### control-plane and datapath consistency check on a node
+ssd checks for the consistency between docker network control-plane (from the docker daemon in-memory state) and kernel data path programming. Currently the tool checks only for the consistency of the Load balancer (implemented using IPVS).
+
+In a three node swarm cluster ssd status for an overlay network `ov2` which has three services running, each replicated to 3 instances.
+
+````bash
+vagrant@net-1:~/code/go/src/github.com/docker/docker-e2e/tests$ docker run -v /var/run/docker.sock:/var/run/docker.sock -v /var/run/docker/netns:/var/run/docker/netns --privileged --net=host sanimej/ssd ov2
+Verifying LB programming for containers on network ov2
+Verifying container /s2.3.ltrdwef0iqf90rqauw3ehcs56...
+service s2... OK
+service s3... OK
+service s1... OK
+Verifying container /s3.3.nyhwvdvnocb4wftyhb8dr4fj8...
+service s2... OK
+service s3... OK
+service s1... OK
+Verifying container /s1.3.wwx5tuxhnvoz5vrb8ohphby0r...
+service s2... OK
+service s3... OK
+service s1... OK
+Verifying LB programming for containers on network ingress
+Verifying container Ingress...
+service web... OK
+````
+
+ssd checks the required iptables programming to direct an incoming packet arriving at `node IP:published port` to the right `ingress sandbox IP:port`.
+
+### control-plane consistency check across nodes in a cluster
+
+Docker networking uses a gossip protocol to synchronize networking state across nodes in a cluster. ssd's `gossip-consistency` command verifies if the state maintained by all the nodes is consistent.
+
+````bash
+In a three node cluster with services running on an overlay network ov2 ssd consistency-checker shows
+
+vagrant@net-1:~/code/go/src/github.com/docker/docker-e2e/tests$ docker run -v /var/run/docker.sock:/var/run/docker.sock -v /var/run/docker/netns:/var/run/docker/netns --privileged sanimej/ssd ov2 gossip-consistency
+Node id: sjfp0ca8f43rvnab6v7f21gq0 gossip hash c57d89094dbb574a37930393278dc282
+
+Node id: bg228r3q9095grj4wxkqs80oe gossip hash c57d89094dbb574a37930393278dc282
+
+Node id: 6jylcraipcv2pxdricqe77j5q gossip hash c57d89094dbb574a37930393278dc282
+````
+
+This is the hash digest of the control-plane state for the network `ov2` from all the cluster nodes. If the values have a mismatch `docker network inspect --verbose` on the individual nodes can help in identifying what the specific difference is.
#!/usr/bin/python
"""ssd: Docker Swarm Service Driller.

Checks consistency between the docker network control plane (the daemon's
in-memory state, read via `docker network inspect --verbose`) and the kernel
datapath programming: the IPVS load balancer inside each container's network
namespace, and the host's DOCKER-INGRESS iptables rules for published ports.
"""

import sys
import signal
import time
import re
import subprocess
import json
import hashlib

# Dotted-quad IPv4 matcher. Dots are escaped; the original pattern used bare
# '.' which matched any character between the octets.
ipv4match = re.compile(
    r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])\.'
    r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])\.'
    r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])\.'
    r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])'
)


def parse_port_pairs(plist):
    """Parse docker's "label:port,label:port" strings into (port, port) tuples.

    Each entry is split on ':' and ',' and tokens 1 and 3 (the two port
    values) are kept, matching the original tool's behavior.
    """
    pairs = []
    for entry in plist:
        tokens = entry.replace(':', ' ').replace(',', ' ').split()
        pairs.append((tokens[1], tokens[3]))
    return pairs


def check_iptables(name, plist):
    """Verify the host's DOCKER-INGRESS DNAT rules for service `name`.

    Every published port must have a DNAT rule redirecting the packet to the
    ingress sandbox IP; a diagnostic is printed for each missing rule.
    """
    ports = parse_port_pairs(plist)

    # Get the ingress sandbox's docker_gwbridge network IP.
    # Published ports get DNAT'ed to this IP.
    ip = subprocess.check_output(
        ['/usr/bin/nsenter', '--net=/var/run/docker/netns/ingress_sbox',
         '/bin/bash', '-c',
         'ifconfig eth1 | grep "inet\\ addr" | cut -d: -f2 | cut -d" " -f1'],
        universal_newlines=True)
    ip = ip.rstrip()

    for p in ports:
        # iptables -C returns non-zero when the rule does not exist.
        rule = ('/sbin/iptables -t nat -C DOCKER-INGRESS -p tcp '
                '--dport {0} -j DNAT --to {1}:{2}').format(p[1], ip, p[1])
        try:
            subprocess.check_output(["/bin/bash", "-c", rule])
        except subprocess.CalledProcessError:
            print("Service {0}: host iptables DNAT rule for port {1} -> ingress sandbox {2}:{3} missing".format(name, p[1], ip, p[1]))


def get_namespaces(data, ingress=False):
    """Map container name -> network namespace path for the inspected network.

    For the ingress network the well-known ingress sandbox namespace is
    returned instead of per-container namespaces.
    """
    if ingress is True:
        return {"Ingress": "/var/run/docker/netns/ingress_sbox"}

    containers = {}
    # One inspect per container (the original inspected each container twice
    # and crashed with NameError when data["Containers"] was empty).
    for sandbox in {str(c) for c in data["Containers"]}:
        inspect = cli.inspect_container(sandbox)
        containers[str(inspect["Name"])] = str(inspect["NetworkSettings"]["SandboxKey"])
    return containers


def check_network(nw_name, ingress=False):
    """Compare control-plane LB state for `nw_name` with kernel IPVS state.

    For each container on the network, reads the IPVS tables in its namespace
    and cross-checks the firewall marks and backend task IPs against the
    daemon's view; for the ingress network also validates iptables DNAT rules.
    """
    print("Verifying LB programming for containers on network %s" % nw_name)

    data = cli.inspect_network(nw_name, verbose=True)

    services = data["Services"]
    # service name -> LB firewall-mark index, as strings.
    fwmarks = {str(service): str(svalue["LocalLBIndex"])
               for service, svalue in services.items()}

    # fwmark -> list of backend task IPs from the control plane.
    stasks = {}
    for service, svalue in services.items():
        if service == "":
            continue
        stasks[fwmarks[str(service)]] = [str(task["EndpointIP"])
                                         for task in svalue["Tasks"]]

        # For services in the ingress network verify the iptables rules
        # that direct ingress (published port) to backend (target port).
        if ingress is True:
            check_iptables(service, svalue["Ports"])

    containers = get_namespaces(data, ingress)
    for container, namespace in containers.items():
        print("Verifying container %s..." % container)
        ipvs = subprocess.check_output(
            ['/usr/bin/nsenter', '--net=%s' % namespace,
             '/usr/sbin/ipvsadm', '-ln'],
            universal_newlines=True)

        # Parse ipvsadm output: an "FWM <mark>" header line starts a virtual
        # service; following "->" lines are its real (backend) servers.
        mark = ""
        realmark = {}
        for line in ipvs.splitlines():
            if "FWM" in line:
                mark = re.findall("[0-9]+", line)[0]
                realmark[str(mark)] = []
            elif "->" in line:
                if mark == "":
                    continue
                ip = ipv4match.search(line)
                if ip is not None:
                    realmark[mark].append(ip.group(0))
            else:
                mark = ""

        # Report marks present on only one side, then drop them; iterate over
        # a snapshot because we delete while walking (required on py3).
        for key in list(realmark.keys()):
            if key not in stasks:
                print("LB Index %s present in IPVS but missing in docker daemon" % key)
                del realmark[key]

        # NOTE(review): stasks is shared across the container loop, so a mark
        # missing in one container's IPVS is also skipped for later
        # containers — preserved from the original; verify intent.
        for key in list(stasks.keys()):
            if key not in realmark:
                print("LB Index %s present in docker daemon but missing in IPVS" % key)
                del stasks[key]

        for key in realmark:
            service = "--Invalid--"
            for sname, idx in fwmarks.items():
                if key == idx:
                    service = sname
            if len(set(realmark[key])) != len(set(stasks[key])):
                print("Incorrect LB Programming for service %s" % service)
                print("control-plane backend tasks:")
                for task in stasks[key]:
                    print(task)
                print("kernel IPVS backend tasks:")
                for task in realmark[key]:
                    print(task)
            else:
                print("service %s... OK" % service)


def gossip_hash(nw_name):
    """Print an md5 digest of this node's control-plane state for `nw_name`.

    The digest covers service names, VIPs and all task attributes, sorted so
    that nodes with identical state produce identical hashes. Blocks forever
    afterwards so the value can be collected via `docker service logs`.
    """
    data = cli.inspect_network(nw_name, verbose=True)
    services = data["Services"]
    md5 = hashlib.md5()
    entries = []
    for service, value in services.items():
        entries.append(service)
        entries.append(value["VIP"])
        for task in value["Tasks"]:
            for key, val in task.items():
                if isinstance(val, dict):
                    for k, v in val.items():
                        entries.append(v)
                else:
                    entries.append(val)
    entries.sort()
    for e in entries:
        # str()+encode keeps the digest stable on python2 and valid on py3,
        # where md5.update rejects unicode str.
        md5.update(str(e).encode('utf-8'))
    print(md5.hexdigest())
    sys.stdout.flush()
    while True:
        signal.pause()


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: ssd.py network-name [gossip-consistency]')
        sys.exit()

    # docker-py is only needed at run time; importing it here keeps the
    # module importable (e.g. for unit tests) on hosts without docker-py.
    import docker

    cli = docker.APIClient(base_url='unix://var/run/docker.sock', version='auto')
    if len(sys.argv) == 3:
        command = sys.argv[2]
    else:
        command = 'default'

    if command == 'gossip-consistency':
        # Run this same image on every node as a global service; each replica
        # prints its local gossip hash which we then collect from the logs.
        cspec = docker.types.ContainerSpec(
            image='sanimej/ssd',
            args=[sys.argv[1], 'gossip-hash'],
            mounts=[docker.types.Mount('/var/run/docker.sock',
                                       '/var/run/docker.sock', type='bind')]
        )
        mode = docker.types.ServiceMode(mode='global')
        task_template = docker.types.TaskTemplate(cspec)

        cli.create_service(task_template, name='gossip-hash', mode=mode)
        # TODO change to a deterministic way to check if the service is up.
        time.sleep(5)
        output = cli.service_logs('gossip-hash', stdout=True, stderr=True, details=True)
        for line in output:
            print("Node id: %s gossip hash %s" % (line[line.find("=")+1:line.find(",")], line[line.find(" ")+1:]))
        if cli.remove_service('gossip-hash') is not True:
            print("Deleting gossip-hash service failed")
    elif command == 'gossip-hash':
        gossip_hash(sys.argv[1])
    elif command == 'default':
        if sys.argv[1] == "ingress":
            check_network("ingress", ingress=True)
        else:
            check_network(sys.argv[1])
            check_network("ingress", ingress=True)