What is the right approach to make my Amazon ECS tasks update their Docker images, once said images have been updated in the corresponding registry?
I created a script for deploying updated Docker images to a staging service on ECS, so that the corresponding task definition refers to the current versions of the Docker images. I don't know for sure if I'm following best practices, so feedback would be welcome.
For the script to work, you need either a spare ECS instance or a deploymentConfiguration.minimumHealthyPercent
value so that ECS can steal an instance to deploy the updated task definition to.
My algorithm is like this:
- Tag Docker images corresponding to containers in the task definition with the Git revision.
- Push the Docker image tags to the corresponding registries.
- Deregister old task definitions in the task definition family.
- Register new task definition, now referring to Docker images tagged with current Git revisions.
- Update service to use new task definition.
My code pasted below:
deploy-ecs
#!/usr/bin/env python3
import subprocess
import sys
import os.path
import json
import re
import argparse
import tempfile
_root_dir = os.path.abspath(os.path.normpath(os.path.dirname(__file__)))
sys.path.insert(0, _root_dir)
from _common import *
def _run_ecs_command(args):
    """Invoke ``aws ecs`` with the given argument list, ignoring its output.

    ``args`` is the list of command-line arguments that follow ``aws ecs``.
    """
    ecs_prefix = ['aws', 'ecs']
    run_command(ecs_prefix + args)
def _get_ecs_output(args):
    """Invoke ``aws ecs`` with the given argument list and parse its stdout.

    ``args`` is the list of command-line arguments that follow ``aws ecs``.
    Returns the command's standard output decoded from JSON.
    """
    ecs_prefix = ['aws', 'ecs']
    raw_output = run_command(ecs_prefix + args, return_stdout=True)
    return json.loads(raw_output)
def _tag_image(tag, qualified_image_name, purge):
    """Add ``tag`` to a registry image and push the new tag.

    The image is pulled first so it can be tagged locally, then the tagged
    name is pushed back to the registry.

    Args:
        tag: Tag to apply (the caller passes the short Git revision).
        qualified_image_name: Image name as given to ``docker pull``.
        purge: When true, remove the ``:latest`` and the newly tagged local
            images after pushing.
    """
    tagged_image_name = '{}:{}'.format(qualified_image_name, tag)

    log_info('Tagging image \'{}\' as \'{}\'...'.format(
        qualified_image_name, tag))
    log_info('Pulling image from registry in order to tag...')
    run_command(
        ['docker', 'pull', qualified_image_name], capture_stdout=False)
    # No '-f' here: the flag was deprecated in Docker 1.10 and removed in
    # 1.12, where 'docker tag' always overwrites an existing tag.
    run_command(['docker', 'tag', qualified_image_name, tagged_image_name])

    log_info('Pushing image tag to registry...')
    run_command(['docker', 'push', tagged_image_name], capture_stdout=False)

    if purge:
        log_info('Deleting pulled image...')
        run_command(
            ['docker', 'rmi', '{}:latest'.format(qualified_image_name)])
        run_command(['docker', 'rmi', tagged_image_name])
def _register_task_definition(task_definition_fpath, purge):
    """Register a new task definition revision pointing at freshly tagged images.

    Every container image in the task definition file is tagged with the
    current short Git revision (and pushed), existing task definitions of
    the same family are deregistered, and the modified definition is
    registered through ``aws ecs register-task-definition``.

    Args:
        task_definition_fpath: Path to the task definition JSON file. It must
            contain ``family`` and ``containerDefinitions`` keys.
        purge: Passed through to ``_tag_image``; when true the local images
            are removed after pushing.

    Returns:
        The new task definition name in ``family:revision`` form.
    """
    with open(task_definition_fpath, 'rt') as f:
        task_definition = json.load(f)
    task_family = task_definition['family']

    tag = run_command(
        ['git', 'rev-parse', '--short', 'HEAD'], return_stdout=True).strip()
    for container_def in task_definition['containerDefinitions']:
        image_name = container_def['image']
        _tag_image(tag, image_name, purge)
        container_def['image'] = '{}:{}'.format(image_name, tag)

    log_info('Finding existing task definitions of family \'{}\'...'.format(
        task_family
    ))
    existing_task_definitions = _get_ecs_output(['list-task-definitions'])[
        'taskDefinitionArns']
    # Match the whole ARN and escape the family name so that only revisions
    # of exactly this family are selected.
    family_arn_re = re.compile(
        r'arn:aws[\w-]*:ecs:[^:]+:[^:]+:task-definition/{}:\d+'.format(
            re.escape(task_family)))
    for existing_task_definition in existing_task_definitions:
        if not family_arn_re.fullmatch(existing_task_definition):
            continue
        log_info('Deregistering task definition \'{}\'...'.format(
            existing_task_definition))
        _run_ecs_command([
            'deregister-task-definition', '--task-definition',
            existing_task_definition])

    # The CLI reads the definition from a file, so write the modified JSON to
    # a temporary file that lives until registration has finished.
    with tempfile.NamedTemporaryFile(mode='wt', suffix='.json') as f:
        f.write(json.dumps(task_definition))
        f.flush()
        log_info('Registering task definition...')
        result = _get_ecs_output([
            'register-task-definition',
            '--cli-input-json', 'file://{}'.format(f.name),
        ])

    return '{}:{}'.format(task_family, result['taskDefinition']['revision'])
def _update_service(service_fpath, task_def_name):
    """Point the configured ECS service at a new task definition.

    The service name is read from the ``serviceName`` key of the service
    JSON file; every listed service whose ARN names exactly that service is
    updated with ``aws ecs update-service``.

    Args:
        service_fpath: Path to the service JSON file.
        task_def_name: Task definition to deploy, in ``family:revision`` form.
    """
    with open(service_fpath, 'rt') as f:
        service_config = json.load(f)

    services = _get_ecs_output(['list-services'])['serviceArns']
    # Accept both ARN layouts ('service/<name>' and the newer
    # 'service/<cluster>/<name>') and match the full, escaped name so that
    # e.g. 'web' does not also select 'web-staging'.
    service_arn_re = re.compile(
        r'arn:aws[\w-]*:ecs:[^:]+:[^:]+:service/(?:[^/]+/)?{}'.format(
            re.escape(service_config['serviceName'])))
    for service in services:
        if not service_arn_re.fullmatch(service):
            continue
        log_info('Updating service with new task definition...')
        _run_ecs_command([
            'update-service', '--service', service,
            '--task-definition', task_def_name,
        ])
# Command-line entry point: parse the arguments, register a new task
# definition revision and update the service to use it.
parser = argparse.ArgumentParser(
description="""Deploy latest Docker image to staging server.
The task definition file is used as the task definition, whereas
the service file is used to configure the service.
""")
parser.add_argument(
'task_definition_file', help='Your task definition JSON file')
parser.add_argument('service_file', help='Your service JSON file')
parser.add_argument(
'--purge_image', action='store_true', default=False,
help='Purge Docker image after tagging?')
args = parser.parse_args()
# Resolve both paths before changing directory below, so that relative paths
# given on the command line keep referring to the caller's working directory.
task_definition_file = os.path.abspath(args.task_definition_file)
service_file = os.path.abspath(args.service_file)
# Subsequent commands run from the directory that contains this script.
os.chdir(_root_dir)
task_def_name = _register_task_definition(
task_definition_file, args.purge_image)
_update_service(service_file, task_def_name)
_common.py
import sys
import subprocess
__all__ = ['log_info', 'handle_error', 'run_command', ]
def log_info(msg):
    """Write ``msg`` to standard output as a ``* ``-prefixed line and flush."""
    line = '* {}\n'.format(msg)
    stream = sys.stdout
    stream.write(line)
    stream.flush()
def handle_error(msg):
    """Write ``msg`` to standard error as a ``* ``-prefixed line and exit.

    The process is terminated with exit status 1 via ``sys.exit``.
    """
    line = '* {}\n'.format(msg)
    sys.stderr.write(line)
    sys.exit(1)
def run_command(
        command, ignore_error=False, return_stdout=False,
        capture_stdout=True):
    """Run an external command, logging it first.

    Args:
        command: Argument list (or tuple) to execute; a single non-sequence
            value is wrapped into a one-element list.
        ignore_error: When false, a non-zero exit status is reported through
            ``handle_error``; when true it is ignored.
        return_stdout: When true, return the command's decoded standard
            output. This implies capturing the output.
        capture_stdout: When true, capture standard output instead of
            letting the command write to the terminal.

    Returns:
        The decoded standard output if ``return_stdout`` is true and the
        command succeeded, otherwise ``None``.
    """
    if not isinstance(command, (list, tuple)):
        command = [command, ]
    command_str = ' '.join(command)
    log_info('Running command {}'.format(command_str))
    try:
        # Output must be captured whenever the caller wants it returned;
        # otherwise there would be nothing to decode below.
        if capture_stdout or return_stdout:
            stdout = subprocess.check_output(command)
        else:
            subprocess.check_call(command)
            stdout = None
    except subprocess.CalledProcessError as err:
        if not ignore_error:
            handle_error('Command failed: {}'.format(err))
        return None
    return stdout.decode() if return_stdout else None
If your task is running under a service you can force a new deployment. This forces the task definition to be re-evaluated and the new container image to be pulled.
aws ecs update-service --cluster <cluster name> --service <service name> --force-new-deployment
--region <region>
flag –
Paraboloid AWS::ECS::Service
with DeploymentConfiguration: MinimumHealthyPercent: 50 MaximumPercent: 200
–
Scroll --no-cli-pager
to prevent the command from prompting the user for input. –
Dexamethasone Every time you start a task (either through the StartTask
and RunTask
API calls or that is started automatically as part of a Service), the ECS Agent will perform a docker pull
of the image
you specify in your task definition. If you use the same image name (including tag) each time you push to your registry, you should be able to have the new image run by running a new task. Note that if Docker cannot reach the registry for any reason (e.g., network issues or authentication issues), the ECS Agent will attempt to use a cached image; if you want to avoid cached images from being used when you update your image, you'll want to push a different tag to your registry each time and update your task definition correspondingly before running the new task.
Update: This behavior can now be tuned through the ECS_IMAGE_PULL_BEHAVIOR
environment variable set on the ECS agent. See the documentation for details. As of the time of writing, the following settings are supported:
The behavior used to customize the pull image process for your container instances. The following describes the optional behaviors:
- If default is specified, the image is pulled remotely. If the image pull fails, then the container uses the cached image on the instance.
- If always is specified, the image is always pulled remotely. If the image pull fails, then the task fails. This option ensures that the latest version of the image is always pulled. Any cached images are ignored and are subject to the automated image cleanup process.
- If once is specified, the image is pulled remotely only if it has not been pulled by a previous task on the same container instance or if the cached image was removed by the automated image cleanup process. Otherwise, the cached image on the instance is used. This ensures that no unnecessary image pulls are attempted.
- If prefer-cached is specified, the image is pulled remotely if there is no cached image. Otherwise, the cached image on the instance is used. Automated image cleanup is disabled for the container to ensure that the cached image is not removed.
/var/log/ecs
. –
Manipular Registering a new task definition and updating the service to use the new task definition is the approach recommended by AWS. The easiest way to do this is to:
- Navigate to Task Definitions
- Select the correct task
- Choose create new revision
- If you're already pulling the latest version of the container image with something like the :latest tag, then just click Create. Otherwise, update the version number of the container image and then click Create.
- Expand Actions
- Choose Update Service (twice)
- Then wait for the service to be restarted
This tutorial has more detail and describes how the above steps fit into an end-to-end product development process.
Full disclosure: This tutorial features containers from Bitnami and I work for Bitnami. However the thoughts expressed here are my own and not the opinion of Bitnami.
There are two ways to do this.
First, use AWS CodeDeploy. You can configure the Blue/Green deployment section in the ECS service definition. This includes a CodeDeployRoleForECS, another TargetGroup to switch to, and a test Listener (optional). AWS ECS will create a CodeDeploy application and deployment group and link these CodeDeploy resources with your ECS Cluster/Service and your ELB/TargetGroups for you. Then you can use CodeDeploy to initiate a deployment, in which you need to enter an AppSpec that specifies which task/container to use to update which service. This is where you specify your new task/container. Then you will see new instances spun up in the new TargetGroup and the old TargetGroup disconnected from the ELB, and soon the old instances registered to the old TargetGroup will be terminated.
This sounds very complicated. Actually, since/if you have enabled auto scaling on your ECS service, a simple way to do it is to just force a new deployment using console or cli, like a gentleman here pointed out:
aws ecs update-service --cluster <cluster name> --service <service name> --force-new-deployment
In this way you can still use the "rolling update" deployment type, and ECS will simply spin up new instances and drain the old ones with no downtime of your service if everything is OK. The downside is that you lose fine-grained control over the deployment and cannot roll back to the previous version if there is an error, in which case the running service will break. But this is a really simple way to go.
BTW, don't forget to set proper numbers for Minimum healthy percent and Maximum percent, like 100 and 200.
Ran into same issue. After spending hours, have concluded these simplified steps for automated deployment of updated image:
1.ECS task definition changes: For a better understanding, let's assume you have created a task definition with below details (note: these numbers would change accordingly as per your task definition):
launch_type = EC2
desired_count = 1
Then you need to make the following changes:
deployment_minimum_healthy_percent = 0 //this does the trick, if not set to zero the force deployment wont happen as ECS won't allow to stop the current running task
deployment_maximum_percent = 200 //for allowing rolling update
2.Tag your image as <your-image-name>:latest . The latest key takes care of getting pulled by the respective ECS task.
sudo docker build -t imageX:master . //build your image with some tag
sudo -s eval $(aws ecr get-login --no-include-email --region us-east-1) //login to ECR
sudo docker tag imageX:master <your_account_id>.dkr.ecr.us-east-1.amazonaws.com/<your-image-name>:latest //tag your image with latest tag
3. Push the image to ECR
sudo docker push <your_account_id>.dkr.ecr.us-east-1.amazonaws.com/<your-image-name>:latest
4.apply force-deployment
sudo aws ecs update-service --cluster <your-cluster-name> --service <your-service-name> --force-new-deployment --region us-east-1
Note: I have written all the commands assuming the region to be us-east-1. Just replace it with your respective region while implementing.
Following worked for me in case the docker image tag is same:
- Go to cluster and service.
- Select service and click update.
- Set number of tasks as 0 and update.
- After deployment is finished, re-scale number of tasks to 1.
Following api works as well:
aws ecs update-service --cluster <cluster_name> --service <service_name> --force-new-deployment
I created a script for deploying updated Docker images to a staging service on ECS, so that the corresponding task definition refers to the current versions of the Docker images. I don't know for sure if I'm following best practices, so feedback would be welcome.
For the script to work, you need either a spare ECS instance or a deploymentConfiguration.minimumHealthyPercent
value so that ECS can steal an instance to deploy the updated task definition to.
My algorithm is like this:
- Tag Docker images corresponding to containers in the task definition with the Git revision.
- Push the Docker image tags to the corresponding registries.
- Deregister old task definitions in the task definition family.
- Register new task definition, now referring to Docker images tagged with current Git revisions.
- Update service to use new task definition.
My code pasted below:
deploy-ecs
#!/usr/bin/env python3
import subprocess
import sys
import os.path
import json
import re
import argparse
import tempfile
_root_dir = os.path.abspath(os.path.normpath(os.path.dirname(__file__)))
sys.path.insert(0, _root_dir)
from _common import *
def _run_ecs_command(args):
    """Invoke ``aws ecs`` with the given argument list, ignoring its output.

    ``args`` is the list of command-line arguments that follow ``aws ecs``.
    """
    ecs_prefix = ['aws', 'ecs']
    run_command(ecs_prefix + args)
def _get_ecs_output(args):
    """Invoke ``aws ecs`` with the given argument list and parse its stdout.

    ``args`` is the list of command-line arguments that follow ``aws ecs``.
    Returns the command's standard output decoded from JSON.
    """
    ecs_prefix = ['aws', 'ecs']
    raw_output = run_command(ecs_prefix + args, return_stdout=True)
    return json.loads(raw_output)
def _tag_image(tag, qualified_image_name, purge):
    """Add ``tag`` to a registry image and push the new tag.

    The image is pulled first so it can be tagged locally, then the tagged
    name is pushed back to the registry.

    Args:
        tag: Tag to apply (the caller passes the short Git revision).
        qualified_image_name: Image name as given to ``docker pull``.
        purge: When true, remove the ``:latest`` and the newly tagged local
            images after pushing.
    """
    tagged_image_name = '{}:{}'.format(qualified_image_name, tag)

    log_info('Tagging image \'{}\' as \'{}\'...'.format(
        qualified_image_name, tag))
    log_info('Pulling image from registry in order to tag...')
    run_command(
        ['docker', 'pull', qualified_image_name], capture_stdout=False)
    # No '-f' here: the flag was deprecated in Docker 1.10 and removed in
    # 1.12, where 'docker tag' always overwrites an existing tag.
    run_command(['docker', 'tag', qualified_image_name, tagged_image_name])

    log_info('Pushing image tag to registry...')
    run_command(['docker', 'push', tagged_image_name], capture_stdout=False)

    if purge:
        log_info('Deleting pulled image...')
        run_command(
            ['docker', 'rmi', '{}:latest'.format(qualified_image_name)])
        run_command(['docker', 'rmi', tagged_image_name])
def _register_task_definition(task_definition_fpath, purge):
    """Register a new task definition revision pointing at freshly tagged images.

    Every container image in the task definition file is tagged with the
    current short Git revision (and pushed), existing task definitions of
    the same family are deregistered, and the modified definition is
    registered through ``aws ecs register-task-definition``.

    Args:
        task_definition_fpath: Path to the task definition JSON file. It must
            contain ``family`` and ``containerDefinitions`` keys.
        purge: Passed through to ``_tag_image``; when true the local images
            are removed after pushing.

    Returns:
        The new task definition name in ``family:revision`` form.
    """
    with open(task_definition_fpath, 'rt') as f:
        task_definition = json.load(f)
    task_family = task_definition['family']

    tag = run_command(
        ['git', 'rev-parse', '--short', 'HEAD'], return_stdout=True).strip()
    for container_def in task_definition['containerDefinitions']:
        image_name = container_def['image']
        _tag_image(tag, image_name, purge)
        container_def['image'] = '{}:{}'.format(image_name, tag)

    log_info('Finding existing task definitions of family \'{}\'...'.format(
        task_family
    ))
    existing_task_definitions = _get_ecs_output(['list-task-definitions'])[
        'taskDefinitionArns']
    # Match the whole ARN and escape the family name so that only revisions
    # of exactly this family are selected.
    family_arn_re = re.compile(
        r'arn:aws[\w-]*:ecs:[^:]+:[^:]+:task-definition/{}:\d+'.format(
            re.escape(task_family)))
    for existing_task_definition in existing_task_definitions:
        if not family_arn_re.fullmatch(existing_task_definition):
            continue
        log_info('Deregistering task definition \'{}\'...'.format(
            existing_task_definition))
        _run_ecs_command([
            'deregister-task-definition', '--task-definition',
            existing_task_definition])

    # The CLI reads the definition from a file, so write the modified JSON to
    # a temporary file that lives until registration has finished.
    with tempfile.NamedTemporaryFile(mode='wt', suffix='.json') as f:
        f.write(json.dumps(task_definition))
        f.flush()
        log_info('Registering task definition...')
        result = _get_ecs_output([
            'register-task-definition',
            '--cli-input-json', 'file://{}'.format(f.name),
        ])

    return '{}:{}'.format(task_family, result['taskDefinition']['revision'])
def _update_service(service_fpath, task_def_name):
    """Point the configured ECS service at a new task definition.

    The service name is read from the ``serviceName`` key of the service
    JSON file; every listed service whose ARN names exactly that service is
    updated with ``aws ecs update-service``.

    Args:
        service_fpath: Path to the service JSON file.
        task_def_name: Task definition to deploy, in ``family:revision`` form.
    """
    with open(service_fpath, 'rt') as f:
        service_config = json.load(f)

    services = _get_ecs_output(['list-services'])['serviceArns']
    # Accept both ARN layouts ('service/<name>' and the newer
    # 'service/<cluster>/<name>') and match the full, escaped name so that
    # e.g. 'web' does not also select 'web-staging'.
    service_arn_re = re.compile(
        r'arn:aws[\w-]*:ecs:[^:]+:[^:]+:service/(?:[^/]+/)?{}'.format(
            re.escape(service_config['serviceName'])))
    for service in services:
        if not service_arn_re.fullmatch(service):
            continue
        log_info('Updating service with new task definition...')
        _run_ecs_command([
            'update-service', '--service', service,
            '--task-definition', task_def_name,
        ])
# Command-line entry point: parse the arguments, register a new task
# definition revision and update the service to use it.
parser = argparse.ArgumentParser(
description="""Deploy latest Docker image to staging server.
The task definition file is used as the task definition, whereas
the service file is used to configure the service.
""")
parser.add_argument(
'task_definition_file', help='Your task definition JSON file')
parser.add_argument('service_file', help='Your service JSON file')
parser.add_argument(
'--purge_image', action='store_true', default=False,
help='Purge Docker image after tagging?')
args = parser.parse_args()
# Resolve both paths before changing directory below, so that relative paths
# given on the command line keep referring to the caller's working directory.
task_definition_file = os.path.abspath(args.task_definition_file)
service_file = os.path.abspath(args.service_file)
# Subsequent commands run from the directory that contains this script.
os.chdir(_root_dir)
task_def_name = _register_task_definition(
task_definition_file, args.purge_image)
_update_service(service_file, task_def_name)
_common.py
import sys
import subprocess
__all__ = ['log_info', 'handle_error', 'run_command', ]
def log_info(msg):
    """Write ``msg`` to standard output as a ``* ``-prefixed line and flush."""
    line = '* {}\n'.format(msg)
    stream = sys.stdout
    stream.write(line)
    stream.flush()
def handle_error(msg):
    """Write ``msg`` to standard error as a ``* ``-prefixed line and exit.

    The process is terminated with exit status 1 via ``sys.exit``.
    """
    line = '* {}\n'.format(msg)
    sys.stderr.write(line)
    sys.exit(1)
def run_command(
        command, ignore_error=False, return_stdout=False,
        capture_stdout=True):
    """Run an external command, logging it first.

    Args:
        command: Argument list (or tuple) to execute; a single non-sequence
            value is wrapped into a one-element list.
        ignore_error: When false, a non-zero exit status is reported through
            ``handle_error``; when true it is ignored.
        return_stdout: When true, return the command's decoded standard
            output. This implies capturing the output.
        capture_stdout: When true, capture standard output instead of
            letting the command write to the terminal.

    Returns:
        The decoded standard output if ``return_stdout`` is true and the
        command succeeded, otherwise ``None``.
    """
    if not isinstance(command, (list, tuple)):
        command = [command, ]
    command_str = ' '.join(command)
    log_info('Running command {}'.format(command_str))
    try:
        # Output must be captured whenever the caller wants it returned;
        # otherwise there would be nothing to decode below.
        if capture_stdout or return_stdout:
            stdout = subprocess.check_output(command)
        else:
            subprocess.check_call(command)
            stdout = None
    except subprocess.CalledProcessError as err:
        if not ignore_error:
            handle_error('Command failed: {}'.format(err))
        return None
    return stdout.decode() if return_stdout else None
If you use an IaC tool such as Terraform to set up your ECS tasks, you can always do it by updating the image versions in your task definition. Terraform will basically replace the old task definition with a new one, and the ECS service will start using the new task definition with the updated image.
The other way is to always have an aws ecs update-service command in the pipeline that builds the image used by your ECS tasks, so that as soon as the image is built you just force a new deployment.
aws ecs update-service --cluster clusterName --service serviceName --force-new-deployment
Since there has not been any progress on the AWS side, here is a simple Python script that performs exactly the steps described in the highly rated answers of Dima and Samuel Karp.
First push your image into your AWS registry ECR then run the script:
import time

import boto3

client = boto3.client('ecs')
cluster_name = "Example_Cluster"
service_name = "Example-service"
reason_to_stop = "obsolete deployment"

# Create new deployment; ECS Service forces to pull from docker registry,
# creates new task in service
response = client.update_service(
    cluster=cluster_name, service=service_name, forceNewDeployment=True)

# Wait for ecs agent to start new task
time.sleep(10)

# Get all Service Tasks
task_arns = client.list_tasks(
    cluster=cluster_name, serviceName=service_name)["taskArns"]

if not task_arns:
    # Nothing is running for this service, so there is no obsolete task to
    # stop; describing an empty task list or indexing into it would fail.
    print("no tasks found for service ", service_name)
else:
    # Get meta data for all Service Tasks
    task_meta_data = client.describe_tasks(
        cluster=cluster_name, tasks=task_arns)
    # The task with the earliest creation date is treated as the obsolete one
    oldest_task = min(
        task_meta_data["tasks"], key=lambda task: task['createdAt'])
    obsolete_task_arn = oldest_task['taskArn']
    print("stop ", obsolete_task_arn)
    # Stop obsolete task
    stop_response = client.stop_task(
        cluster=cluster_name, task=obsolete_task_arn, reason=reason_to_stop)
This code does:
- create a new task with the new image in the service
- stop the obsolete old task with the old image in the service
AWS CodePipeline.
You can set ECR as a source, and ECS as a target to deploy to.
Using the AWS CLI, I tried aws ecs update-service as suggested above. It did not pick up the latest Docker image from ECR. In the end, I reran my Ansible playbook that created the ECS cluster. The version of the task definition is bumped when ecs_taskdefinition runs. Then all is good: the new Docker image is picked up.
Truthfully not sure if the task version change forces the redeploy, or if the playbook using the ecs_service causes the task to reload.
If anyone is interested, I'll get permission to publish a sanitized version of my playbook.
The following commands worked for me
docker build -t <repo> .
docker push <repo>
ecs-cli compose stop
ecs-cli compose start
© 2022 - 2024 — McMap. All rights reserved.