1. Prepare three directories: dags, logs, plugins
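On Linux the containers run as the host user, so these mounted directories must exist and be writable before the stack starts. A minimal sketch for creating them and setting the user/group IDs the compose file reads (the .env values, in particular AIRFLOW_GID=0, are an assumption; adjust to your environment):

mkdir -p ./dags ./logs ./plugins
echo -e "AIRFLOW_UID=$(id -u)\nAIRFLOW_GID=0" > .env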
2. Prepare the docker-compose file, docker-compose.yaml:
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Basic Airflow cluster configuration for CeleryExecutor with Redis and PostgreSQL.
#
# WARNING: This configuration is for local development. Do not use it in a production deployment.
#
# This configuration supports basic configuration using environment variables or an .env file
# The following variables are supported:
#
# AIRFLOW_IMAGE_NAME           - Docker image name used to run Airflow.
#                                Default: apache/airflow:master-python3.8
# AIRFLOW_UID                  - User ID in Airflow containers
#                                Default: 50000
# AIRFLOW_GID                  - Group ID in Airflow containers
#                                Default: 50000
# _AIRFLOW_WWW_USER_USERNAME   - Username for the administrator account.
#                                Default: airflow
# _AIRFLOW_WWW_USER_PASSWORD   - Password for the administrator account.
#                                Default: airflow
#
# Feel free to modify this file to suit your needs.
---
version: '3'
x-airflow-common:
  &airflow-common
  image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.0.2}
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    AIRFLOW__CORE__FERNET_KEY: ''
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'true'
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
  volumes:
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy

services:
  postgres:
    image: postgres:13
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: always

  redis:
    image: redis:latest
    ports:
      - 12379:6379
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 30s
      retries: 50
    restart: always

  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - 12080:8080
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    restart: always

  airflow-worker:
    <<: *airflow-common
    command: celery worker
    restart: always

  airflow-init:
    <<: *airflow-common
    command: version
    environment:
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}

  flower:
    <<: *airflow-common
    command: celery flower
    ports:
      - 12555:5555
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

volumes:
  postgres-db-volume:
3. Deploy Airflow with docker-compose:
docker-compose up -d
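To verify the deployment, check that every container reports healthy, then open the web UI. With the port mappings above, the webserver is exposed on host port 12080 and Flower on 12555; the default login is airflow/airflow (from _AIRFLOW_WWW_USER_USERNAME / _AIRFLOW_WWW_USER_PASSWORD):

docker-compose ps
# Web UI:  http://localhost:12080
# Flower:  http://localhost:12555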
4. Write a task file test_utils.py and place it in the dags directory; the scheduler will pick it up automatically:
"""Used for unit tests""" from airflow import DAG from airflow.operators.bash import BashOperator from airflow.utils.dates import days_ago with DAG(dag_id='test_utils', schedule_interval=None, tags=['example']) as dag: task = BashOperator( task_id='sleeps_forever', bash_command="sleep 10000000000", start_date=days_ago(2), owner='airflow', )