airflow workflows source code

  • 2022-10-20

airflow workflows code

File path: /airflow/providers/google/cloud/hooks/workflows.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from typing import Sequence

from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
from google.api_core.operation import Operation
from google.api_core.retry import Retry
from google.cloud.workflows.executions_v1beta import Execution, ExecutionsClient
from google.cloud.workflows.executions_v1beta.services.executions.pagers import ListExecutionsPager
from google.cloud.workflows_v1beta import Workflow, WorkflowsClient
from google.cloud.workflows_v1beta.services.workflows.pagers import ListWorkflowsPager
from google.protobuf.field_mask_pb2 import FieldMask

from airflow.providers.google.common.consts import CLIENT_INFO
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook


class WorkflowsHook(GoogleBaseHook):
    """
    Hook for Google Cloud Workflows APIs.

    All the methods in the hook where project_id is used must be called with
    keyword arguments rather than positional.
    """

    def get_workflows_client(self) -> WorkflowsClient:
        """Returns WorkflowsClient."""
        return WorkflowsClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)

    def get_executions_client(self) -> ExecutionsClient:
        """Returns ExecutionsClient."""
        return ExecutionsClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)

    @GoogleBaseHook.fallback_to_default_project_id
    def create_workflow(
        self,
        workflow: dict,
        workflow_id: str,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> Operation:
        """
        Creates a new workflow. If a workflow with the specified name
        already exists in the specified project and location, the
        long-running operation will return an
        [ALREADY_EXISTS][google.rpc.Code.ALREADY_EXISTS] error.

        :param workflow: Required. Workflow to be created.
        :param workflow_id: Required. The ID of the workflow to be created.
        :param project_id: Required. The ID of the Google Cloud project the workflow belongs to.
        :param location: Required. The GCP region in which to handle the request.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_workflows_client()
        parent = f"projects/{project_id}/locations/{location}"
        return client.create_workflow(
            request={"parent": parent, "workflow": workflow, "workflow_id": workflow_id},
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def get_workflow(
        self,
        workflow_id: str,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> Workflow:
        """
        Gets details of a single Workflow.

        :param workflow_id: Required. The ID of the workflow to be retrieved.
        :param project_id: Required. The ID of the Google Cloud project the workflow belongs to.
        :param location: Required. The GCP region in which to handle the request.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_workflows_client()
        name = f"projects/{project_id}/locations/{location}/workflows/{workflow_id}"
        return client.get_workflow(request={"name": name}, retry=retry, timeout=timeout, metadata=metadata)

    def update_workflow(
        self,
        workflow: dict | Workflow,
        update_mask: FieldMask | None = None,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> Operation:
        """
        Updates an existing workflow.
        Running this method has no impact on already running
        executions of the workflow. A new revision of the
        workflow may be created as a result of a successful
        update operation. In that case, the new revision will be
        used in new workflow executions.

        :param workflow: Required. Workflow to be updated.
        :param update_mask: List of fields to be updated. If not present,
            the entire workflow will be updated.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_workflows_client()
        return client.update_workflow(
            request={"workflow": workflow, "update_mask": update_mask},
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def delete_workflow(
        self,
        workflow_id: str,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> Operation:
        """
        Deletes a workflow with the specified name.
        This method also cancels and deletes all running
        executions of the workflow.

        :param workflow_id: Required. The ID of the workflow to be deleted.
        :param project_id: Required. The ID of the Google Cloud project the workflow belongs to.
        :param location: Required. The GCP region in which to handle the request.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_workflows_client()
        name = f"projects/{project_id}/locations/{location}/workflows/{workflow_id}"
        return client.delete_workflow(request={"name": name}, retry=retry, timeout=timeout, metadata=metadata)

    @GoogleBaseHook.fallback_to_default_project_id
    def list_workflows(
        self,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        filter_: str | None = None,
        order_by: str | None = None,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> ListWorkflowsPager:
        """
        Lists Workflows in a given project and location.
        The default order is not specified.

        :param filter_: Filter to restrict results to specific workflows.
        :param order_by: Comma-separated list of fields that
            specifies the order of the results. Default sorting order for a field is ascending.
            To specify descending order for a field, append a "desc" suffix.
            If not specified, the results will be returned in an unspecified order.
        :param project_id: Required. The ID of the Google Cloud project the workflows belong to.
        :param location: Required. The GCP region in which to handle the request.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_workflows_client()
        parent = f"projects/{project_id}/locations/{location}"

        return client.list_workflows(
            request={"parent": parent, "filter": filter_, "order_by": order_by},
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def create_execution(
        self,
        workflow_id: str,
        location: str,
        execution: dict,
        project_id: str = PROVIDE_PROJECT_ID,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> Execution:
        """
        Creates a new execution using the latest revision of
        the given workflow.

        :param execution: Required. Input parameters of the execution represented as a dictionary.
        :param workflow_id: Required. The ID of the workflow.
        :param project_id: Required. The ID of the Google Cloud project the workflow belongs to.
        :param location: Required. The GCP region in which to handle the request.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_executions_client()
        parent = f"projects/{project_id}/locations/{location}/workflows/{workflow_id}"
        return client.create_execution(
            request={"parent": parent, "execution": execution},
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def get_execution(
        self,
        workflow_id: str,
        execution_id: str,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> Execution:
        """
        Returns an execution for the given ``workflow_id`` and ``execution_id``.

        :param workflow_id: Required. The ID of the workflow.
        :param execution_id: Required. The ID of the execution.
        :param project_id: Required. The ID of the Google Cloud project the workflow belongs to.
        :param location: Required. The GCP region in which to handle the request.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_executions_client()
        name = f"projects/{project_id}/locations/{location}/workflows/{workflow_id}/executions/{execution_id}"
        return client.get_execution(request={"name": name}, retry=retry, timeout=timeout, metadata=metadata)

    @GoogleBaseHook.fallback_to_default_project_id
    def cancel_execution(
        self,
        workflow_id: str,
        execution_id: str,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> Execution:
        """
        Cancels an execution using the given ``workflow_id`` and ``execution_id``.

        :param workflow_id: Required. The ID of the workflow.
        :param execution_id: Required. The ID of the execution.
        :param project_id: Required. The ID of the Google Cloud project the workflow belongs to.
        :param location: Required. The GCP region in which to handle the request.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_executions_client()
        name = f"projects/{project_id}/locations/{location}/workflows/{workflow_id}/executions/{execution_id}"
        return client.cancel_execution(
            request={"name": name}, retry=retry, timeout=timeout, metadata=metadata
        )

    @GoogleBaseHook.fallback_to_default_project_id
    def list_executions(
        self,
        workflow_id: str,
        location: str,
        project_id: str = PROVIDE_PROJECT_ID,
        retry: Retry | _MethodDefault = DEFAULT,
        timeout: float | None = None,
        metadata: Sequence[tuple[str, str]] = (),
    ) -> ListExecutionsPager:
        """
        Returns a list of executions which belong to the
        workflow with the given name. The method returns
        executions of all workflow revisions. Returned
        executions are ordered by their start time (newest
        first).

        :param workflow_id: Required. The ID of the workflow whose executions should be listed.
        :param project_id: Required. The ID of the Google Cloud project the workflow belongs to.
        :param location: Required. The GCP region in which to handle the request.
        :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
            retried.
        :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
            ``retry`` is specified, the timeout applies to each individual attempt.
        :param metadata: Additional metadata that is provided to the method.
        """
        metadata = metadata or ()
        client = self.get_executions_client()
        parent = f"projects/{project_id}/locations/{location}/workflows/{workflow_id}"
        return client.list_executions(
            request={"parent": parent}, retry=retry, timeout=timeout, metadata=metadata
        )

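Usage sketch

A minimal usage sketch of the hook above; it is not part of the file itself. The connection ID "google_cloud_default" (inherited from GoogleBaseHook), the project/region/workflow identifiers, and the inline YAML source are placeholder assumptions. All project-scoped methods are called with keyword arguments, as the class docstring requires.

from airflow.providers.google.cloud.hooks.workflows import WorkflowsHook

# Placeholder identifiers -- substitute values for your environment.
PROJECT_ID = "my-project"
LOCATION = "us-central1"
WORKFLOW_ID = "demo-workflow"

hook = WorkflowsHook(gcp_conn_id="google_cloud_default")

# create_workflow returns a google.api_core Operation; result() blocks until
# the long-running operation finishes and returns the created Workflow.
operation = hook.create_workflow(
    workflow={"source_contents": "main:\n  steps:\n  - done:\n      return: 'ok'"},
    workflow_id=WORKFLOW_ID,
    location=LOCATION,
    project_id=PROJECT_ID,
)
workflow = operation.result()

# Start an execution of the latest revision, then read back its state.
execution = hook.create_execution(
    workflow_id=WORKFLOW_ID,
    location=LOCATION,
    execution={},
    project_id=PROJECT_ID,
)
execution_id = execution.name.split("/")[-1]
state = hook.get_execution(
    workflow_id=WORKFLOW_ID,
    execution_id=execution_id,
    location=LOCATION,
    project_id=PROJECT_ID,
).state
print(state)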