from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Sequence, Union, cast
import dagster._check as check
from dagster._annotations import public
from dagster._core.definitions.events import AssetKey, AssetObservation
from dagster._core.definitions.metadata import MetadataEntry, PartitionMetadataEntry
from dagster._core.definitions.partition_key_range import PartitionKeyRange
from dagster._core.definitions.time_window_partitions import (
TimeWindow,
TimeWindowPartitionsDefinition,
)
from dagster._core.errors import DagsterInvariantViolationError
if TYPE_CHECKING:
from dagster._core.definitions import PartitionsDefinition, SolidDefinition
from dagster._core.definitions.op_definition import OpDefinition
from dagster._core.definitions.resource_definition import Resources
from dagster._core.events import DagsterEvent
from dagster._core.execution.context.system import StepExecutionContext
from dagster._core.log_manager import DagsterLogManager
from dagster._core.types.dagster_type import DagsterType
from .output import OutputContext
class InputContext:
"""
The ``context`` object available to the load_input method of :py:class:`RootInputManager`.
Users should not instantiate this object directly. In order to construct
an `InputContext` for testing an IO Manager's `load_input` method, use
:py:func:`dagster.build_input_context`.
Attributes:
name (Optional[str]): The name of the input that we're loading.
config (Optional[Any]): The config attached to the input that we're loading.
metadata (Optional[Dict[str, Any]]): A dict of metadata that is assigned to the
InputDefinition that we're loading for.
upstream_output (Optional[OutputContext]): Info about the output that produced the object
we're loading.
dagster_type (Optional[DagsterType]): The type of this input.
log (Optional[DagsterLogManager]): The log manager to use for this input.
resource_config (Optional[Dict[str, Any]]): The config associated with the resource that
initializes the RootInputManager.
resources (Optional[Resources]): The resources required by the resource that initializes the
input manager. If using the :py:func:`@root_input_manager` decorator, these resources
correspond to those requested with the `required_resource_keys` parameter.
op_def (Optional[OpDefinition]): The definition of the op that's loading the input.
    Example:

    .. code-block:: python

        from dagster import IOManager, InputContext

        class MyIOManager(IOManager):
            def load_input(self, context: InputContext):
                ...
    """
def __init__(
self,
name: Optional[str] = None,
pipeline_name: Optional[str] = None,
solid_def: Optional["SolidDefinition"] = None,
config: Optional[Any] = None,
metadata: Optional[Dict[str, Any]] = None,
upstream_output: Optional["OutputContext"] = None,
dagster_type: Optional["DagsterType"] = None,
log_manager: Optional["DagsterLogManager"] = None,
resource_config: Optional[Dict[str, Any]] = None,
resources: Optional[Union["Resources", Dict[str, Any]]] = None,
step_context: Optional["StepExecutionContext"] = None,
op_def: Optional["OpDefinition"] = None,
asset_key: Optional[AssetKey] = None,
):
from dagster._core.definitions.resource_definition import IContainsGenerator, Resources
from dagster._core.execution.build_resources import build_resources
self._name = name
self._pipeline_name = pipeline_name
check.invariant(
solid_def is None or op_def is None, "Can't provide both a solid_def and an op_def arg"
)
self._solid_def = solid_def or op_def
self._config = config
self._metadata = metadata
self._upstream_output = upstream_output
self._dagster_type = dagster_type
self._log = log_manager
self._resource_config = resource_config
self._step_context = step_context
self._asset_key = asset_key
if isinstance(resources, Resources):
self._resources_cm = None
self._resources = resources
else:
self._resources_cm = build_resources(
check.opt_dict_param(resources, "resources", key_type=str)
)
self._resources = self._resources_cm.__enter__() # pylint: disable=no-member
self._resources_contain_cm = isinstance(self._resources, IContainsGenerator)
self._cm_scope_entered = False
self._events: List["DagsterEvent"] = []
self._observations: List[AssetObservation] = []
self._metadata_entries: List[Union[MetadataEntry, PartitionMetadataEntry]] = []
def __enter__(self):
if self._resources_cm:
self._cm_scope_entered = True
return self
def __exit__(self, *exc):
if self._resources_cm:
self._resources_cm.__exit__(*exc) # pylint: disable=no-member
def __del__(self):
if self._resources_cm and self._resources_contain_cm and not self._cm_scope_entered:
self._resources_cm.__exit__(None, None, None) # pylint: disable=no-member
@public # type: ignore
@property
def has_input_name(self) -> bool:
"""If we're the InputContext is being used to load the result of a run from outside the run,
then it won't have an input name."""
return self._name is not None
@public # type: ignore
@property
def name(self) -> str:
if self._name is None:
raise DagsterInvariantViolationError(
"Attempting to access name, "
"but it was not provided when constructing the InputContext"
)
return self._name
@property
def pipeline_name(self) -> str:
if self._pipeline_name is None:
raise DagsterInvariantViolationError(
"Attempting to access pipeline_name, "
"but it was not provided when constructing the InputContext"
)
return self._pipeline_name
@property
def solid_def(self) -> "SolidDefinition":
if self._solid_def is None:
raise DagsterInvariantViolationError(
"Attempting to access solid_def, "
"but it was not provided when constructing the InputContext"
)
return self._solid_def
@public # type: ignore
@property
def op_def(self) -> "OpDefinition":
from dagster._core.definitions import OpDefinition
if self._solid_def is None:
raise DagsterInvariantViolationError(
"Attempting to access op_def, "
"but it was not provided when constructing the InputContext"
)
return cast(OpDefinition, self._solid_def)
@public # type: ignore
@property
def config(self) -> Any:
return self._config
@public # type: ignore
@property
def metadata(self) -> Optional[Dict[str, Any]]:
return self._metadata
@public # type: ignore
@property
def upstream_output(self) -> Optional["OutputContext"]:
return self._upstream_output
@public # type: ignore
@property
def dagster_type(self) -> "DagsterType":
if self._dagster_type is None:
raise DagsterInvariantViolationError(
"Attempting to access dagster_type, "
"but it was not provided when constructing the InputContext"
)
return self._dagster_type
@public # type: ignore
@property
def log(self) -> "DagsterLogManager":
if self._log is None:
raise DagsterInvariantViolationError(
"Attempting to access log, "
"but it was not provided when constructing the InputContext"
)
return self._log
@public # type: ignore
@property
def resource_config(self) -> Optional[Dict[str, Any]]:
return self._resource_config
@public # type: ignore
@property
def resources(self) -> Any:
if self._resources is None:
raise DagsterInvariantViolationError(
"Attempting to access resources, "
"but it was not provided when constructing the InputContext"
)
if self._resources_cm and self._resources_contain_cm and not self._cm_scope_entered:
raise DagsterInvariantViolationError(
"At least one provided resource is a generator, but attempting to access "
"resources outside of context manager scope. You can use the following syntax to "
"open a context manager: `with build_input_context(...) as context:`"
)
return self._resources
@public # type: ignore
@property
def has_asset_key(self) -> bool:
return self._asset_key is not None
@public # type: ignore
@property
def asset_key(self) -> AssetKey:
if self._asset_key is None:
raise DagsterInvariantViolationError(
"Attempting to access asset_key, but no asset is associated with this input"
)
return self._asset_key
@public # type: ignore
@property
def asset_partitions_def(self) -> "PartitionsDefinition":
"""The PartitionsDefinition on the upstream asset corresponding to this input."""
asset_key = self.asset_key
result = self.step_context.pipeline_def.asset_layer.partitions_def_for_asset(asset_key)
if result is None:
raise DagsterInvariantViolationError(
f"Attempting to access partitions def for asset {asset_key}, but it is not partitioned"
)
return result
@property
def step_context(self) -> "StepExecutionContext":
if self._step_context is None:
raise DagsterInvariantViolationError(
"Attempting to access step_context, "
"but it was not provided when constructing the InputContext"
)
return self._step_context
@public # type: ignore
@property
def has_partition_key(self) -> bool:
"""Whether the current run is a partitioned run"""
return self.step_context.has_partition_key
@public # type: ignore
@property
def partition_key(self) -> str:
"""The partition key for the current run.
Raises an error if the current run is not a partitioned run.
"""
return self.step_context.partition_key
@public # type: ignore
@property
def has_asset_partitions(self) -> bool:
if self._step_context is not None:
return self._step_context.has_asset_partitions_for_input(self.name)
else:
return False
@public # type: ignore
@property
def asset_partition_key(self) -> str:
"""The partition key for input asset.
Raises an error if the input asset has no partitioning, or if the run covers a partition
range for the input asset.
"""
return self.step_context.asset_partition_key_for_input(self.name)
@public # type: ignore
@property
def asset_partition_key_range(self) -> PartitionKeyRange:
"""The partition key range for input asset.
Raises an error if the input asset has no partitioning.
"""
return self.step_context.asset_partition_key_range_for_input(self.name)
@public # type: ignore
@property
def asset_partition_keys(self) -> Sequence[str]:
"""The partition keys for input asset.
Raises an error if the input asset has no partitioning.
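
        For example, an IO manager might load one object per upstream partition (a sketch;
        ``load_partition`` is a hypothetical helper, not part of dagster):

        .. code-block:: python

            from dagster import IOManager, InputContext

            class MyIOManager(IOManager):
                def load_input(self, context: InputContext):
                    # Return the loaded objects keyed by partition key.
                    return {key: load_partition(key) for key in context.asset_partition_keys}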
"""
return self.asset_partitions_def.get_partition_keys_in_range(self.asset_partition_key_range)
@public # type: ignore
@property
def asset_partitions_time_window(self) -> TimeWindow:
"""The time window for the partitions of the input asset.
        Raises an error if either of the following is true:
- The input asset has no partitioning.
- The input asset is not partitioned with a TimeWindowPartitionsDefinition.
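
        For example, a warehouse-backed IO manager might read only the rows covered by the
        upstream asset's time window (a sketch; ``read_rows_between`` is a hypothetical helper,
        not part of dagster):

        .. code-block:: python

            from dagster import IOManager, InputContext

            class MyWarehouseIOManager(IOManager):
                def load_input(self, context: InputContext):
                    # TimeWindow is a (start, end) pair of datetimes.
                    start, end = context.asset_partitions_time_window
                    return read_rows_between(table="events", start=start, end=end)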
"""
if self.upstream_output is None:
check.failed("InputContext needs upstream_output to get asset_partitions_time_window")
if self.upstream_output.asset_info is None:
raise ValueError(
"Tried to get asset partitions for an output that does not correspond to a "
"partitioned asset."
)
asset_info = self.upstream_output.asset_info
if not isinstance(asset_info.partitions_def, TimeWindowPartitionsDefinition):
raise ValueError(
"Tried to get asset partitions for an input that correponds to a partitioned "
"asset that is not partitioned with a TimeWindowPartitionsDefinition."
)
partitions_def: TimeWindowPartitionsDefinition = asset_info.partitions_def
partition_key_range = self.asset_partition_key_range
return TimeWindow(
partitions_def.time_window_for_partition_key(partition_key_range.start).start,
partitions_def.time_window_for_partition_key(partition_key_range.end).end,
)
    @public
def get_identifier(self) -> Sequence[str]:
"""Utility method to get a collection of identifiers that as a whole represent a unique
step input.
        If not using memoization, the unique identifier collection consists of

        - ``run_id``: the id of the run which generates the input.
            Note: This method also handles the re-execution memoization logic. If the step that
            generates the input is skipped in the re-execution, the ``run_id`` will be the id
            of its parent run.
        - ``step_key``: the key for a compute step.
        - ``name``: the name of the output (default: 'result').

        If using memoization, the ``version`` corresponding to the step output is used in place
        of the ``run_id``.

        Returns:
            Sequence[str]: A sequence of identifiers, i.e. (run_id or version), step_key, and output_name
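
        For example, a filesystem-backed IO manager might join the identifier into a storage
        path (a sketch; the base directory and pickle format here are illustrative choices,
        not dagster defaults):

        .. code-block:: python

            import os
            import pickle

            from dagster import IOManager, InputContext

            class MyPickleIOManager(IOManager):
                def load_input(self, context: InputContext):
                    # Identifier parts become path components under an illustrative base dir.
                    path = os.path.join("/tmp/dagster_storage", *context.get_identifier())
                    with open(path, "rb") as f:
                        return pickle.load(f)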
"""
if self.upstream_output is None:
raise DagsterInvariantViolationError(
"InputContext.upstream_output not defined. " "Cannot compute an identifier"
)
return self.upstream_output.get_identifier()
@public
def get_asset_identifier(self) -> Sequence[str]:
if self.asset_key is not None:
if self.has_asset_partitions:
return self.asset_key.path + [self.asset_partition_key]
else:
return self.asset_key.path
else:
check.failed("Can't get asset identifier for an input with no asset key")
def consume_events(self) -> Iterator["DagsterEvent"]:
"""Pops and yields all user-generated events that have been recorded from this context.
        If consume_events has not yet been called, this will yield all logged events since the
        call to `load_input`. If consume_events has been called, it will yield all events since
        the last time consume_events was called. Designed for internal use. Users should never
        need to invoke this method.
"""
events = self._events
self._events = []
yield from events
def add_input_metadata(
self,
metadata: Dict[str, Any],
description: Optional[str] = None,
) -> None:
"""Accepts a dictionary of metadata. Metadata entries will appear on the LOADED_INPUT event.
If the input is an asset, metadata will be attached to an asset observation.
The asset observation will be yielded from the run and appear in the event log.
Only valid if the context has an asset key.
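
        For example (a sketch; the metadata keys and values below are illustrative):

        .. code-block:: python

            from dagster import IOManager, InputContext

            class MyIOManager(IOManager):
                def load_input(self, context: InputContext):
                    obj = ...  # load the object however this IO manager does it
                    # Any dict of raw metadata values is accepted. The description is only
                    # attached to an asset observation when the input has an asset key.
                    context.add_input_metadata(
                        metadata={"num_rows": 100},
                        description="Loaded from the warehouse",
                    )
                    return obj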
"""
from dagster._core.definitions.metadata import normalize_metadata
from dagster._core.events import DagsterEvent
metadata = check.dict_param(metadata, "metadata", key_type=str)
self._metadata_entries.extend(normalize_metadata(metadata, []))
if self.has_asset_key:
check.opt_str_param(description, "description")
observation = AssetObservation(
asset_key=self.asset_key,
description=description,
partition=self.asset_partition_key if self.has_asset_partitions else None,
metadata=metadata,
)
self._observations.append(observation)
if self._step_context:
self._events.append(DagsterEvent.asset_observation(self._step_context, observation))
def get_observations(
self,
) -> List[AssetObservation]:
"""Retrieve the list of user-generated asset observations that were observed via the context.
User-generated events that were yielded will not appear in this list.
**Examples:**
        .. code-block:: python

            from dagster import IOManager, build_input_context, AssetObservation

            class MyIOManager(IOManager):
                def load_input(self, context):
                    ...

            def test_load_input():
                mgr = MyIOManager()
                context = build_input_context()
                mgr.load_input(context)
                observations = context.get_observations()
                ...
"""
return self._observations
def consume_metadata_entries(self) -> List[Union[MetadataEntry, PartitionMetadataEntry]]:
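        """Pops and returns all metadata entries recorded on this context via
        ``add_input_metadata`` since the last time this method was called. Designed for
        internal use; users should not need to invoke this method."""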
result = self._metadata_entries
self._metadata_entries = []
return result
def build_input_context(
name: Optional[str] = None,
config: Optional[Any] = None,
metadata: Optional[Dict[str, Any]] = None,
upstream_output: Optional["OutputContext"] = None,
dagster_type: Optional["DagsterType"] = None,
resource_config: Optional[Dict[str, Any]] = None,
resources: Optional[Dict[str, Any]] = None,
op_def: Optional["OpDefinition"] = None,
step_context: Optional["StepExecutionContext"] = None,
asset_key: Optional["AssetKey"] = None,
) -> "InputContext":
"""Builds input context from provided parameters.
``build_input_context`` can be used as either a function, or a context manager. If resources
that are also context managers are provided, then ``build_input_context`` must be used as a
context manager.
Args:
name (Optional[str]): The name of the input that we're loading.
config (Optional[Any]): The config attached to the input that we're loading.
metadata (Optional[Dict[str, Any]]): A dict of metadata that is assigned to the
InputDefinition that we're loading for.
upstream_output (Optional[OutputContext]): Info about the output that produced the object
we're loading.
dagster_type (Optional[DagsterType]): The type of this input.
resource_config (Optional[Dict[str, Any]]): The resource config to make available from the
input context. This usually corresponds to the config provided to the resource that
loads the input manager.
resources (Optional[Dict[str, Any]]): The resources to make available from the context.
For a given key, you can provide either an actual instance of an object, or a resource
definition.
asset_key (Optional[AssetKey]): The asset key attached to the InputDefinition.
op_def (Optional[OpDefinition]): The definition of the op that's loading the input.
step_context (Optional[StepExecutionContext]): For internal use.
    Examples:

        .. code-block:: python

            build_input_context()

            with build_input_context(resources={"foo": context_manager_resource}) as context:
                do_something
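
        A common use is unit-testing an IO manager's ``load_input`` method (a sketch;
        ``MyIOManager`` stands in for a user-defined IO manager):

        .. code-block:: python

            def test_load_input():
                context = build_input_context(name="my_input", config={"path": "/tmp/data"})
                assert MyIOManager().load_input(context) is not None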
"""
from dagster._core.definitions import OpDefinition
from dagster._core.execution.context.output import OutputContext
from dagster._core.execution.context.system import StepExecutionContext
from dagster._core.execution.context_creation_pipeline import initialize_console_manager
from dagster._core.types.dagster_type import DagsterType
name = check.opt_str_param(name, "name")
metadata = check.opt_dict_param(metadata, "metadata", key_type=str)
upstream_output = check.opt_inst_param(upstream_output, "upstream_output", OutputContext)
dagster_type = check.opt_inst_param(dagster_type, "dagster_type", DagsterType)
resource_config = check.opt_dict_param(resource_config, "resource_config", key_type=str)
resources = check.opt_dict_param(resources, "resources", key_type=str)
op_def = check.opt_inst_param(op_def, "op_def", OpDefinition)
step_context = check.opt_inst_param(step_context, "step_context", StepExecutionContext)
asset_key = check.opt_inst_param(asset_key, "asset_key", AssetKey)
return InputContext(
name=name,
pipeline_name=None,
config=config,
metadata=metadata,
upstream_output=upstream_output,
dagster_type=dagster_type,
log_manager=initialize_console_manager(None),
resource_config=resource_config,
resources=resources,
step_context=step_context,
op_def=op_def,
asset_key=asset_key,
)