hydra: [Bug] Unexpected ConfigKeyError while composing config

🐛 Bug

Description

When configs based on base classes are nested more than one level deep, merging no longer works.

Checklist

  • I checked on the latest version of Hydra
  • I created a minimal repro (See this for tips).

To reproduce

**Minimal Code/Config snippet to reproduce**

from dataclasses import dataclass, field
from typing import Any, List

import hydra.utils
from hydra.core.config_store import ConfigStore
from omegaconf import MISSING

# I have multiple datasets


@dataclass
class DataConfig:
    """This is just a common base class."""


@dataclass
class Dataset1Config(DataConfig):
    some_member1: int = 1


@dataclass
class Dataset2Config(DataConfig):
    some_member2: int = 2


# I register them at some place in my folder structure.
cs = ConfigStore.instance()
cs.store(group="some/data/folder", name=Dataset1Config.__name__, node=Dataset1Config)
cs.store(group="some/data/folder", name=Dataset2Config.__name__, node=Dataset2Config)


# I have multiple training routines (listed only one here deriving from a common base class)


@dataclass
class TrainingConfig:
    pass


@dataclass
class SpecialTrainingConfig:
    some_member4: int = 4


cs.store(group="some/training/folder", name=SpecialTrainingConfig.__name__, node=SpecialTrainingConfig)


# finally I have multiple models that can be trained differently. Model1 is usually trained by the special training, so
# I want to have this as a default.


@dataclass
class ModelConfig:
    """This is just a common base class."""


@dataclass
class Model1Config(ModelConfig):
    defaults: List[Any] = field(default_factory=lambda: [{"/some/training/folder@training": "SpecialTrainingConfig"}])

    training: TrainingConfig = MISSING


@dataclass
class Model2Config(ModelConfig):
    some_member: int = 3


# I register them at some place in my folder structure.
cs.store(group="some/model/folder", name=Model1Config.__name__, node=Model1Config)
cs.store(group="some/model/folder", name=Model2Config.__name__, node=Model2Config)


# main.py:
# for the main routine of my machine learning project, I also have a config.
# usually I would use dataset1, so I have this as a default. But the model changes often and I don't want to use a
# default there.
@dataclass
class ScriptConfig:
    defaults: List[Any] = field(default_factory=lambda: [{"some/data/folder@dataset": "Dataset1Config"}, "_self_"])

    dataset: DataConfig = MISSING
    model: ModelConfig = MISSING


cs.store(name="ScriptConfig", node=ScriptConfig)


@hydra.main(config_name="my_config2", version_base="1.2", config_path=".")
def main(cfg):
    print(cfg)


if __name__ == "__main__":
    main()

The my_config2.yaml looks like this:

defaults:
  - ScriptConfig
  - /some/model/folder@model: Model1Config

**Stack trace/error message**

Traceback (most recent call last):
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/config_loader_impl.py", line 542, in _compose_config_from_defaults_list
    cfg.merge_with(loaded.config)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 492, in merge_with
    self._format_and_raise(key=None, value=None, cause=e)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/base.py", line 231, in _format_and_raise
    format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 819, in format_and_raise
    _raise(ex, cause)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 797, in _raise
    raise ex.with_traceback(sys.exc_info()[2])  # set env var OC_CAUSE=1 for full trace
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 490, in merge_with
    self._merge_with(*others)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 514, in _merge_with
    BaseContainer._map_merge(self, other)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 399, in _map_merge
    dest_node._merge_with(src_node)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 514, in _merge_with
    BaseContainer._map_merge(self, other)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 432, in _map_merge
    dest[key] = src._get_node(key)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 310, in __setitem__
    self._format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/base.py", line 231, in _format_and_raise
    format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 819, in format_and_raise
    _raise(ex, cause)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 797, in _raise
    raise ex.with_traceback(sys.exc_info()[2])  # set env var OC_CAUSE=1 for full trace
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 308, in __setitem__
    self.__set_impl(key=key, value=value)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 318, in __set_impl
    self._set_item_impl(key, value)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 545, in _set_item_impl
    self._validate_set(key, value)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 180, in _validate_set
    target = self._get_node(key) if key is not None else self
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 476, in _get_node
    self._validate_get(key)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 164, in _validate_get
    self._format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/base.py", line 231, in _format_and_raise
    format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 899, in format_and_raise
    _raise(ex, cause)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 797, in _raise
    raise ex.with_traceback(sys.exc_info()[2])  # set env var OC_CAUSE=1 for full trace
omegaconf.errors.ConfigKeyError: Key 'training' not in 'ModelConfig'
    full_key: model.training
    reference_type=ModelConfig
    object_type=ModelConfig
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
    _run_app(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/utils.py", line 457, in _run_app
    run_and_report(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/utils.py", line 222, in run_and_report
    raise ex
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/utils.py", line 219, in run_and_report
    return func()
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
    lambda: hydra.run(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/hydra.py", line 105, in run
    cfg = self.compose_config(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/hydra.py", line 594, in compose_config
    cfg = self.config_loader.load_configuration(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/config_loader_impl.py", line 142, in load_configuration
    return self._load_configuration_impl(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/config_loader_impl.py", line 263, in _load_configuration_impl
    cfg = self._compose_config_from_defaults_list(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/config_loader_impl.py", line 544, in _compose_config_from_defaults_list
    raise ConfigCompositionException(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/hydra/_internal/config_loader_impl.py", line 542, in _compose_config_from_defaults_list
    cfg.merge_with(loaded.config)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 492, in merge_with
    self._format_and_raise(key=None, value=None, cause=e)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/base.py", line 231, in _format_and_raise
    format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 819, in format_and_raise
    _raise(ex, cause)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 797, in _raise
    raise ex.with_traceback(sys.exc_info()[2])  # set env var OC_CAUSE=1 for full trace
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 490, in merge_with
    self._merge_with(*others)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 514, in _merge_with
    BaseContainer._map_merge(self, other)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 399, in _map_merge
    dest_node._merge_with(src_node)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 514, in _merge_with
    BaseContainer._map_merge(self, other)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 432, in _map_merge
    dest[key] = src._get_node(key)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 310, in __setitem__
    self._format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/base.py", line 231, in _format_and_raise
    format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 819, in format_and_raise
    _raise(ex, cause)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 797, in _raise
    raise ex.with_traceback(sys.exc_info()[2])  # set env var OC_CAUSE=1 for full trace
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 308, in __setitem__
    self.__set_impl(key=key, value=value)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 318, in __set_impl
    self._set_item_impl(key, value)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/basecontainer.py", line 545, in _set_item_impl
    self._validate_set(key, value)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 180, in _validate_set
    target = self._get_node(key) if key is not None else self
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 476, in _get_node
    self._validate_get(key)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/dictconfig.py", line 164, in _validate_get
    self._format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/base.py", line 231, in _format_and_raise
    format_and_raise(
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 899, in format_and_raise
    _raise(ex, cause)
  File "/home/zis2rng/Coding/letas_workspaces/dev1-letas/.venv/lib/python3.8/site-packages/omegaconf/_utils.py", line 797, in _raise
    raise ex.with_traceback(sys.exc_info()[2])  # set env var OC_CAUSE=1 for full trace
hydra.errors.ConfigCompositionException: In 'some/training/folder/SpecialTrainingConfig': ConfigKeyError raised while composing config:
Key 'training' not in 'ModelConfig'
    full_key: model.training
    reference_type=ModelConfig
    object_type=ModelConfig
python-BaseException

Expected Behavior

I would expect this to run smoothly, as it does when I nest only one level, e.g., by choosing the following my_config2.yaml:

defaults:
  - /some/model/folder@_global_: Model1Config

System information

  • Hydra Version : 1.3.1
  • Python version : 3.8.10
  • Virtual environment type and version : poetry/venv
  • Operating system : ubuntu

About this issue

  • Original URL
  • State: closed
  • Created a year ago
  • Comments: 20

Most upvoted comments

OK, after studying your example once more, I understood the “pure OmegaConf example”, but I still had not understood the Hydra side. So I took another look at the Hydra code. With a better idea of where to look, I realized that all the values in the defaults list are turned into a tree with ordered children in the obvious way (see _create_defaults_tree()). The tree is then turned into a list via a depth-first walk (see _tree_to_list()). At that point, the defaults list looks more or less like the list in the “pure OmegaConf example”, and the behavior is clear.

So my confusion originated from the fact that I had always assumed there would be a breadth-first walk, which would really finish one config first before using it somewhere else.

Thank you again for having made this clear.

I created the following pure-omegaconf repro for the error message from your toy example:

# omc_repro.py
from dataclasses import dataclass
from omegaconf import MISSING, OmegaConf

@dataclass
class DataConfig: pass

@dataclass
class Dataset1Config(DataConfig):
    some_member1: int = 1

@dataclass
class TrainingConfig: pass

@dataclass
class SpecialTrainingConfig(TrainingConfig):
    some_member4: int = 4

@dataclass
class ModelConfig: pass

@dataclass
class Model1ConfigNoDefaults(ModelConfig):
    training: TrainingConfig = MISSING

@dataclass
class ScriptConfigNoDefaults:
    dataset: DataConfig = MISSING
    model: ModelConfig = MISSING

if __name__ == "__main__":
    defaults = [
        {"dataset": Dataset1Config()},
        ScriptConfigNoDefaults(),
        {"model": {"training": SpecialTrainingConfig()}},
        {"model": Model1ConfigNoDefaults()},
    ]
    cfg = OmegaConf.merge(*defaults)
    print(cfg)
$ python omc_repro.py
Traceback (most recent call last):
...
omegaconf.errors.ConfigKeyError: Key 'training' not in 'ModelConfig'
    full_key: model.training
    reference_type=ModelConfig
    object_type=ModelConfig

The above call to OmegaConf.merge is roughly equivalent to what Hydra does when composing the final config from the defaults list. The error is resolved by swapping the last two items in the list:

$ diff -u omc_repro.py omc_fixed.py
--- omc_repro.py        2023-04-18 11:01:14.878608463 -0500
+++ omc_fixed.py        2023-04-18 11:05:12.352116278 -0500
@@ -31,8 +31,8 @@
     defaults = [
         {"dataset": Dataset1Config()},
         ScriptConfigNoDefaults(),
-        {"model": {"training": SpecialTrainingConfig()}},
         {"model": Model1ConfigNoDefaults()},
+        {"model": {"training": SpecialTrainingConfig()}},
     ]
     cfg = OmegaConf.merge(*defaults)
     print(cfg)
$ python omc_fixed.py
{'dataset': {'some_member1': 1}, 'model': {'training': {'some_member4': 4}}}

Swapping those items in the defaults list corresponds to putting _self_ first in the Model1Config defaults.

The problem with the original order is that OmegaConf.merge works in an order-dependent manner: once the first two items in the defaults list are merged, there is no model.training key in the config, so setting model.training=SpecialTrainingConfig does not work. Setting model=Model1Config first resolves this, as merging Model1Config introduces a model.training key to the config, enabling model.training=SpecialTrainingConfig to succeed.

the second change is actually unwanted.

I see. In that case, try putting _self_ at the beginning of the Model1Config defaults list. (When you omit _self_, the default behavior for Hydra 1.2 is equivalent to adding _self_ at the end of the defaults list.)

$ diff -u original.py modified.py
--- original.py 2023-04-18 10:56:53.408898323 -0500
+++ modified.py 2023-04-18 10:56:22.308733510 -0500
@@ -38,7 +38,7 @@


 @dataclass
-class SpecialTrainingConfig:
+class SpecialTrainingConfig(TrainingConfig):
     some_member4: int = 4


@@ -62,7 +62,8 @@
 class Model1Config(ModelConfig):
     defaults: List[Any] = field(
         default_factory=lambda: [
-            {"/some/training/folder@training": "SpecialTrainingConfig"}
+            "_self_",
+            {"/some/training/folder@training": "SpecialTrainingConfig"},
         ]
     )
 $ python modified.py
{'dataset': {'some_member1': 1}, 'model': {'training': {'some_member4': 4}}}

I’ll follow this comment with a note on why this works.

@SZiesche I’m not convinced that this is a bug. I’ve been able to get your toy example working by making two changes:

  • Make training a field of the base class ModelConfig rather than a field of the derived class Model1Config.
  • Make SpecialTrainingConfig into a subclass of TrainingConfig.

Here’s the diff:

$ diff -u original.py modified.py
--- original.py 2023-04-18 07:24:04.102760859 -0500
+++ modified.py 2023-04-18 07:24:43.863026153 -0500
@@ -38,7 +38,7 @@


 @dataclass
-class SpecialTrainingConfig:
+class SpecialTrainingConfig(TrainingConfig):
     some_member4: int = 4


@@ -53,13 +53,13 @@
 class ModelConfig:
     """This is just a common base class."""

+    training: TrainingConfig = MISSING
+

 @dataclass
 class Model1Config(ModelConfig):
     defaults: List[Any] = field(default_factory=lambda: [{"/some/training/folder@training": "SpecialTrainingConfig"}])

-    training: TrainingConfig = MISSING
-

 @dataclass
 class Model2Config(ModelConfig):

Here’s the output after modification:

$ python modified.py
{'dataset': {'some_member1': 1}, 'model': {'training': {'some_member4': 4}}}