We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b3571fb, commit afee1b7 (Copy full SHA for afee1b7)
src/megatron/bridge/models/nemotronh/nemotron_h_provider.py
@@ -18,6 +18,7 @@
18
from typing import Callable
19
20
from megatron.core.activations import squared_relu
21
+from megatron.core.transformer.enums import AttnBackend
22
23
from megatron.bridge.models.mamba.mamba_provider import MambaModelProvider
24
from megatron.bridge.utils.common_utils import get_rank_safe
@@ -102,6 +103,8 @@ class NemotronHModelProvider56B(NemotronHModelProvider):
102
103
ffn_hidden_size: int = 32768
104
num_attention_heads: int = 64
105
106
+ attention_backend: AttnBackend = AttnBackend.auto
107
+
108
109
@dataclass
110
class NemotronNanoModelProvider9Bv2(NemotronHModelProvider):
0 commit comments