llama : Llama-3_1-Nemotron-Ultra-253B-v1 support (#12843)
This commit is contained in:
parent 1d36b3670b
commit 3bf785f3ef
3 changed files with 24 additions and 5 deletions
@@ -2123,6 +2123,9 @@ class DeciModel(TextModel):
             # if n_heads_in_group is not None, then
             # _num_kv_heads[il] is num_attention_head // n_heads_in_group and
             # _num_heads[il] is num_attention_head
+            # ***dummy layer*** for nemotron 253B
+            # if n_heads_in_group is None and ffn_mult is None
+            # then _num_kv_heads[il] is 0 and _num_heads[il] is 0 and _ffn_dims is 0
             for il in range(len(_block_configs)):
                 if _block_configs[il]["attention"]["n_heads_in_group"] is None:
                     if _block_configs[il]["attention"]["replace_with_linear"] is True:
@@ -2134,7 +2137,10 @@ class DeciModel(TextModel):
                 else:
                     self._num_kv_heads.append(self.hparams["num_attention_heads"] // _block_configs[il]["attention"]["n_heads_in_group"])
                     self._num_heads.append(self.hparams["num_attention_heads"])
-                _ffn_multipliers.append(_block_configs[il]["ffn"]["ffn_mult"])
+                if _block_configs[il]["ffn"]["ffn_mult"] is None:  # dummy layer
+                    _ffn_multipliers.append(0.0)
+                else:
+                    _ffn_multipliers.append(_block_configs[il]["ffn"]["ffn_mult"])
             assert self.block_count == len(self._num_kv_heads)
             assert self.block_count == len(self._num_heads)
             assert self.block_count == len(_ffn_multipliers)
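For context, below is a minimal sketch (not the converter code itself) of how a Nemotron-style block_configs list maps to per-layer head counts and FFN multipliers, following the comments in the first hunk. The parse_block_configs helper is hypothetical, and the values used for the replace_with_linear and attention-skip branches are assumptions, since those lines fall outside the hunks shown above.

```python
def parse_block_configs(block_configs, num_attention_heads):
    """Sketch of the per-layer mapping described in the diff comments."""
    num_heads, num_kv_heads, ffn_multipliers = [], [], []
    for cfg in block_configs:
        attn, ffn = cfg["attention"], cfg["ffn"]
        if attn["n_heads_in_group"] is None:
            # No grouped attention: either attention is replaced with a linear
            # layer, or this is a Nemotron 253B "dummy" layer with no attention.
            # The head count for the linear case is an assumption here.
            num_kv_heads.append(0)
            num_heads.append(num_attention_heads if attn["replace_with_linear"] else 0)
        else:
            # Regular GQA layer: kv heads = attention heads // group size.
            num_kv_heads.append(num_attention_heads // attn["n_heads_in_group"])
            num_heads.append(num_attention_heads)
        # Dummy layers also carry no FFN; record 0.0 instead of None.
        ffn_multipliers.append(0.0 if ffn["ffn_mult"] is None else ffn["ffn_mult"])
    return num_heads, num_kv_heads, ffn_multipliers


# Example: a regular GQA layer, a linear-attention layer, and a dummy layer.
configs = [
    {"attention": {"n_heads_in_group": 8,    "replace_with_linear": False}, "ffn": {"ffn_mult": 2.5}},
    {"attention": {"n_heads_in_group": None, "replace_with_linear": True},  "ffn": {"ffn_mult": 2.5}},
    {"attention": {"n_heads_in_group": None, "replace_with_linear": False}, "ffn": {"ffn_mult": None}},
]
print(parse_block_configs(configs, num_attention_heads=128))
# -> ([128, 128, 0], [16, 0, 0], [2.5, 2.5, 0.0])
```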