langml.layers.attention

Module Contents

Classes

SelfAttention

SelfAdditiveAttention

ScaledDotProductAttention

ScaledDotProductAttention

MultiHeadAttention

MultiHeadAttention

GatedAttentionUnit

Gated Attention Unit

class langml.layers.attention.SelfAttention(attention_units: Optional[int] = None, return_attention: bool = False, is_residual: bool = False, attention_activation: langml.tensor_typing.Activation = 'relu', attention_epsilon: float = 10000000000.0, kernel_initializer: langml.tensor_typing.Initializer = 'glorot_normal', kernel_regularizer: Optional[langml.tensor_typing.Regularizer] = None, kernel_constraint: Optional[langml.tensor_typing.Constraint] = None, bias_initializer: langml.tensor_typing.Initializer = 'zeros', bias_regularizer: Optional[langml.tensor_typing.Regularizer] = None, bias_constraint: Optional[langml.tensor_typing.Constraint] = None, use_attention_bias: bool = True, attention_penalty_weight: float = 0.0, **kwargs)[source]

Bases: tensorflow.keras.layers.Layer

get_config(self) -> dict[source]
build(self, input_shape: langml.tensor_typing.Tensors)[source]
call(self, inputs: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None, **kwargs) -> Union[List[langml.tensor_typing.Tensors], langml.tensor_typing.Tensors][source]
compute_mask(self, inputs: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None) -> Union[List[Union[langml.tensor_typing.Tensors, None]], langml.tensor_typing.Tensors][source]
_attention_penalty(self, attention: langml.tensor_typing.Tensors) -> langml.tensor_typing.Tensors[source]
static get_custom_objects() -> dict[source]
compute_output_shape(self, input_shape: langml.tensor_typing.Tensors) -> Union[List[langml.tensor_typing.Tensors], langml.tensor_typing.Tensors][source]
class langml.layers.attention.SelfAdditiveAttention(attention_units: Optional[int] = None, return_attention: bool = False, is_residual: bool = False, attention_activation: langml.tensor_typing.Activation = 'relu', attention_epsilon: float = 10000000000.0, kernel_initializer: langml.tensor_typing.Initializer = 'glorot_normal', kernel_regularizer: Optional[langml.tensor_typing.Regularizer] = None, kernel_constraint: Optional[langml.tensor_typing.Constraint] = None, bias_initializer: langml.tensor_typing.Initializer = 'zeros', bias_regularizer: Optional[langml.tensor_typing.Regularizer] = None, bias_constraint: Optional[langml.tensor_typing.Constraint] = None, use_attention_bias: bool = True, attention_penalty_weight: float = 0.0, **kwargs)[source]

Bases: tensorflow.keras.layers.Layer

get_config(self) -> dict[source]
build(self, input_shape: langml.tensor_typing.Tensors)[source]
call(self, inputs: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None, **kwargs) -> Union[List[langml.tensor_typing.Tensors], langml.tensor_typing.Tensors][source]
compute_mask(self, inputs: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None) -> Union[List[Union[langml.tensor_typing.Tensors, None]], langml.tensor_typing.Tensors][source]
_attention_penalty(self, attention: langml.tensor_typing.Tensors) -> langml.tensor_typing.Tensors[source]
static get_custom_objects() -> dict[source]
compute_output_shape(self, input_shape: langml.tensor_typing.Tensors) -> Union[List[langml.tensor_typing.Tensors], langml.tensor_typing.Tensors][source]
class langml.layers.attention.ScaledDotProductAttention(return_attention: bool = False, history_only: bool = False, **kwargs)[source]

Bases: tensorflow.keras.layers.Layer

ScaledDotProductAttention

$\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\left(\frac{Q K^T}{\sqrt{d_k}}\right) V$

https://arxiv.org/pdf/1706.03762.pdf

get_config(self) -> dict[source]
call(self, inputs: langml.tensor_typing.Tensors, mask: Optional[Union[langml.tensor_typing.Tensors, List[langml.tensor_typing.Tensors]]] = None, **kwargs) -> Union[List[langml.tensor_typing.Tensors], langml.tensor_typing.Tensors][source]
compute_mask(self, inputs: langml.tensor_typing.Tensors, mask: Optional[Union[langml.tensor_typing.Tensors, List[langml.tensor_typing.Tensors]]] = None) -> Union[List[Union[langml.tensor_typing.Tensors, None]], langml.tensor_typing.Tensors][source]
static get_custom_objects() -> dict[source]
compute_output_shape(self, input_shape: Union[langml.tensor_typing.Tensors, List[langml.tensor_typing.Tensors]]) -> Union[List[langml.tensor_typing.Tensors], langml.tensor_typing.Tensors][source]
class langml.layers.attention.MultiHeadAttention(head_num: int, return_attention: bool = False, attention_activation: langml.tensor_typing.Activation = 'relu', kernel_initializer: langml.tensor_typing.Initializer = 'glorot_normal', kernel_regularizer: Optional[langml.tensor_typing.Regularizer] = None, kernel_constraint: Optional[langml.tensor_typing.Constraint] = None, bias_initializer: langml.tensor_typing.Initializer = 'zeros', bias_regularizer: Optional[langml.tensor_typing.Regularizer] = None, bias_constraint: Optional[langml.tensor_typing.Constraint] = None, use_attention_bias: bool = True, history_only: bool = False, **kwargs)[source]

Bases: tensorflow.keras.layers.Layer

MultiHeadAttention https://arxiv.org/pdf/1706.03762.pdf

get_config(self) -> dict[source]
build(self, input_shape: langml.tensor_typing.Tensors)[source]
static _reshape_to_batches(x, head_num)[source]
static _reshape_attention_from_batches(x, head_num)[source]
static _reshape_from_batches(x, head_num)[source]
static _reshape_mask(mask, head_num)[source]
call(self, inputs: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None, **kwargs) -> langml.tensor_typing.Tensors[source]
static get_custom_objects() -> dict[source]
compute_mask(self, inputs: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None) -> Union[List[Union[langml.tensor_typing.Tensors, None]], langml.tensor_typing.Tensors][source]
compute_output_shape(self, input_shape: Union[langml.tensor_typing.Tensors, List[langml.tensor_typing.Tensors]]) -> Union[List[langml.tensor_typing.Tensors], langml.tensor_typing.Tensors][source]
class langml.layers.attention.GatedAttentionUnit(attention_units: int, attention_activation: langml.tensor_typing.Activation = 'relu', attention_normalizer: langml.tensor_typing.Activation = relu2, attention_epsilon: float = 10000000000.0, kernel_initializer: langml.tensor_typing.Initializer = 'glorot_normal', kernel_regularizer: Optional[langml.tensor_typing.Regularizer] = None, kernel_constraint: Optional[langml.tensor_typing.Constraint] = None, bias_initializer: langml.tensor_typing.Initializer = 'zeros', bias_regularizer: Optional[langml.tensor_typing.Regularizer] = None, bias_constraint: Optional[langml.tensor_typing.Constraint] = None, use_attention_bias: bool = True, use_attention_scale: bool = True, use_relative_position: bool = True, use_offset: bool = True, use_scale: bool = True, is_residual: bool = True, **kwargs)[source]

Bases: tensorflow.keras.layers.Layer

Gated Attention Unit https://arxiv.org/abs/2202.10447

get_config(self) -> dict[source]
build(self, input_shape: langml.tensor_typing.Tensors)[source]
apply_rotary_position_embeddings(self, sinusoidal: langml.tensor_typing.Tensors, *tensors)[source]

Apply rotary position embeddings (RoPE). Modified from: https://github.com/bojone/bert4keras/blob/master/bert4keras/backend.py#L310

attn(self, x: langml.tensor_typing.Tensors, v: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None) -> langml.tensor_typing.Tensors[source]
call(self, inputs: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None, **kwargs) -> langml.tensor_typing.Tensors[source]
compute_mask(self, inputs: langml.tensor_typing.Tensors, mask: Optional[langml.tensor_typing.Tensors] = None) -> langml.tensor_typing.Tensors[source]
compute_output_shape(self, input_shape: langml.tensor_typing.Tensors) -> langml.tensor_typing.Tensors[source]
static get_custom_objects() -> dict[source]