Source code for langml.transformer.encoder

# -*- coding: utf-8 -*-

""" Yet another transformer implementation.
"""

# TODO: Transformer Decoder

from langml import TF_KERAS
if TF_KERAS:
    import tensorflow.keras.layers as L
else:
    import keras.layers as L

from langml.layers import MultiHeadAttention, LayerNorm
from langml.tensor_typing import Tensors, Activation
from langml.transformer import FeedForward
from langml.activations import gelu


[docs]class TransformerEncoder: def __init__(self, attention_heads: int, hidden_dim: int, attention_activation: Activation = None, feed_forward_activation: Activation = gelu, dropout_rate: float = 0.0, trainable: bool = True, name: str = 'Transformer-Encoder'): self.name = name self.dropout_rate = dropout_rate self.multihead_layer = MultiHeadAttention(head_num=attention_heads, return_attention=False, attention_activation=attention_activation, history_only=False, trainable=trainable, name=f'{self.name}-MultiHeadSelfAttention') if dropout_rate > 0.0: self.attn_dropout_layer = L.Dropout(rate=dropout_rate, name=f'{self.name}-MultiHeadSelfAttention-Dropout') self.attn_residual_layer = L.Add(name=f'{self.name}-MultiHeadSelfAttention-Add') self.attn_layer_norm = LayerNorm(name=f'{self.name}-MultiHeadSelfAttention-Norm', trainable=trainable) self.ffn_layer = FeedForward(hidden_dim, activation=feed_forward_activation, name=f'{self.name}-FeedForward') if dropout_rate > 0.0: self.ffn_dropout_layer = L.Dropout(rate=dropout_rate, name=f'{self.name}-FeedForward-Dropout') self.ffn_residual_layer = L.Add(name=f'{self.name}-FeedForward-Add') self.ffn_layer_norm = LayerNorm(name=f'{self.name}-FeedForward-Norm', trainable=trainable)
[docs] def __call__(self, inputs: Tensors) -> Tensors: attn_output = self.multihead_layer(inputs) if self.dropout_rate > 0.0: attn_output = self.attn_dropout_layer(attn_output) if isinstance(inputs, list): inputs = inputs[0] attn_output = self.attn_residual_layer([inputs, attn_output]) attn_output = self.attn_layer_norm(attn_output) ffn_output = self.ffn_layer(attn_output) if self.dropout_rate > 0.0: ffn_output = self.ffn_dropout_layer(ffn_output) ffn_output = self.ffn_residual_layer([attn_output, ffn_output]) ffn_output = self.ffn_layer_norm(ffn_output) return ffn_output
[docs]class TransformerEncoderBlock: def __init__(self, blocks: int, attention_heads: int, hidden_dim: int, attention_activation: Activation = None, feed_forward_activation: Activation = gelu, dropout_rate: float = 0.0, trainable: bool = False, name: str = 'TransformerEncoderBlock', share_weights: bool = False): if share_weights: encoder = TransformerEncoder(attention_heads, hidden_dim, attention_activation=attention_activation, feed_forward_activation=feed_forward_activation, dropout_rate=dropout_rate, trainable=trainable, name=name) self.encoders = [encoder for _ in range(blocks)] else: self.encoders = [ TransformerEncoder(attention_heads, hidden_dim, attention_activation=attention_activation, feed_forward_activation=feed_forward_activation, dropout_rate=dropout_rate, trainable=trainable, name=f'{name}-{i}') for i in range(blocks) ]
[docs] def __call__(self, inputs: Tensors) -> Tensors: output = inputs for encoder in self.encoders: output = encoder(output) return output