Source code for langml.common.evaluator.spearman

# -*- coding: utf-8 -*-

from typing import List, Tuple

import numpy as np
from scipy.stats import spearmanr
from sklearn.preprocessing import normalize

from langml.utils import pad_sequences
from langml.tensor_typing import Models
from langml.tokenizer import Tokenizer


class SpearmanEvaluator:
    """Score an encoder on text pairs with Spearman's rank correlation.

    Each pair is tokenized and encoded, and the cosine similarity of the
    two resulting vectors is correlated against the pair's label.
    """

    def __init__(self, encoder: Models, tokenizer: Tokenizer) -> None:
        """Store the encoder and tokenizer used for evaluation.

        Args:
            encoder: model whose ``predict`` is called with a two-element
                list ``[token_ids, zeros_like(token_ids)]``.
            tokenizer: object whose ``encode(text)`` result exposes ``.ids``.
        """
        self.encoder = encoder
        self.tokenizer = tokenizer

    def compute_corrcoef(self, data: List[Tuple[str, str, int]]) -> float:
        """Return the Spearman correlation between labels and similarities.

        Args:
            data: ``(text_left, text_right, label)`` triples.

        Returns:
            The ``correlation`` field of ``scipy.stats.spearmanr`` computed
            over the labels (cast to float) and the per-pair similarities.
        """
        ids_a, ids_b, gold = [], [], []
        for pair in data:
            text_a, text_b, score = pair
            # Left text is encoded before right text for every pair.
            ids_a.append(self.tokenizer.encode(text_a).ids)
            ids_b.append(self.tokenizer.encode(text_b).ids)
            gold.append(float(score))

        # Pad each side independently; both pad and truncate at the end.
        ids_a = pad_sequences(ids_a, padding='post', truncating='post')
        ids_b = pad_sequences(ids_b, padding='post', truncating='post')

        # The second model input is all zeros with the same shape as the ids
        # (presumably segment ids -- confirm against the encoder definition).
        inputs_a = [ids_a, np.zeros_like(ids_a)]
        vecs_a = self.encoder.predict(inputs_a, verbose=True)
        inputs_b = [ids_b, np.zeros_like(ids_b)]
        vecs_b = self.encoder.predict(inputs_b, verbose=True)

        # After L2 normalization the row-wise dot product is cosine similarity.
        vecs_a = normalize(vecs_a, norm='l2')
        vecs_b = normalize(vecs_b, norm='l2')
        similarity = (vecs_a * vecs_b).sum(axis=1)

        result = spearmanr(gold, similarity)
        return result.correlation