from typing import Dict, List

from gensim.models import KeyedVectors
class PreTrainedPipeline:
    """Callable pipeline that lists the words most similar to an input word.

    The word vectors are downloaded from the Hugging Face Hub when the
    pipeline is constructed and are kept on ``self.model``.
    """

    # Location of the text-format word2vec vectors on the Hugging Face Hub.
    REPO_ID = "lang-uk/word2vec-uk"
    FILENAME = "ubercorpus.cased.tokenized.300d"
    # Number of neighbours requested when the caller does not specify one.
    DEFAULT_TOPN = 30
    # Text placed between consecutive neighbours in the generated output.
    SEPARATOR = ", \n\n"

    def __init__(self, path=""):
        """Download the vectors file and load it into ``self.model``.

        Args:
            path: Not used by this implementation (presumably kept for the
                hosting framework's calling convention -- verify against
                the caller).
        """
        # Imported lazily: only needed when a pipeline is actually built.
        from huggingface_hub import hf_hub_download

        vectors_file = hf_hub_download(
            repo_id=self.REPO_ID,
            filename=self.FILENAME,
        )
        # The file is loaded in the textual (non-binary) word2vec format.
        self.model = KeyedVectors.load_word2vec_format(vectors_file, binary=False)

    def __call__(self, inputs: str, topn: int = DEFAULT_TOPN) -> List[Dict]:
        """Return the words most similar to ``inputs`` as generated text.

        Args:
            inputs: The query word; surrounding whitespace is stripped
                before the lookup.
            topn: How many neighbours to request from the model. Defaults
                to ``DEFAULT_TOPN`` (30), which matches the previous
                hard-coded value.

        Returns:
            A single-element list holding a dict whose ``"generated_text"``
            value is the neighbour keys joined by ``SEPARATOR``.

        Note:
            Errors raised by ``self.model.most_similar`` are not caught here
            and propagate to the caller (NOTE(review): gensim is expected to
            raise ``KeyError`` for out-of-vocabulary words -- confirm).
        """
        word = inputs.strip()
        neighbours = self.model.most_similar(word, topn=topn)
        # Only the neighbour keys are reported; similarity scores are dropped.
        generated = self.SEPARATOR.join(str(key) for key, _ in neighbours)
        return [{"generated_text": generated}]