Source code for SciAssist.pipelines.pipeline

# main developer: Yixi Ding <dingyixi@hotmail.com>

import torch

from SciAssist import BASE_OUTPUT_DIR, BASE_TEMP_DIR, BASE_CACHE_DIR
from SciAssist.pipelines import TASKS, load_model


[docs]class Pipeline(): """ Args: task_name (`str`): The task name, which is used to load model configs. model_name (`str`, *optional*): A string, the *model name* of a pretrained model provided for this task. device (`str`, *optional*): A string, `cpu` or `gpu`. cache_dir (`str` or `os.PathLike`, *optional*, default to "~/.cache/sciassist"): Path to a directory in which a downloaded pretrained model should be cached if the standard cache should not be used. output_dir (`str` or `os.PathLike`, *optional*, default to "output/result" from current work directory): Path to a directory in which the predicted results files should be stored. temp_dir (`str` or `os.PathLike`, *optional*, default to "output/.temp" from current work directory): Path to a directory which holds temporary files such as `.tei.xml`. """ def __init__(self, task_name: str, model_name: str = "default", device="gpu", cache_dir=None, output_dir=None, temp_dir=None): self.device = device self.cache_dir = cache_dir if cache_dir is not None else BASE_CACHE_DIR self.output_dir = output_dir if output_dir is not None else BASE_OUTPUT_DIR self.temp_dir = temp_dir if temp_dir is not None else BASE_TEMP_DIR self.config = TASKS[task_name][model_name] self.model_name = model_name self.model = load_model(config=self.config, cache_dir=self.cache_dir) if device in ["cuda", "gpu"] and torch.cuda.is_available(): self.device = torch.device("cuda") self.model.cuda() else: self.device = torch.device("cpu") self.data_utils = self.config["data_utils"]