Source code for openvariant.annotation.builder

"""
Builder
====================================
A core Enum to build a specified Tuple for each annotation.
"""
import csv
import glob
import gzip
import importlib
import importlib.util
import inspect
import os
import re
from os.path import dirname
from typing import Any, Callable, List, Tuple, Union

from openvariant.annotation.config_annotation import AnnotationKeys, AnnotationTypes
from openvariant.plugins.context import Context
from openvariant.plugins.plugin import Plugin


class Builder:
    """Callable wrapper around a function written as text in an annotation file."""

    func: str = None

    def __init__(self, func: str) -> None:
        """
        Keep the textual function so it can be evaluated when the builder is called.

        Parameters
        ----------
        func : str
            Source text of a function or lambda, as described in the annotation file.
        """
        self.func = func

    def __call__(self, x: Any) -> Any:
        """Evaluate the stored source text and apply the resulting callable to `x`."""
        # NOTE(review): eval() runs whatever expression the annotation file
        # contains, so annotation files must come from a trusted source.
        # The text is re-evaluated on every call.
        compiled = eval(self.func)
        return compiled(x)
# Shapes of the tuples returned by the annotation builders defined below.
# Union is required here: an expression such as `float or int or str` is
# evaluated as a boolean expression and collapses to its first operand.
StaticBuilder = Tuple[str, Union[float, int, str]]
InternalBuilder = Tuple[str, List, Builder, Union[str, float]]
DirnameBuilder = Tuple[str, Builder, re.Pattern]
FilenameBuilder = Tuple[str, Builder, re.Pattern]
PluginBuilder = Tuple[str, Callable, Context]
MappingBuilder = Tuple[str, List, dict]


def _get_function_and_regexp(x: dict) -> Tuple[Builder, re.Pattern]:
    """Get the function and regular expression of an annotation

    Parameters
    ----------
    x : dict
        Annotation

    Returns
    -------
    Builder
        Represents the function described on the annotation. The identity
        function is used when the annotation has no function or it is None.
    re.Pattern
        Regular expression to parse the annotation. The catch-all pattern
        '(.*)' is used when the annotation has no regex or it is None.

    Raises
    ------
    re.error
        If the regular expression of the annotation cannot be compiled.
    """
    # A function explicitly set to None is treated like a missing one, in the
    # same way the regex is handled below; Builder(None) would otherwise only
    # fail later, when the builder is first called.
    func_source = x.get(AnnotationKeys.FUNCTION.value)
    func_apply = Builder("(lambda y: y)") if func_source is None else Builder(func_source)

    pattern = x.get(AnnotationKeys.REGEX.value)
    try:
        regex_apply = re.compile('(.*)' if pattern is None else pattern)
    except re.error as e:
        raise re.error(f'Wrong regex pattern: {e}') from e
    return func_apply, regex_apply
class STATIC:
    def __call__(self, x: dict, base_path: str = None) -> StaticBuilder:
        """Build a StaticBuilder from a static annotation, i.e. one carrying a fixed value.

        Parameters
        ----------
        x : dict
            Annotation
        base_path : str
            Not used by this builder.

        Returns
        -------
        str
            Annotation type
        float or int or str
            Value of the field

        Raises
        ------
        KeyError
            If the annotation has no value entry.
        """
        try:
            fixed_value = x[AnnotationKeys.VALUE.value]
        except KeyError:
            raise KeyError('Static annotation is wrong specified.')
        else:
            return AnnotationTypes.STATIC.name, fixed_value
class INTERNAL:
    def __call__(self, x: dict, base_path: str = None) -> InternalBuilder:
        """Build an InternalBuilder from an internal annotation, which reads fields of the input files.

        Parameters
        ----------
        x : dict
            Annotation
        base_path : str
            Not used by this builder.

        Returns
        -------
        str
            Annotation type
        List
            Field source entry of the annotation
        Builder
            Representation of the function to apply on the annotation value
        str or float
            Value entry of the annotation, or None when the annotation has none
        """
        try:
            fixed_value = x[AnnotationKeys.VALUE.value]
        except KeyError:
            fixed_value = None

        source_fields = x[AnnotationKeys.FIELD_SOURCE.value]

        function_key = AnnotationKeys.FUNCTION.value
        # The identity function is used when the function is missing, None, or
        # exactly two characters long.
        # NOTE(review): the length-2 case presumably stands for an empty quoted
        # function in the annotation file -- confirm.
        if function_key in x and x[function_key] is not None and len(x[function_key]) != 2:
            transform = Builder(x[function_key])
        else:
            transform = Builder("(lambda y: y)")

        return AnnotationTypes.INTERNAL.name, source_fields, transform, fixed_value
class DIRNAME:
    def __call__(self, x: dict, base_path: str = None) -> DirnameBuilder:
        """Build a DirnameBuilder from a dirname annotation, which uses the directory where input files are located.

        Parameters
        ----------
        x : dict
            Annotation
        base_path : str
            Not used by this builder.

        Returns
        -------
        str
            Annotation type
        Builder
            Representation of the function to apply on the annotation value (dirname).
        re.Pattern
            Representation of a regular expression to apply on the annotation value (dirname).
        """
        function_and_regex = _get_function_and_regexp(x)
        return (AnnotationTypes.DIRNAME.name, *function_and_regex)
class FILENAME:
    def __call__(self, x: dict, base_path: str = None) -> FilenameBuilder:
        """Build a FilenameBuilder from a filename annotation, which uses the name of each input file.

        Parameters
        ----------
        x : dict
            Annotation
        base_path : str
            Not used by this builder.

        Returns
        -------
        str
            Annotation type
        Builder
            Representation of the function to apply on the annotation value (filename).
        re.Pattern
            Representation of a regular expression to apply on the annotation value (filename).
        """
        function_and_regex = _get_function_and_regexp(x)
        return (AnnotationTypes.FILENAME.name, *function_and_regex)
def _get_plugin_function(mod) -> Callable:
    """Get the 'run' function from a plugin module

    Parameters
    ----------
    mod
        Plugin module where 'run' function is imported

    Returns
    -------
    Callable
        'run' method of a fresh instance of the first class in the module
        that derives from Plugin (Plugin itself excluded), or None when the
        module has no such class.
    """
    # inspect.getmembers returns members sorted by name, so "first" means
    # first in alphabetical order of the class names.
    plugin_cls = next(
        (cls for _, cls in inspect.getmembers(mod, inspect.isclass)
         if issubclass(cls, Plugin) and cls is not Plugin),
        None,
    )
    if plugin_cls is None:
        return None
    return plugin_cls().run


def _get_plugin_context(mod) -> Any:
    """Get the context class from a plugin module

    Parameters
    ----------
    mod
        Plugin module to inspect

    Returns
    -------
    Any
        The first class in the module that derives from Context (Context
        itself excluded), returned as a class and not instantiated, or None
        when the module has no such class.
    """
    return next(
        (cls for _, cls in inspect.getmembers(mod, inspect.isclass)
         if issubclass(cls, Context) and cls is not Context),
        None,
    )
class MAPPING:
    def __call__(self, x: dict, base_path: str) -> MappingBuilder:
        """Build a MappingBuilder from a mapping annotation.

        A mapping annotation matches a value of the input file against one
        column of a tab-separated mapping file and yields the value of another
        column of that file.

        Parameters
        ----------
        x : dict
            Annotation.
        base_path : str
            A base path where file that is parsing is located. The mapping
            file pattern is resolved relative to its directory.

        Returns
        -------
        str
            Annotation type.
        List
            Fields that has to look for in the input files.
        dict
            Schema of the mapping file, where 'key' is the value of one column (fieldMapping) in mapping file
            and 'value' is the value of one column (valueMapping) in the mapping file

        Raises
        ------
        FileNotFoundError
            If no file matches the mapping file pattern.
        KeyError
            If the annotation lacks a required entry, or the mapping file
            lacks one of the requested columns.
        TypeError
            If the mapping annotation cannot be parsed.
        """
        mapping_files = x[AnnotationKeys.FILE_MAPPING.value]
        files = list(glob.iglob(f"{dirname(base_path)}/{mapping_files}", recursive=True))
        if not files:
            raise FileNotFoundError(f"Unable to find '{mapping_files}' file in '{dirname(base_path)}'")

        # Only the first match is read. glob does not sort its results, so
        # which file comes first is filesystem-dependent when several match.
        mapping_file = files[0]
        open_method = gzip.open if mapping_file.endswith('gz') else open

        values: dict = {}
        try:
            # Column names are constant for the whole file: look them up once.
            key_column = x[AnnotationKeys.FIELD_MAPPING.value]
            value_column = x[AnnotationKeys.FIELD_VALUE.value]
            # newline='' lets the csv module handle line endings itself, as
            # its documentation requires for files handed to a reader.
            with open_method(mapping_file, "rt", newline='') as fd:
                for row in csv.DictReader(fd, delimiter='\t'):
                    field = row[key_column]
                    val = row[value_column]
                    values[field] = val
        except TypeError as e:
            raise TypeError("Unable to parse mapping annotation") from e

        return AnnotationTypes.MAPPING.name, x[AnnotationKeys.FIELD_SOURCE.value], values
class PLUGIN:
    def __call__(self, x: dict, base_path: str = None) -> PluginBuilder:
        """Build a PluginBuilder from a plugin annotation.

        The plugin is first looked up as a submodule of 'openvariant.plugins'.
        If that module does not exist, it is searched for under the directory
        named by the OPENVAR_PLUGIN environment variable.

        Parameters
        ----------
        x : dict
            Annotation
        base_path : str
            Not used by this builder.

        Returns
        -------
        str
            Annotation type
        Callable
            The plugin's 'run' function, or None when the module defines no Plugin subclass.
        Context
            The plugin's Context subclass, or None when the module defines none.

        Raises
        ------
        FileNotFoundError
            If the plugin is not bundled and nothing matches under OPENVAR_PLUGIN.
        ModuleNotFoundError
            If the plugin module cannot be found.
        ImportError
            If the plugin module cannot be imported.
        """
        func = None
        ctxt = None
        try:
            # First choice: a plugin shipped as a submodule of 'openvariant.plugins'.
            mod = importlib.import_module(f".{x[AnnotationTypes.PLUGIN.value]}", package="openvariant.plugins")
            func = _get_plugin_function(mod)
            ctxt = _get_plugin_context(mod)
        except ModuleNotFoundError:
            # Fallback: a customized plugin located under $OPENVAR_PLUGIN.
            # NOTE(review): the nesting of the handlers below is the most
            # plausible reading of this block -- confirm against upstream.
            try:
                # NOTE(review): os.environ[...] raises KeyError when OPENVAR_PLUGIN
                # is unset, and none of the handlers here catch KeyError.
                files = list(glob.iglob(f"{os.environ['OPENVAR_PLUGIN']}/**/{x[AnnotationTypes.PLUGIN.value]}", recursive=True))
                if len(files) == 0:
                    raise FileNotFoundError(f"Unable to find '{x[AnnotationTypes.PLUGIN.value]}' plugin in '{os.environ['OPENVAR_PLUGIN']}'")
                else:
                    try:
                        # Each match is treated as a directory holding '<plugin>.py'.
                        # The loop does not stop at the first match, so func and
                        # ctxt end up coming from the last match that loads.
                        for package in files:
                            spec = importlib.util.spec_from_file_location(f".{x[AnnotationTypes.PLUGIN.value]}", f"{package}/{x[AnnotationTypes.PLUGIN.value]}.py")
                            mod = importlib.util.module_from_spec(spec)
                            spec.loader.exec_module(mod)
                            func = _get_plugin_function(mod)
                            ctxt = _get_plugin_context(mod)
                    except (ImportError, AttributeError):
                        # This ImportError is caught again by the
                        # (ImportError, AttributeError) handler below and re-wrapped.
                        raise ImportError("Unable to import 'run' on the plugin.")
            except ModuleNotFoundError:
                raise ModuleNotFoundError(f"Unable to found '{x[AnnotationTypes.PLUGIN.value]}' plugin.")
            except (ImportError, AttributeError) as e:
                raise ImportError(f"Unable to import the plugin: {e}")
        return AnnotationTypes.PLUGIN.name, func, ctxt