Module `pylars.utils.output`

Expand source code

from .input import run
import pylars
import pandas as pd
from glob import glob


class processed_dataset():
    """Main data type for processed data.

    Holds the identification of a dataset (run, kind, bias voltage and
    temperature), the dataframe of results (``data``, once attached or
    loaded) and the methods for saving/loading cached processed files.

    Args:
        run (run): run object the dataset belongs to; its ``run_number``
            is used for hashing and for building file paths.
        kind (str): kind of dataset, used as file name prefix.
        vbias (float): bias voltage of the dataset.
        temp (float): temperature of the dataset.
        path_processed (str): base folder of the processed files. It is
            concatenated directly with ``run<run_number>/``, so it is
            expected to end with a path separator.
        process_hash (str, optional): identifier of the processing
            configuration, appended to the file names. Defaults to ''.
    """

    def __init__(self, run: run, kind: str, vbias: float,
                 temp: float, path_processed: str, process_hash: str = ''):
        self.run = run
        self.kind = kind
        self.vbias = vbias
        self.temp = temp
        self.process_hash = process_hash

        self.path_processed = path_processed

        # Called directly (not through hash()) because __hash__ returns a
        # string here.
        self.hash = self.__hash__()

    def __hash__(self):
        # NOTE(review): this returns a str, so the builtin hash() (and any
        # use of the object in a set or as a dict key) raises TypeError;
        # only direct calls such as the one in __init__ work. Left as-is so
        # the `hash` attribute keeps its current string form. The value
        # also depends on Python's per-process string hash salt.
        return str(hash((self.process_hash, hash(
            (self.run.run_number, self.kind, self.vbias, self.temp)))))

    def input_data(self, results_df: pd.DataFrame) -> None:
        """Input the data pd.DataFrame to the processed_dataset object

        Args:
            results_df (pd.DataFrame): dataframe with the processed data results
        """
        self.data = results_df

    @staticmethod
    def format_kind_v_t(kind: str, v: float, t: float) -> str:
        """Build the '<kind>_<t>_<v>' label used in processed file names.

        Note that the temperature comes before the voltage in the label,
        both written with two decimals.

        Args:
            kind (str): dataset kind.
            v (float): bias voltage.
            t (float): temperature.

        Returns:
            str: the formatted label.
        """
        return f'{kind}_{t:.2f}_{v:.2f}'

    def save_data(self, type: str = 'hdf5') -> None:
        """Saves the data within the processed_dataset object
        to a file.

        The file is written inside ``<path_processed>run<run_number>/`` and
        named ``<kind>_<temp>_<vbias>-<process_hash>`` plus the extension
        of the chosen format. This method does not create that folder.

        Args:
            type (str, optional): type of file to save the data as. Can be
                'hdf5' or 'csv'. Defaults to 'hdf5'.

        Raises:
            AssertionError: Raised if the object has no pd.DataFrame
                attached.
            NotImplementedError: Raised if the file format is
                different from 'hdf5' or 'csv'.
        """
        # `data` only exists after input_data()/load_data(); use getattr so
        # a fresh object gets the documented AssertionError instead of an
        # AttributeError.
        if not isinstance(getattr(self, 'data', None), pd.DataFrame):
            raise AssertionError(
                "Data not loaded to object, run input_data() first.")

        file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)

        if type == 'hdf5':
            extension = '.h5'
        elif type == 'csv':
            extension = '.csv'
        else:
            raise NotImplementedError(
                f"The requested type ({type}) is not implemented. "
                "Choose 'hdf5', 'csv' or make a PR.")

        target = (f'{self.path_processed}run{self.run.run_number}/'
                  f'{file_config}-{self.process_hash}{extension}')

        if extension == '.h5':
            self.data.to_hdf(target, key='data', complevel=5)
        else:
            self.data.to_csv(target)
        print('Saved file to:', target)

    def load_data(self, verbose: int = 0, force: bool = False) -> None:
        """Load cached processed data for a given processed data configuration.

        With force=False the '.h5' cache is tried first and the '.csv'
        cache second. With force=True only the '.h5' cache is looked up;
        if it is missing the raw data is processed and saved as hdf5.

        Args:
            verbose (int, optional): If greater than 0, prints which file
                was loaded or written. Defaults to 0.
            force (bool, optional): If True and no cached file found,
                processes the raw_data (if found) with default processor
                options. Defaults to False.

        Raises:
            FileNotFoundError: If force=False and neither cached file
                could be read.
        """
        file_path = f'{self.path_processed}run{self.run.run_number}/'
        file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)
        file_name = f'{file_path + file_config}-{self.process_hash}'
        h5_file = file_name + '.h5'

        if not force:
            # Catch Exception (not BaseException) so KeyboardInterrupt and
            # SystemExit still propagate; any read failure of the hdf5
            # cache falls back to the csv cache.
            try:
                self.data = pd.read_hdf(h5_file)
                loaded_file = h5_file
            except Exception:
                csv_file = file_name + '.csv'
                try:
                    self.data = pd.read_csv(csv_file)
                except Exception as err:
                    # Keep the underlying error attached for debugging.
                    raise FileNotFoundError(
                        "Requested processed data not found. Process and save "
                        "with load_data(force=True) or process and save with "
                        "save_data.") from err
                loaded_file = csv_file

            if verbose > 0:
                print('Loaded file: ', loaded_file)
            return

        if glob(h5_file):
            self.data = pd.read_hdf(h5_file)
            if verbose > 0:
                print('Loaded file: ', h5_file)
            return

        # No hdf5 cache: process the raw data with the default options.
        processor = pylars.processing.rawprocessor.run_processor(
            run_to_process=self.run,
            processor_type='simple',
            sigma_level=5,
            baseline_samples=50)
        if verbose > 0:
            print((
                f'Using Default values for sigma '
                f'({processor.sigma_level}) and baseline samples '
                f'({processor.baseline_samples}) calculation.'))

        self.data = processor.process_datasets(
            kind=self.kind, vbias=self.vbias, temp=self.temp)

        self.save_data()
        if verbose > 0:
            print('Processed and saved file: ', h5_file)

Classes

class processed_dataset (run: run, kind: str, vbias: float, temp: float, path_processed: str, process_hash: str = '')

Main data type for processed data. Holds information on the type of dataset, the df of results and the methods for saving/loading cached processed files.

Expand source code

class processed_dataset():
    """Main data type for processed data.

    Holds the identification of a dataset (run, kind, bias voltage and
    temperature), the dataframe of results (``data``, once attached or
    loaded) and the methods for saving/loading cached processed files.

    Args:
        run (run): run object the dataset belongs to; its ``run_number``
            is used for hashing and for building file paths.
        kind (str): kind of dataset, used as file name prefix.
        vbias (float): bias voltage of the dataset.
        temp (float): temperature of the dataset.
        path_processed (str): base folder of the processed files. It is
            concatenated directly with ``run<run_number>/``, so it is
            expected to end with a path separator.
        process_hash (str, optional): identifier of the processing
            configuration, appended to the file names. Defaults to ''.
    """

    def __init__(self, run: run, kind: str, vbias: float,
                 temp: float, path_processed: str, process_hash: str = ''):
        self.run = run
        self.kind = kind
        self.vbias = vbias
        self.temp = temp
        self.process_hash = process_hash

        self.path_processed = path_processed

        # Called directly (not through hash()) because __hash__ returns a
        # string here.
        self.hash = self.__hash__()

    def __hash__(self):
        # NOTE(review): this returns a str, so the builtin hash() (and any
        # use of the object in a set or as a dict key) raises TypeError;
        # only direct calls such as the one in __init__ work. Left as-is so
        # the `hash` attribute keeps its current string form. The value
        # also depends on Python's per-process string hash salt.
        return str(hash((self.process_hash, hash(
            (self.run.run_number, self.kind, self.vbias, self.temp)))))

    def input_data(self, results_df: pd.DataFrame) -> None:
        """Input the data pd.DataFrame to the processed_dataset object

        Args:
            results_df (pd.DataFrame): dataframe with the processed data results
        """
        self.data = results_df

    @staticmethod
    def format_kind_v_t(kind: str, v: float, t: float) -> str:
        """Build the '<kind>_<t>_<v>' label used in processed file names.

        Note that the temperature comes before the voltage in the label,
        both written with two decimals.

        Args:
            kind (str): dataset kind.
            v (float): bias voltage.
            t (float): temperature.

        Returns:
            str: the formatted label.
        """
        return f'{kind}_{t:.2f}_{v:.2f}'

    def save_data(self, type: str = 'hdf5') -> None:
        """Saves the data within the processed_dataset object
        to a file.

        The file is written inside ``<path_processed>run<run_number>/`` and
        named ``<kind>_<temp>_<vbias>-<process_hash>`` plus the extension
        of the chosen format. This method does not create that folder.

        Args:
            type (str, optional): type of file to save the data as. Can be
                'hdf5' or 'csv'. Defaults to 'hdf5'.

        Raises:
            AssertionError: Raised if the object has no pd.DataFrame
                attached.
            NotImplementedError: Raised if the file format is
                different from 'hdf5' or 'csv'.
        """
        # `data` only exists after input_data()/load_data(); use getattr so
        # a fresh object gets the documented AssertionError instead of an
        # AttributeError.
        if not isinstance(getattr(self, 'data', None), pd.DataFrame):
            raise AssertionError(
                "Data not loaded to object, run input_data() first.")

        file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)

        if type == 'hdf5':
            extension = '.h5'
        elif type == 'csv':
            extension = '.csv'
        else:
            raise NotImplementedError(
                f"The requested type ({type}) is not implemented. "
                "Choose 'hdf5', 'csv' or make a PR.")

        target = (f'{self.path_processed}run{self.run.run_number}/'
                  f'{file_config}-{self.process_hash}{extension}')

        if extension == '.h5':
            self.data.to_hdf(target, key='data', complevel=5)
        else:
            self.data.to_csv(target)
        print('Saved file to:', target)

    def load_data(self, verbose: int = 0, force: bool = False) -> None:
        """Load cached processed data for a given processed data configuration.

        With force=False the '.h5' cache is tried first and the '.csv'
        cache second. With force=True only the '.h5' cache is looked up;
        if it is missing the raw data is processed and saved as hdf5.

        Args:
            verbose (int, optional): If greater than 0, prints which file
                was loaded or written. Defaults to 0.
            force (bool, optional): If True and no cached file found,
                processes the raw_data (if found) with default processor
                options. Defaults to False.

        Raises:
            FileNotFoundError: If force=False and neither cached file
                could be read.
        """
        file_path = f'{self.path_processed}run{self.run.run_number}/'
        file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)
        file_name = f'{file_path + file_config}-{self.process_hash}'
        h5_file = file_name + '.h5'

        if not force:
            # Catch Exception (not BaseException) so KeyboardInterrupt and
            # SystemExit still propagate; any read failure of the hdf5
            # cache falls back to the csv cache.
            try:
                self.data = pd.read_hdf(h5_file)
                loaded_file = h5_file
            except Exception:
                csv_file = file_name + '.csv'
                try:
                    self.data = pd.read_csv(csv_file)
                except Exception as err:
                    # Keep the underlying error attached for debugging.
                    raise FileNotFoundError(
                        "Requested processed data not found. Process and save "
                        "with load_data(force=True) or process and save with "
                        "save_data.") from err
                loaded_file = csv_file

            if verbose > 0:
                print('Loaded file: ', loaded_file)
            return

        if glob(h5_file):
            self.data = pd.read_hdf(h5_file)
            if verbose > 0:
                print('Loaded file: ', h5_file)
            return

        # No hdf5 cache: process the raw data with the default options.
        processor = pylars.processing.rawprocessor.run_processor(
            run_to_process=self.run,
            processor_type='simple',
            sigma_level=5,
            baseline_samples=50)
        if verbose > 0:
            print((
                f'Using Default values for sigma '
                f'({processor.sigma_level}) and baseline samples '
                f'({processor.baseline_samples}) calculation.'))

        self.data = processor.process_datasets(
            kind=self.kind, vbias=self.vbias, temp=self.temp)

        self.save_data()
        if verbose > 0:
            print('Processed and saved file: ', h5_file)

Static methods

def format_kind_v_t(kind: str, v: float, t: float) ‑> str

Expand source code

@staticmethod
def format_kind_v_t(kind: str, v: float, t: float) -> str:
    """Build the '<kind>_<t>_<v>' label used in processed file names.

    The temperature is placed before the voltage, and both are written
    with two decimals.

    Args:
        kind (str): dataset kind, placed first in the label.
        v (float): bias voltage, placed last in the label.
        t (float): temperature, placed second in the label.

    Returns:
        str: the formatted label.
    """
    return '{}_{:.2f}_{:.2f}'.format(kind, t, v)

Methods

def input_data(self, results_df: pandas.core.frame.DataFrame) ‑> NoneType

Input the data pd.DataFrame to the processed_dataset object

Args

results_df : pd.DataFrame: dataframe with the processed data results

Expand source code

def input_data(self, results_df: pd.DataFrame) -> None:
    """Attach a dataframe of processed results to this object.

    The dataframe is stored as given (no copy is made) in the ``data``
    attribute, replacing any previously attached data.

    Args:
        results_df (pd.DataFrame): dataframe with the processed data results
    """
    self.data = results_df

def load_data(self, verbose: int = 0, force: bool = False) ‑> NoneType

Load cached processed data for a given processed data configuration.

Args

force : bool, optional: If True and no cached file found, processes the raw_data (if found) with default processor options. Defaults to False.

Raises

FileNotFoundError: If force=False and file not found.

Expand source code

def load_data(self, verbose: int = 0, force: bool = False) -> None:
    """Load cached processed data for a given processed data configuration.

    With force=False the '.h5' cache is tried first and the '.csv'
    cache second. With force=True only the '.h5' cache is looked up;
    if it is missing the raw data is processed and saved as hdf5.

    Args:
        verbose (int, optional): If greater than 0, prints which file
            was loaded or written. Defaults to 0.
        force (bool, optional): If True and no cached file found,
            processes the raw_data (if found) with default processor
            options. Defaults to False.

    Raises:
        FileNotFoundError: If force=False and neither cached file
            could be read.
    """
    file_path = f'{self.path_processed}run{self.run.run_number}/'
    file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)
    file_name = f'{file_path + file_config}-{self.process_hash}'
    h5_file = file_name + '.h5'

    if not force:
        # Catch Exception (not BaseException) so KeyboardInterrupt and
        # SystemExit still propagate; any read failure of the hdf5
        # cache falls back to the csv cache.
        try:
            self.data = pd.read_hdf(h5_file)
            loaded_file = h5_file
        except Exception:
            csv_file = file_name + '.csv'
            try:
                self.data = pd.read_csv(csv_file)
            except Exception as err:
                # Keep the underlying error attached for debugging.
                raise FileNotFoundError(
                    "Requested processed data not found. Process and save "
                    "with load_data(force=True) or process and save with "
                    "save_data.") from err
            loaded_file = csv_file

        if verbose > 0:
            print('Loaded file: ', loaded_file)
        return

    if glob(h5_file):
        self.data = pd.read_hdf(h5_file)
        if verbose > 0:
            print('Loaded file: ', h5_file)
        return

    # No hdf5 cache: process the raw data with the default options.
    processor = pylars.processing.rawprocessor.run_processor(
        run_to_process=self.run,
        processor_type='simple',
        sigma_level=5,
        baseline_samples=50)
    if verbose > 0:
        print((
            f'Using Default values for sigma '
            f'({processor.sigma_level}) and baseline samples '
            f'({processor.baseline_samples}) calculation.'))

    self.data = processor.process_datasets(
        kind=self.kind, vbias=self.vbias, temp=self.temp)

    self.save_data()
    if verbose > 0:
        print('Processed and saved file: ', h5_file)

def save_data(self, type: str = 'hdf5') ‑> NoneType

Saves the data within the processed_dataset object to a file.

Args

type : str, optional: type of file to save the data as. Can be 'hdf5' or 'csv'. Defaults to 'hdf5'.

Raises

AssertionError: Raised if the object has no data attached.
NotImplementedError: Raised if the file format is different from 'hdf5' or 'csv'.

Expand source code

def save_data(self, type: str = 'hdf5') -> None:
    """Saves the data within the processed_dataset object
    to a file.

    The file is written inside ``<path_processed>run<run_number>/`` and
    named ``<kind>_<temp>_<vbias>-<process_hash>`` plus the extension
    of the chosen format. This method does not create that folder.

    Args:
        type (str, optional): type of file to save the data as. Can be
            'hdf5' or 'csv'. Defaults to 'hdf5'.

    Raises:
        AssertionError: Raised if the object has no pd.DataFrame
            attached.
        NotImplementedError: Raised if the file format is
            different from 'hdf5' or 'csv'.
    """
    # `data` only exists after input_data()/load_data(); use getattr so
    # a fresh object gets the documented AssertionError instead of an
    # AttributeError.
    if not isinstance(getattr(self, 'data', None), pd.DataFrame):
        raise AssertionError(
            "Data not loaded to object, run input_data() first.")

    file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)

    if type == 'hdf5':
        extension = '.h5'
    elif type == 'csv':
        extension = '.csv'
    else:
        raise NotImplementedError(
            f"The requested type ({type}) is not implemented. "
            "Choose 'hdf5', 'csv' or make a PR.")

    target = (f'{self.path_processed}run{self.run.run_number}/'
              f'{file_config}-{self.process_hash}{extension}')

    if extension == '.h5':
        self.data.to_hdf(target, key='data', complevel=5)
    else:
        self.data.to_csv(target)
    print('Saved file to:', target)