Module pylars.utils.output

Expand source code
from .input import run
import pylars
import pandas as pd
from glob import glob


class processed_dataset():
    """Main data type for processed data.

    Holds the identification of a dataset (run, kind, bias voltage and
    temperature), the DataFrame with the processing results and the
    methods for saving/loading cached processed files.

    Cached files live in ``<path_processed>run<run_number>/`` and are named
    ``<kind>_<temp>_<vbias>-<process_hash>`` plus the ``.h5`` or ``.csv``
    extension.
    """

    def __init__(self, run: 'run', kind: str, vbias: float,
                 temp: float, path_processed: str, process_hash: str = ''):
        """Store the dataset identification.

        Args:
            run (run): run object; its ``run_number`` attribute is used to
                build file paths and the hash.
            kind (str): kind of dataset, used as file name prefix.
            vbias (float): bias voltage, written with two decimals in
                file names.
            temp (float): temperature, written with two decimals in
                file names.
            path_processed (str): directory prefix of the processed files.
                Paths are built by plain string concatenation, so it has
                to end with a path separator.
            process_hash (str, optional): identifier of the processing
                configuration, appended to the file name. Defaults to ''.
        """
        self.run = run
        self.kind = kind
        self.vbias = vbias
        self.temp = temp
        self.process_hash = process_hash

        self.path_processed = path_processed

        # Kept as a string for backward compatibility with code that reads
        # ``.hash``. NOTE: Python salts string hashes per interpreter
        # session by default, so this value is not stable between sessions.
        self.hash = str(self.__hash__())

    def __hash__(self) -> int:
        """Return an integer hash of the dataset identification.

        ``__hash__`` has to return an int; returning a string makes
        ``hash(obj)`` raise ``TypeError``.
        """
        return hash((self.process_hash, hash(
            (self.run.run_number, self.kind, self.vbias, self.temp))))

    def input_data(self, results_df: pd.DataFrame) -> None:
        """Input the data pd.DataFrame to the processed_dataset object

        Args:
            results_df (pd.DataFrame): dataframe with the processed data results
        """
        self.data = results_df

    @staticmethod
    def format_kind_v_t(kind: str, v: float, t: float) -> str:
        """Build the ``<kind>_<t>_<v>`` file name stem.

        Note that the temperature comes before the voltage in the result,
        both formatted with two decimals.
        """
        string = f'{kind}_{t:.2f}_{v:.2f}'
        return string

    def save_data(self, type: str = 'hdf5') -> None:
        """Saves the data within the processed_dataset object
        to a file.

        Args:
            type (str, optional): type of file to save the data as. Can be
                'hdf5' or 'csv'. Defaults to 'hdf5'.

        Raises:
            AssertionError: Raised if the object has no data attached.
            NotImplementedError: Raised if the file format is
                different from 'hdf5' or 'csv'.
        """
        import os  # local import, only needed to create the run directory

        # getattr: ``data`` does not exist until input_data()/load_data()
        # ran, and that case must raise the documented AssertionError.
        data = getattr(self, 'data', None)
        if not isinstance(data, pd.DataFrame):
            raise AssertionError(
                "Data not loaded to object, run input_data() first.")

        file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)

        if type == 'hdf5':
            extension = '.h5'
        elif type == 'csv':
            extension = '.csv'
        else:
            raise NotImplementedError(
                f"The requested type ({type}) is not "
                "implemented. Choose 'hdf5', 'csv' or make a PR.")

        file_path = f'{self.path_processed}run{self.run.run_number}/'
        file_name = f'{file_config}-{self.process_hash}{extension}'

        # The writers below do not create missing folders themselves.
        os.makedirs(file_path, exist_ok=True)

        if type == 'hdf5':
            data.to_hdf(
                file_path + file_name,
                key='data',
                complevel=5)
        else:
            data.to_csv(file_path + file_name)
        print('Saved file to:', file_path + file_name)

    def load_data(self, verbose: int = 0, force: bool = False) -> None:
        """Load cached processed data for a given processed data configuration.

        Args:
            verbose (int, optional): if greater than 0, prints which file
                was loaded or processed. Defaults to 0.
            force (bool, optional): If True and no cached '.h5' file is
                found, processes the raw_data (if found) with default
                processor options and saves the result. Defaults to False.

        Raises:
            FileNotFoundError: If force=False and neither the '.h5' nor
                the '.csv' cached file could be read.
        """
        file_path = f'{self.path_processed}run{self.run.run_number}/'
        file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)
        file_name = f'{file_path + file_config}-{self.process_hash}'

        if not force:
            # Any failure reading the '.h5' file (missing, unreadable, ...)
            # falls back to the '.csv' file. ``Exception`` instead of
            # ``BaseException`` so Ctrl-C / SystemExit are not swallowed.
            try:
                self.data = pd.read_hdf(file_name + '.h5')
                loaded_file = file_name + '.h5'
            except Exception:
                try:
                    self.data = pd.read_csv(file_name + '.csv')
                    loaded_file = file_name + '.csv'
                except Exception as err:
                    raise FileNotFoundError(
                        "Requested processed data not found. Process and "
                        "save with load_data(force=True) or process and "
                        "save with save_data.") from err

            if verbose > 0:
                print('Loaded file: ', loaded_file)
            return

        if glob(file_name + '.h5'):
            self.data = pd.read_hdf(file_name + '.h5')
            if verbose > 0:
                print('Loaded file: ', file_name + '.h5')
            return

        # No cached '.h5' file: process the run with the default options
        # and cache the result.
        processor = pylars.processing.rawprocessor.run_processor(
            run_to_process=self.run,
            processor_type='simple',
            sigma_level=5,
            baseline_samples=50)
        if verbose > 0:
            print((
                f'Using Default values for sigma '
                f'({processor.sigma_level}) and baseline samples '
                f'({processor.baseline_samples}) calculation.'))

        self.data = processor.process_datasets(
            kind=self.kind, vbias=self.vbias, temp=self.temp)

        self.save_data()
        if verbose > 0:
            print('Processed and saved file: ', file_name + '.h5')

Classes

class processed_dataset (run: run, kind: str, vbias: float, temp: float, path_processed: str, process_hash: str = '')

Main data type for processed data. Holds information on the type of dataset, the df of results and the methods for saving/loading cached processed files.

Expand source code
class processed_dataset():
    """Main data type for processed data.

    Holds the identification of a dataset (run, kind, bias voltage and
    temperature), the DataFrame with the processing results and the
    methods for saving/loading cached processed files.

    Cached files live in ``<path_processed>run<run_number>/`` and are named
    ``<kind>_<temp>_<vbias>-<process_hash>`` plus the ``.h5`` or ``.csv``
    extension.
    """

    def __init__(self, run: 'run', kind: str, vbias: float,
                 temp: float, path_processed: str, process_hash: str = ''):
        """Store the dataset identification.

        Args:
            run (run): run object; its ``run_number`` attribute is used to
                build file paths and the hash.
            kind (str): kind of dataset, used as file name prefix.
            vbias (float): bias voltage, written with two decimals in
                file names.
            temp (float): temperature, written with two decimals in
                file names.
            path_processed (str): directory prefix of the processed files.
                Paths are built by plain string concatenation, so it has
                to end with a path separator.
            process_hash (str, optional): identifier of the processing
                configuration, appended to the file name. Defaults to ''.
        """
        self.run = run
        self.kind = kind
        self.vbias = vbias
        self.temp = temp
        self.process_hash = process_hash

        self.path_processed = path_processed

        # Kept as a string for backward compatibility with code that reads
        # ``.hash``. NOTE: Python salts string hashes per interpreter
        # session by default, so this value is not stable between sessions.
        self.hash = str(self.__hash__())

    def __hash__(self) -> int:
        """Return an integer hash of the dataset identification.

        ``__hash__`` has to return an int; returning a string makes
        ``hash(obj)`` raise ``TypeError``.
        """
        return hash((self.process_hash, hash(
            (self.run.run_number, self.kind, self.vbias, self.temp))))

    def input_data(self, results_df: pd.DataFrame) -> None:
        """Input the data pd.DataFrame to the processed_dataset object

        Args:
            results_df (pd.DataFrame): dataframe with the processed data results
        """
        self.data = results_df

    @staticmethod
    def format_kind_v_t(kind: str, v: float, t: float) -> str:
        """Build the ``<kind>_<t>_<v>`` file name stem.

        Note that the temperature comes before the voltage in the result,
        both formatted with two decimals.
        """
        string = f'{kind}_{t:.2f}_{v:.2f}'
        return string

    def save_data(self, type: str = 'hdf5') -> None:
        """Saves the data within the processed_dataset object
        to a file.

        Args:
            type (str, optional): type of file to save the data as. Can be
                'hdf5' or 'csv'. Defaults to 'hdf5'.

        Raises:
            AssertionError: Raised if the object has no data attached.
            NotImplementedError: Raised if the file format is
                different from 'hdf5' or 'csv'.
        """
        import os  # local import, only needed to create the run directory

        # getattr: ``data`` does not exist until input_data()/load_data()
        # ran, and that case must raise the documented AssertionError.
        data = getattr(self, 'data', None)
        if not isinstance(data, pd.DataFrame):
            raise AssertionError(
                "Data not loaded to object, run input_data() first.")

        file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)

        if type == 'hdf5':
            extension = '.h5'
        elif type == 'csv':
            extension = '.csv'
        else:
            raise NotImplementedError(
                f"The requested type ({type}) is not "
                "implemented. Choose 'hdf5', 'csv' or make a PR.")

        file_path = f'{self.path_processed}run{self.run.run_number}/'
        file_name = f'{file_config}-{self.process_hash}{extension}'

        # The writers below do not create missing folders themselves.
        os.makedirs(file_path, exist_ok=True)

        if type == 'hdf5':
            data.to_hdf(
                file_path + file_name,
                key='data',
                complevel=5)
        else:
            data.to_csv(file_path + file_name)
        print('Saved file to:', file_path + file_name)

    def load_data(self, verbose: int = 0, force: bool = False) -> None:
        """Load cached processed data for a given processed data configuration.

        Args:
            verbose (int, optional): if greater than 0, prints which file
                was loaded or processed. Defaults to 0.
            force (bool, optional): If True and no cached '.h5' file is
                found, processes the raw_data (if found) with default
                processor options and saves the result. Defaults to False.

        Raises:
            FileNotFoundError: If force=False and neither the '.h5' nor
                the '.csv' cached file could be read.
        """
        file_path = f'{self.path_processed}run{self.run.run_number}/'
        file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)
        file_name = f'{file_path + file_config}-{self.process_hash}'

        if not force:
            # Any failure reading the '.h5' file (missing, unreadable, ...)
            # falls back to the '.csv' file. ``Exception`` instead of
            # ``BaseException`` so Ctrl-C / SystemExit are not swallowed.
            try:
                self.data = pd.read_hdf(file_name + '.h5')
                loaded_file = file_name + '.h5'
            except Exception:
                try:
                    self.data = pd.read_csv(file_name + '.csv')
                    loaded_file = file_name + '.csv'
                except Exception as err:
                    raise FileNotFoundError(
                        "Requested processed data not found. Process and "
                        "save with load_data(force=True) or process and "
                        "save with save_data.") from err

            if verbose > 0:
                print('Loaded file: ', loaded_file)
            return

        if glob(file_name + '.h5'):
            self.data = pd.read_hdf(file_name + '.h5')
            if verbose > 0:
                print('Loaded file: ', file_name + '.h5')
            return

        # No cached '.h5' file: process the run with the default options
        # and cache the result.
        processor = pylars.processing.rawprocessor.run_processor(
            run_to_process=self.run,
            processor_type='simple',
            sigma_level=5,
            baseline_samples=50)
        if verbose > 0:
            print((
                f'Using Default values for sigma '
                f'({processor.sigma_level}) and baseline samples '
                f'({processor.baseline_samples}) calculation.'))

        self.data = processor.process_datasets(
            kind=self.kind, vbias=self.vbias, temp=self.temp)

        self.save_data()
        if verbose > 0:
            print('Processed and saved file: ', file_name + '.h5')

Static methods

def format_kind_v_t(kind: str, v: float, t: float) ‑> str
Expand source code
@staticmethod
def format_kind_v_t(kind: str, v: float, t: float) -> str:
    """Build the ``<kind>_<t>_<v>`` label used in processed file names.

    The temperature ``t`` is placed before the voltage ``v``; both are
    written with two decimal places.
    """
    return '{}_{:.2f}_{:.2f}'.format(kind, t, v)

Methods

def input_data(self, results_df: pandas.core.frame.DataFrame) ‑> NoneType

Input the data pd.DataFrame to the processed_dataset object

Args

results_df : pd.DataFrame
dataframe with the processed data results
Expand source code
def input_data(self, results_df: pd.DataFrame) -> None:
    """Attach a results dataframe to this processed_dataset object.

    The dataframe is stored by reference (no copy) in the ``data``
    attribute, replacing whatever was stored there before.

    Args:
        results_df (pd.DataFrame): dataframe with the processed data results
    """
    # Plain attribute assignment; no validation of the input is done here.
    self.data = results_df
def load_data(self, verbose: int = 0, force: bool = False) ‑> NoneType

Load cached processed data for a given processed data configuration.

Args

force : bool, optional
If True and no cached file found, processes the raw_data (if found) with default processor options. Defaults to False.

Raises

FileNotFoundError
If force=False and file not found.
Expand source code
def load_data(self, verbose: int = 0, force: bool = False) -> None:
    """Load cached processed data for a given processed data configuration.

    The cached file is looked up as
    ``<path_processed>run<run_number>/<kind>_<temp>_<vbias>-<process_hash>``
    with the '.h5' extension first and, when force=False, the '.csv'
    extension as fallback.

    Args:
        verbose (int, optional): if greater than 0, prints which file
            was loaded or processed. Defaults to 0.
        force (bool, optional): If True and no cached '.h5' file is found,
            processes the raw_data (if found) with default processor
            options and saves the result. Defaults to False.

    Raises:
        FileNotFoundError: If force=False and neither the '.h5' nor the
            '.csv' cached file could be read.
    """
    file_path = f'{self.path_processed}run{self.run.run_number}/'
    file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)
    file_name = f'{file_path + file_config}-{self.process_hash}'

    if not force:
        # Any failure reading the '.h5' file (missing, unreadable, ...)
        # falls back to the '.csv' file. ``Exception`` instead of
        # ``BaseException`` so Ctrl-C / SystemExit are not swallowed.
        try:
            self.data = pd.read_hdf(file_name + '.h5')
            loaded_file = file_name + '.h5'
        except Exception:
            try:
                self.data = pd.read_csv(file_name + '.csv')
                loaded_file = file_name + '.csv'
            except Exception as err:
                # Chain the cause so the real reading error stays visible.
                raise FileNotFoundError(
                    "Requested processed data not found. Process and save "
                    "with load_data(force=True) or process and save with "
                    "save_data.") from err

        if verbose > 0:
            print('Loaded file: ', loaded_file)
        return

    if glob(file_name + '.h5'):
        self.data = pd.read_hdf(file_name + '.h5')
        if verbose > 0:
            print('Loaded file: ', file_name + '.h5')
        return

    # No cached '.h5' file: process the run with the default options and
    # cache the result through save_data().
    processor = pylars.processing.rawprocessor.run_processor(
        run_to_process=self.run,
        processor_type='simple',
        sigma_level=5,
        baseline_samples=50)
    if verbose > 0:
        print((
            f'Using Default values for sigma '
            f'({processor.sigma_level}) and baseline samples '
            f'({processor.baseline_samples}) calculation.'))

    self.data = processor.process_datasets(
        kind=self.kind, vbias=self.vbias, temp=self.temp)

    self.save_data()
    if verbose > 0:
        print('Processed and saved file: ', file_name + '.h5')
def save_data(self, type: str = 'hdf5') ‑> NoneType

Saves the data within the processed_dataset object to a file.

Args

type : str, optional
type of file to save the data as. Can be 'hdf5' or 'csv'. Defaults to 'hdf5'.

Raises

AssertionError
Raised if the object has no data attached.
NotImplementedError
Raises if the file format is different from 'hdf5' or 'csv'.
Expand source code
def save_data(self, type: str = 'hdf5') -> None:
    """Saves the data within the processed_dataset object
    to a file.

    The file is written to
    ``<path_processed>run<run_number>/<kind>_<temp>_<vbias>-<process_hash>``
    plus the '.h5' or '.csv' extension; the run directory is created if
    it does not exist yet.

    Args:
        type (str, optional): type of file to save the data as. Can be
            'hdf5' or 'csv'. Defaults to 'hdf5'.

    Raises:
        AssertionError: Raised if the object has no data attached.
        NotImplementedError: Raised if the file format is
            different from 'hdf5' or 'csv'.
    """
    import os  # local import, only needed to create the run directory

    # getattr: ``data`` does not exist until input_data()/load_data() ran,
    # and that case must raise the documented AssertionError.
    data = getattr(self, 'data', None)
    if not isinstance(data, pd.DataFrame):
        raise AssertionError(
            "Data not loaded to object, run input_data() first.")

    file_config = self.format_kind_v_t(self.kind, self.vbias, self.temp)

    if type == 'hdf5':
        extension = '.h5'
    elif type == 'csv':
        extension = '.csv'
    else:
        raise NotImplementedError(
            f"The requested type ({type}) is not "
            "implemented. Choose 'hdf5', 'csv' or make a PR.")

    file_path = f'{self.path_processed}run{self.run.run_number}/'
    file_name = f'{file_config}-{self.process_hash}{extension}'

    # The writers below do not create missing folders themselves.
    os.makedirs(file_path, exist_ok=True)

    if type == 'hdf5':
        data.to_hdf(
            file_path + file_name,
            key='data',
            complevel=5)
    else:
        data.to_csv(file_path + file_name)
    print('Saved file to:', file_path + file_name)