Source code for maccorcyclingdata.testdata

import pandas as pd
import numpy as np
import os

[docs]def import_maccor_data(file_path , file_name, header=0):
    """
    Given the file path and file name of the testdata file, this function will import the csv file as a pandas df
    and clean it.

    Parameters
    -----------
    file_path : string
        File path

    file_name : string
        Filename
    
    header : integer
        Optional input that sets the header to a line number (default=2)
    
    Returns
    --------
    df : pandas dataframe
        The cleaned testdata file as a pandas df

    Examples
    ---------
    >>> import maccorcyclingdata.testdata as testdata
    >>> df = testdata.import_maccor_data('example_data/', 'testdata.csv')
    >>> df.head(5)
    """
    if not isinstance(file_path, str):
        raise TypeError('file path must be a string')

    if not isinstance(file_name, str):
        raise TypeError('file name must be a string')

    if not isinstance(header, int):
        raise TypeError('header must be an integer')

    if not os.path.exists(file_path+file_name):
        raise NotADirectoryError("The path " + str(file_path + file_name) + " not found")
    
    df = pd.read_csv(file_path+file_name, header =int(header))
    df = clean_maccor_df(df)
    return df

[docs]def import_multiple_csv_data(file_path):
    """
    Given the file path that holds multiple csv files (testdata files), this function will import and append 
    all of the csv files to one another as one dataframe. Returns a cleaned version of that dataframe.

    Parameters
    -----------
    file_path : string
        File path

    Returns
    --------
    df : pandas dataframe
        All of the cleaned csv files appended to one another as a pandas df

    Notes
    -----
    This function will append the csv files to one another depending on the order they appear in the directory.

    Examples
    ---------
    >>> import maccorcyclingdata.testdata as testdata
    >>> mult_df = testdata.import_multiple_csv_data('example_data/multiple_csv/')
    >>> mult_df.head(5)
    """
    if not isinstance(file_path, str):
        raise TypeError('file path must be a string')

    if not os.path.exists(file_path):
        raise NotADirectoryError("The path " + str(file_path) + " not found")

    df = pd.DataFrame()
    # r=root, d=directories, f = files
    for r, d, files in os.walk(file_path):
        # We only want to parse files that are CSVs
        files = [ file for file in files if file.endswith( ('.csv') ) ]
        files.sort()
        for file in files:
            file_loc = str(file_path+file)
            temp_df = pd.read_csv(file_loc, header=0)
            df = df.append(temp_df, ignore_index = True)
    df = clean_maccor_df(df)
    return df


[docs]def clean_maccor_df(df):
    """
    Given the testdata dataframe, this function will rename the headers and drop unnecessary columns. It will also change some of the units to match the column name and will remove all commas.

    Parameters
    -----------
    df : pandas dataframe
        The testdata dataframe

    Returns
    --------
    df : pandas dataframe
        The cleaned pandas df of the testdata

    Notes
    -----
    If the following columns exist, the function will delete these: ``ACR``, ``DCIR``, ``Watt-hr``, and ``nnnamed``.
    Examples
    ---------
    >>> import maccorcyclingdata.testdata as testdata
    >>> df = testdata.clean_maccor_df(df)
    >>> df.head(5)
    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError('input must be a pandas dataframe')
    
    if not len(df.columns) < 14:
        raise IndexError("Pandas dataframe can have 14 columns max")
    
    if 'Watt-hr' in df.columns:
        df = df.drop(columns=['Watt-hr']) 
    if 'ACR' in df.columns:
        df = df.drop(columns=['ACR']) 
    if 'DCIR' in df.columns:
        df = df.drop(columns=['DCIR']) 
    if 'Unnamed: 13' in df.columns:
        df = df.drop(columns=['Unnamed: 13']) 
    df.replace(',','', regex=True, inplace=True)
    df.columns = ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp'] #rename the column headers

    df[["cyc", "step", "test_time_s", "capacity_mah", "current_ma", "voltage_v", "thermocouple_temp_c", "ev_temp"]] = df[["cyc", "step", "test_time_s", "capacity_mah", "current_ma", "voltage_v", "thermocouple_temp_c", "ev_temp"]].apply(pd.to_numeric)
    
    return df

[docs]def delete_cycle_steps(df, steps_to_delete, decrement=False):
    """
    Given the testdata dataframe (from the import_maccor_data or import_multiple_csv_data functions) and a list of integers (step numbers that you want to delete), this function
    will delete all rows from the dataframe that have a cycle step index that matches any in the list of integers 
    
    Parameters
    -----------
    df : pandas dataframe
        The testdata dataframe

    steps_to_delete : array
        An array that has the step numbers you want to delete

    decrement : boolean
        If set to True, would shift cycle steps to adjust for the deleted steps

    Returns
    --------
    df : pandas dataframe
        The dataframe with the corresponding steps deleted

    Examples
    ---------
    >>> import maccorcyclingdata.testdata as testdata
    >>> del_df = testdata.delete_cycle_steps(df, [1], True)
    >>> del_df.head(5)
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError('df input must be a pandas dataframe')

    if not isinstance(steps_to_delete, list):
        raise TypeError('steps_to_delete input must be a list')

    if not isinstance(decrement, bool):
        raise TypeError('decrement input must be a boolean')
    
    if not len(df.columns) == 10:
        raise IndexError("Pandas dataframe must have 10 columns")

    if (df.columns.tolist() != ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp']):
        raise IndexError("Pandas dataframe must have these columns: ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp']")
    
    for x in steps_to_delete:
        to_be_deleted = df.index[df['step'] == x]
        df = df.drop(to_be_deleted)

    if decrement:
        steps_to_delete.sort(reverse = True) 
        for x in steps_to_delete:
            to_be_shifted = df.index[df['step'] > x]
            mini = min(df['step'][to_be_shifted].values)
            gap = mini-x
            all_values_larger = ((df['step'][to_be_shifted].values) - gap)
            df.loc[to_be_shifted, 'step'] = all_values_larger

    df = df.reset_index(drop = True)           
    return df

[docs]def get_index_range(df, cyc_range, cycle_step_idx = []):
    """
    Given the testdata dataframe (from the import_maccor_data or import_multiple_csv_data functions), this function returns the index range for the specified cycle range, 
    or if a cycle step index is passed, as subset of each cyle for only that specific cycle step.
    
    Parameters
    -----------
    df : pandas dataframe
        The testdata dataframe

    cyc_range : array
        An array of the cycles you want the indices for

    cycle_step_idx : array
        The step numbers that you want the indices of. Default value is all steps within each cycle.

    Returns
    --------
    index_range : vector
        A vector of the range of df indices for the specified cycle range

    Examples
    ---------
    >>> from maccorcyclingdata.testdata import get_index_range
    >>> ind = testdata.get_cycle_data(df, [1, 3, 5], [12])
    >>> print(ind[:6])
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError('df input must be a pandas dataframe')

    if not isinstance(cyc_range, list):
        raise TypeError('cyc_range input must be a list')

    if not isinstance(cycle_step_idx, list):
        raise TypeError('cycle_step_index input must be a list')

    if not len(df.columns) == 10:
        raise IndexError("Pandas dataframe must have 10 columns")

    if (df.columns.tolist() != ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp']):
        raise IndexError("Pandas dataframe must have these columns: ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp']")
    
    # If we are passed a cycle step index, then we provide the indicies for only that step.
    if len(cycle_step_idx) > 0:
        index_range = []
        if len(cyc_range) > 1:
            for i in range(cyc_range[0],cyc_range[1]+1):  # Need the '+1' so that we include the upper cycle.
                index_range = np.append( index_range,
                                        np.where((df['cyc'] == i) & (df["step"] == cycle_step_idx[0]))[0][:])
        else: 
            index_range = np.append( index_range,
                                    np.where((df['cyc'] == cyc_range[0]) & (df["step"] == cycle_step_idx[0]))[0][:])
    else:
        if len(cyc_range) > 1:
            index_range = np.where(np.logical_and(df['cyc'] >= cyc_range[0] , df['cyc']<= cyc_range[1] ))[0][:]
        else: 
            index_range = np.where(np.logical_and(df['cyc'] >= cyc_range[0] , df['cyc']<= cyc_range[0] ))[0][:]

    return index_range

[docs]def get_cycle_data(df, Headings , cyc_range, cycle_step_idx=[]):
    """
    Given the testdata df (from the import_maccor_data or import_multiple_csv_data functions), this function gets the data specified in the "Headings" for each sample within the specified cyc_range.
    
    Parameters
    -----------
    df : pandas dataframe
        The testdata dataframe

    Headings : array
        An array with the headers you want the data for

    cyc_range : array
        An array of the cycle numbers you want data for

    cycle_step_idx : array
        The step numbers within each cycle that you want the data for. Default value is all steps within each cycle.

    Returns
    --------
    data_df : pandas dataframe
        A pandas dataframe that has the data for the specified headers at the specified cycles and steps.

    Examples
    ---------
    >>> from maccorcyclingdata.testdata import get_cycle_data
    >>> data = testdata.get_cycle_data(df, ['current_ma', 'voltage_v'], [1, 3, 5], [12])
    >>> print(data[:6])
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError('df input must be a pandas dataframe')

    if not isinstance(Headings, list):
        raise TypeError('Headings input must be a list')

    if not isinstance(cyc_range, list):
        raise TypeError('cycle_range input must be a list')

    if not isinstance(cycle_step_idx, list):
        raise TypeError('cycle_step_index input must be a list')

    if not len(df.columns) == 10:
        raise IndexError("Pandas dataframe must have 10 columns")

    if (df.columns.tolist() != ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp']):
        raise IndexError("Pandas dataframe must have these columns: ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp']")
    
    # Find the index range for the specified cycle(s)
    index_range = get_index_range(df,cyc_range, cycle_step_idx)
    np.set_printoptions(suppress=True)
    # Create a numpy array to hold the headings values Each column will be a heading, each row will be a data point
    data = np.zeros([len(index_range),len(Headings)])

    data_df = pd.DataFrame()
    data_df['cyc'] = df['cyc'][index_range].values
    data_df['step'] = df['step'][index_range].values

    for i in range(0,len(Headings)):
        data[:,i] = df[Headings[i]][index_range]
        data_df[Headings[i]] = data[:,i]

    return data_df

[docs]def get_num_cycles(df):
    """
    Given the testdata dataframe (from the import_maccor_data or import_multiple_csv_data functions), this function will return the number of cycles.
    
    Parameters
    -----------
    df : pandas dataframe
        The testdata dataframe

    Returns
    --------                                   
    number_of_cycles : integer
        An integer of the number of cycles in the dataframe

    Notes
    ------
    This function assumes that the first cycle is cycle 0.

    Examples
    ---------
    >>> from maccorcyclingdata.testdata import get_num_cycles
    >>> get_num_cycles(df)
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError('df input must be a pandas dataframe')

    if not len(df.columns) == 10:
        raise IndexError("Pandas dataframe must have 10 columns")

    if (df.columns.tolist() != ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp']):
        raise IndexError("Pandas dataframe must have these columns: ['cyc', 'step', 'test_time_s', 'step_time_s', 'capacity_mah', 'current_ma', 'voltage_v', 'dpt_time', 'thermocouple_temp_c', 'ev_temp']")
    
    number_of_cycles = int(max(df['cyc'])) + 1
    return number_of_cycles