# Source code for maccorcyclingdata.schedules

import pandas as pd
import numpy as np
import os

def import_schedules(file_path, file_name):
    """
    Given the file path and file name (of the schedule file that is inputted into the Maccor Cycler), this
    function will import and clean the schedule file and return it as a df.

    A step in the schedule file may span several rows: the first row carries the step number and every
    following row with an empty step number is a continuation of that step. Only the end type, end type
    operator, end type value, and goto columns are merged from continuation rows. For a multi-row step the
    values of those four columns are gathered, in row order, into a list stored on the row of the step
    itself; the continuation rows are then removed. Single-row steps keep their scalar values.

    Parameters
    -----------
    file_path : string
        File path. It is concatenated directly with ``file_name``, so it must already end with a
        path separator (e.g. ``'example_data/'``).

    file_name : string
        Filename

    Returns
    --------
    df : pandas dataframe
        A cleaned schedule df with one row per step and the columns 'step', 'step_type', 'step_mode',
        'step_mode_value', 'step_limit', 'step_limit_value', 'step_end_type', 'step_end_type_op',
        'step_end_type_value', 'goto_step', 'report_type', 'report_type_value', 'options', 'step_note'.

    Raises
    -------
    TypeError
        If ``file_path`` or ``file_name`` is not a string.

    NotADirectoryError
        If ``file_path + file_name`` does not exist.

    Notes
    ------
    The schedule file should be input as a csv with exactly 14 columns; the column headers in the file
    are replaced by the names listed above.

    Continuation rows that appear before the first numbered step have no step to attach to and are
    dropped.

    Examples
    ---------
    >>> import maccorcyclingdata.schedules as schedules
    >>> schedule_df = schedules.import_schedules('example_data/','schedule.csv')
    >>> schedule_df.head(5)
    """
    if not isinstance(file_path, str):
        raise TypeError('file path must be a string')

    if not isinstance(file_name, str):
        raise TypeError('file name must be a string')

    # Plain concatenation is kept on purpose so existing callers see the same path as before.
    full_path = file_path + file_name
    if not os.path.exists(full_path):
        raise NotADirectoryError("The path " + str(full_path) + " not found")

    df = pd.read_csv(full_path)
    df = df.dropna(how='all')  # delete the rows that are completely blank
    # rename the column headers
    df.columns = ['step', 'step_type', 'step_mode', 'step_mode_value', 'step_limit', 'step_limit_value',
                  'step_end_type', 'step_end_type_op', 'step_end_type_value', 'goto_step', 'report_type',
                  'report_type_value', 'options', 'step_note']
    df = df.reset_index(drop=True)  # labels now equal row positions, which the merge below relies on

    # the only columns that are merged across the rows of a multi-line step
    multi_line_columns = ['step_end_type', 'step_end_type_op', 'step_end_type_value', 'goto_step']

    # a row without a step number continues the step that started on an earlier row
    is_continuation = df['step'].isnull().tolist()

    if any(is_continuation):
        # list-valued cells can only be stored in object columns
        df = df.astype({column: object for column in multi_line_columns})

        total_rows = len(is_continuation)
        row = 0
        while row < total_rows:
            if is_continuation[row]:
                # continuation row with no preceding step (start of file): nothing to attach it to
                row += 1
                continue

            # find the end of the group; the bounds check covers a file that ends on a continuation row
            group_end = row + 1
            while group_end < total_rows and is_continuation[group_end]:
                group_end += 1

            if group_end - row > 1:
                for column in multi_line_columns:
                    # first element is the step's own value, followed by its continuation rows
                    df.at[row, column] = df[column].iloc[row:group_end].tolist()

            row = group_end

    df = df.dropna(subset=['step'])  # remove the continuation rows that were just merged
    df = df.astype({'step': int})  # set the type of the step column to int
    df = df.reset_index(drop=True)  # reset the df index
    return df

def sort_scheduler_steps(schedule_df):
    """
    Given the schedule_df (the df that was output by import_schedules function), this function will sort rest, charge, discharge, advance
    cycle, and end step numbers.

    Parameters
    -----------
    schedule_df : pandas dataframe
        The cleaned dataframe of the schedule file

    Returns
    --------
    rest_steps : array
        An array of the steps from the schedule file that correlate to rest steps

    charge_steps : array
        An array of the steps from the schedule file that correlate to charging steps

    advance_steps : array
        An array of the steps from the schedule file that correlate to steps when the cycle is advanced

    discharge_steps : array
        An array of the steps from the schedule file that correlate to discharging steps

    end_steps : array
        An array of the steps from the schedule file that correlate to ending steps

    max_step : integer
        The last step from the schedule file (the value of the 'step' column in the last row), or
        None when the schedule has no rows

    Raises
    -------
    TypeError
        If ``schedule_df`` is not a pandas dataframe.

    IndexError
        If ``schedule_df`` does not have exactly the 14 schedule columns, in order.

    Notes
    ------
    The step numbers placed in the five arrays are computed as ``index label + 1``, not read from the
    'step' column, so the dataframe is expected to have a 0-based integer index (as returned by
    import_schedules). Rows whose step type is not one of "Rest", "Charge", "Discharge",
    "Advance Cycle", or "End" are ignored.

    Examples
    ---------
    >>> import maccorcyclingdata.schedules as schedules
    >>> rest_steps, charge_steps, advance_steps, discharge_steps, end_steps, max_step = schedules.sort_scheduler_steps(schedule_df)
    """
    expected_columns = ['step', 'step_type', 'step_mode', 'step_mode_value', 'step_limit', 'step_limit_value',
                        'step_end_type', 'step_end_type_op', 'step_end_type_value', 'goto_step', 'report_type',
                        'report_type_value', 'options', 'step_note']

    if not isinstance(schedule_df, pd.DataFrame):
        raise TypeError('schedule_df input must be a pandas dataframe')

    if len(schedule_df.columns) != 14:
        raise IndexError("Pandas dataframe must have 14 columns")

    if schedule_df.columns.tolist() != expected_columns:
        # str() of the list renders exactly as the column list is written above
        raise IndexError("Pandas dataframe must have these columns: " + str(expected_columns))

    rest_steps = []
    charge_steps = []
    advance_steps = []
    discharge_steps = []
    end_steps = []

    # one lookup per row instead of five separate string comparisons
    steps_by_type = {
        "Rest": rest_steps,
        "Charge": charge_steps,
        "Discharge": discharge_steps,
        "Advance Cycle": advance_steps,
        "End": end_steps,
    }

    for label, step_type in schedule_df['step_type'].items():
        # non-string cells (e.g. NaN) can never match a step type name
        bucket = steps_by_type.get(step_type) if isinstance(step_type, str) else None
        if bucket is not None:
            bucket.append(label + 1)

    # computed once, outside the loop, so an empty schedule no longer leaves max_step undefined
    max_step = None if schedule_df.empty else schedule_df["step"].iloc[-1]

    return rest_steps, charge_steps, advance_steps, discharge_steps, end_steps, max_step