Source code for maccorcyclingdata.schedules

import pandas as pd
import numpy as np
import os

[docs]def import_schedules(file_path, file_name): """ Given the file path and file name (of the schedule file that is inputted into the Maccor Cycler), this function will import and clean the schedule file and return it as a df. Parameters ----------- file_path : string File path file_name : string Filename Returns -------- df : pandas dataframe A cleaned schedule df Notes ------ The schedule file should be input as a csv. Examples --------- >>> import maccorcyclingdata.schedules as schedules >>> schedule_df = schedules.import_schedules('example_data/','schedule.csv') >>> schedule_df.head(5) """ if not isinstance(file_path, str): raise TypeError('file path must be a string') if not isinstance(file_name, str): raise TypeError('file name must be a string') if not os.path.exists(file_path+file_name): raise NotADirectoryError("The path " + str(file_path + file_name) + " not found") df = pd.read_csv(file_path + file_name) df = df.dropna(how='all') #delete the rows that are completely blank df.columns = ['step', 'step_type', 'step_mode', 'step_mode_value', 'step_limit', 'step_limit_value', 'step_end_type', 'step_end_type_op', 'step_end_type_value', 'goto_step', 'report_type', 'report_type_value', 'options', 'step_note'] #rename the column headers df = df.reset_index(drop=True) #reset the df index #this section of the function creates an array that has the indices of the row where the multi-row step starts arr = [] for ind in df.index: if pd.isnull(df['step'][ind]): arr.append((ind-1)) #array contains all indices of when the step is "nan" - 1 (basically, the logic is that the row before the row where the step is nan is when the multi-line step begins) for x in range((len(arr) - 1), -1, -1): #iterates through the array backwards (however, since the multi-line steps are not just two lines long and can be three/four/etc lines long, if the values are consecutive it means it is still a part of the previous multi-line group so it needs to be deleted) if (arr[x]) == (arr[x - 1] + 1): del arr[x] for x in arr: #the only columns that have multi-line steps are end_type, op, value, and goto, so make an array for each of those columns (the arrays hold the value of the first line of multi-line step group) end_type = [df['step_end_type'][x]] op = [df['step_end_type_op'][x]] value = [df['step_end_type_value'][x]] goto = [df['goto_step'][x]] ind = x + 1 while pd.isnull(df['step'][ind]): #each time the line after the first line of the multi-line group has a null value at the step it is appended to the arrays with respect to the column, once the next line returns a not null value at the step column, it means it has moved on to the next step end_type.append(df['step_end_type'][ind]) op.append(df['step_end_type_op'][ind]) value.append(df['step_end_type_value'][ind ]) goto.append(df['goto_step'][ind]) #df = df.drop([df.index[ind]]) #delete the row whose values were just appended to the arrays ind += 1 df_update = pd.DataFrame({'step_end_type': [end_type], 'step_end_type_op': [op], 'step_end_type_value': [value], 'goto_step': [goto]}, index=[x]) df.update(df_update) #add the arrays into their respective places in the original df df = df.dropna(subset=['step']) #set the type of the step columns to int df = df.astype({'step': int}) df = df.reset_index(drop=True) #reset the df index return df
[docs]def sort_scheduler_steps(schedule_df): """ Given the schedule_df (the df that was output by import_schedules function), this function will sort rest, charge, discharge, advance cycle, and end step numbers. Parameters ----------- schedule_df : pandas dataframe The cleaned dataframe of the schedule file Returns -------- rest_steps : array An array of the steps from the schedule file that correlate to rest steps charge_steps : array An array of the steps from the schedule file that correlate to charging steps advance_steps : array An array of the steps from the schedule file that correlate to steps when the cycle is advanced discharge_steps : array An array of the steps from the schedule file that correlate to discharging steps end_steps : array An array of the steps from the schedule file that correlate to ending steps max_step : integer The last step from the schedule file Examples --------- >>> import maccorcyclingdata.schedules as schedules >>> rest_steps, charge_steps, advance_steps, discharge_steps, end_steps, max_step = sort_scheduler_steps(schedule_df) """ if not isinstance(schedule_df, pd.DataFrame): raise TypeError('schedule_df input must be a pandas dataframe') if not len(schedule_df.columns) == 14: raise IndexError("Pandas dataframe must have 14 columns") if (schedule_df.columns.tolist() != ['step', 'step_type', 'step_mode', 'step_mode_value', 'step_limit', 'step_limit_value', 'step_end_type', 'step_end_type_op', 'step_end_type_value', 'goto_step', 'report_type', 'report_type_value', 'options', 'step_note']): raise IndexError("Pandas dataframe must have these columns: ['step', 'step_type', 'step_mode', 'step_mode_value', 'step_limit', 'step_limit_value', 'step_end_type', 'step_end_type_op', 'step_end_type_value', 'goto_step', 'report_type', 'report_type_value', 'options', 'step_note']") rest_steps = [] charge_steps = [] advance_steps = [] discharge_steps = [] end_steps = [] for i in schedule_df.index: if schedule_df['step_type'][i] == "Rest": rest_steps.append(i+1) if schedule_df['step_type'][i] == "Charge": charge_steps.append(i+1) if schedule_df['step_type'][i] == "Discharge": discharge_steps.append(i+1) if schedule_df['step_type'][i] == "Advance Cycle": advance_steps.append(i+1) if schedule_df['step_type'][i] == "End": end_steps.append(i+1) max_step = schedule_df["step"].iloc[-1] return rest_steps, charge_steps, advance_steps, discharge_steps, end_steps, max_step