Source code for mswh.tools.plots

import logging
import os

import numpy as np
import pandas as pd

from plotly.offline import iplot, init_notebook_mode

import plotly.graph_objs as go
import plotly.io as pio

# Module-level logger named after this module. Its level is forced to DEBUG
# here, so every record emitted through `log` is passed on to the handlers.
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)


class Plot(object):
    """Creates and saves plots to visualize and correlate arrays,
    usually timeseries

    Parameters:

        title: str
            Plot title

        label_h: str
            Horizontal axis label

        label_v: str
            Vertical axis label

        data_headers: list
            A list of labels in the same order as the corresponding data.
            If None (or empty) the labels will be the df column labels,
            or integer indices if a list got provided

        save_image: boolean
            If True saves the created image with either a given or
            default path and filename. The file type follows the
            extension of the filename passed to the plotting method,
            e.g. 'png' or 'pdf'.

        legend: boolean
            Plot the legend or not

        outpath: string, '' or None
            Folder to save the image of the plot in. It is created if
            missing. '' and None both stand for the current working
            directory.

        duration_curve: boolean
            If True each column (df or arrays) gets sorted in a
            descending order before plotting, so that the duration
            curve is shown

        boxmode: str
            Passed as `boxmode` to the layout used by the box plots

        notebook_mode: boolean
            Plot in the notebook if True

        width: int
            Image width

        height: int
            Image height

        fontsize: int
            Axis label font size; the other font sizes are derived
            from it

        legend_x, legend_y: float
            Legend position passed to the layout

        margin_l, margin_b: float
            Left and bottom margin passed to the layout
    """

    def __init__(
        self,
        title="",
        label_h="Time [h]",
        label_v="Component performance",
        data_headers=None,
        save_image=True,
        legend=True,
        outpath="",
        duration_curve=False,
        boxmode="group",
        notebook_mode=False,
        width=1200,
        height=800,
        fontsize=28,
        legend_x=0.4,
        legend_y=1.0,
        margin_l=200.0,
        margin_b=200.0,
    ):
        self.data_headers = data_headers
        self.save_image = save_image
        self.outpath = outpath
        self.interactive = notebook_mode
        self.duration_curve = duration_curve

        def axis(label, tick_scale):
            # Axis style shared by all plots; only the tick size varies.
            return dict(
                title=label,
                titlefont=dict(size=fontsize, color="#7f7f7f"),
                tickfont=dict(size=fontsize * tick_scale),
            )

        def build_layout(title_scale, y_tick_scale, legend_style, **extra):
            # plot formatting, see
            # https://plot.ly/python/reference/#layout-titlefont
            # NOTE(review): `titlefont` is the legacy spelling; newer plotly
            # releases may expect title=dict(text=..., font=...) instead -
            # confirm against the plotly version this project pins.
            return go.Layout(
                font=dict(size=fontsize, family="arial"),
                title=title,
                titlefont=dict(size=fontsize * title_scale, family="arial"),
                xaxis=axis(label_h, 0.8),
                yaxis=axis(label_v, y_tick_scale),
                showlegend=legend,
                width=width,
                height=height,
                margin=dict(l=margin_l, b=margin_b),
                legend=legend_style,
                **extra
            )

        # layout used by the scatter and the series plots
        self.layout = build_layout(
            1.0,
            0.6,
            dict(
                x=legend_x,
                y=legend_y,
                font=dict(family="arial", size=fontsize * 0.8),
            ),
        )
        # layout used by the box plots
        self.boxlayout = build_layout(
            1.2,
            0.8,
            dict(x=legend_x, y=legend_y),
            boxmode=boxmode,
        )

    @staticmethod
    def _as_frame(data):
        """Returns the input data as a new pandas dataframe.

        A list (or tuple) of lists/arrays/series becomes one column per
        element; shorter elements get padded with np.nan so that the
        data is rectangular. A flat list of scalars becomes a single
        column. Anything else gets passed to the dataframe constructor.
        The result is always a copy, such that relabeling or sorting
        never modifies the object passed by the caller.
        """
        if not isinstance(data, (list, tuple)):
            return pd.DataFrame(data).copy()

        if data and all(np.isscalar(item) for item in data):
            # a single variable passed as a flat list
            columns = [list(data)]
        else:
            columns = [
                item.tolist() if hasattr(item, "tolist") else list(item)
                for item in data
            ]

        height = max((len(column) for column in columns), default=0)
        padded = {
            position: column + [np.nan] * (height - len(column))
            for position, column in enumerate(columns)
        }
        return pd.DataFrame(padded, index=range(height))

    @staticmethod
    def _sort_descending(frame):
        """Sorts each column of the frame independently, in place, in a
        descending order (duration curve) and returns the frame.
        """
        for position in range(frame.shape[1]):
            frame.iloc[:, position] = (
                frame.iloc[:, position].sort_values(ascending=False).values
            )
        return frame

    def _ensure_outdir(self):
        """Returns the output folder, creating it if needed. An unset
        output path ('' or None) resolves to the current directory.
        """
        if not self.outpath:
            # os.makedirs('') raises, so '' has to be resolved first
            self.outpath = os.getcwd()
        os.makedirs(self.outpath, exist_ok=True)
        return self.outpath

    def _finalize(self, fig, outfile):
        """Saves and/or displays the figure according to the settings.

        Returns the figure if in the interactive mode, True otherwise.
        """
        if self.save_image:
            pio.write_image(fig, os.path.join(self._ensure_outdir(), outfile))

        if not self.interactive:
            return True

        try:
            iplot(fig)
        except Exception as err:
            log.error("Interactive mode failed.")
            raise RuntimeError("Interactive mode failed.") from err
        return fig

    def scatter(self, data, outfile="scatter.png", modes="lines+markers"):
        """Creates a scatter plot

        Parameters:

            data: list of arrays/lists, 2D array, pd df
                Provide a list of arrays/lists or a pandas dataframe.
                The variables should be ordered in pairs such that
                each odd variable in the list/first column in the df
                gets assigned to the horizontal axis, each even
                variable to the vertical axis.
                Pairs can be of a different length; shorter columns
                get padded with np.nan.

            outfile: str
                Filename including the extension, e.g. '.png' or '.pdf'

            modes: str or list of str
                'markers', 'lines', 'lines+markers' or a list of the
                above to assign to each plot (one string in a list
                for each pair of data)

        Returns:

            fig: plotly figure
                if self.interactive else True

        Raises:

            ValueError
                If less than two or an odd number of columns got
                provided
        """
        # some input format error handling, not exhaustive
        if isinstance(data, (list, tuple)) and len(data) < 2:
            msg = (
                "Provide at least two arrays or columns to "
                "create a scatter plot. Or try Series plot "
                "for a single column of data versus its index."
            )
            log.error(msg)
            raise ValueError(msg)

        if isinstance(data, pd.Series) or (
            isinstance(data, pd.DataFrame) and data.shape[1] == 1
        ):
            msg = (
                "Provide a dataframe with no less than two "
                "columns. Series plot can plot a single column "
                "against its index."
            )
            log.error(msg)
            raise ValueError(msg)

        frame = self._as_frame(data)

        if self.duration_curve:
            self._sort_descending(frame)

        if self.data_headers:
            frame.columns = self.data_headers

        num_columns = frame.shape[1]

        if num_columns % 2 != 0:
            msg = (
                "Provide an even number of columns, "
                "e.g. [x1, y1, x2, y2, ...]"
            )
            log.error(msg)
            raise ValueError(msg)

        if isinstance(modes, list):
            list_modes = modes
        else:
            list_modes = [modes] * (num_columns // 2)

        # one trace per (x, y) pair, labeled after the y column
        plot_data = [
            go.Scatter(
                x=frame.iloc[:, x_position],
                y=frame.iloc[:, x_position + 1],
                mode=list_modes[x_position // 2],
                name=frame.columns[x_position + 1],
            )
            for x_position in range(0, num_columns, 2)
        ]

        fig = go.Figure(data=plot_data, layout=self.layout)
        return self._finalize(fig, outfile)

    def series(
        self,
        data,
        index_in_a_column=None,
        outfile="series.png",
        modes="lines+markers",
    ):
        """Plots all series data against either the index or
        one chosen column. It can sort the data and plot the
        duration_curve.

        Parameters:

            data: array/list, pd series, list of arrays/lists, pd df
                Provide an array, a list or a pandas series if
                plotting a single variable. If plotting multiple
                variables provide a list of arrays/lists or a pandas
                dataframe. Shorter arrays in a list get padded with
                np.nan.
                Horizontal axis corresponds to the index of the
                resulting dataframe (a range for lists and arrays),
                unless `index_in_a_column` is given.

            index_in_a_column: column label
                Horizontal axis labels.
                If None, the dataframe index is used, otherwise
                pass a column label for a column (it will not be
                considered as a series to plot)

            outfile: str
                Filename including the extension, e.g. '.png' or '.pdf'

            modes: str or list of str
                'markers', 'lines', 'lines+markers' or a list of the
                above to assign to each column of data, excluding
                the column chosen by index_in_a_column

        Returns:

            fig: plotly figure
                if self.interactive else True
        """
        frame = self._as_frame(data)

        if index_in_a_column is not None:
            labels_h_axis = frame.loc[:, index_in_a_column]
            frame = frame.drop(columns=[index_in_a_column])
        else:
            labels_h_axis = frame.index

        if self.data_headers:
            frame.columns = self.data_headers

        num_columns = frame.shape[1]

        if isinstance(modes, list):
            list_modes = modes
        else:
            list_modes = [modes] * num_columns

        if self.duration_curve:
            self._sort_descending(frame)

        plot_data = [
            go.Scatter(
                x=labels_h_axis,
                y=frame.iloc[:, position],
                mode=list_modes[position],
                name=frame.columns[position],
            )
            for position in range(num_columns)
        ]

        fig = go.Figure(data=plot_data, layout=self.layout)
        return self._finalize(fig, outfile)

    def box(
        self,
        dfs,
        plot_cols=None,
        groupby_cols=None,
        df_cat=None,
        outfile="box.png",
        boxmean=False,
        colors=["#3D9970", "#FF4136", "#FF851B"],
        title="Energy Use",
        boxpoints="outliers",
    ):
        """Creates box plots for the chosen `plot_cols` and
        can group plots by the `groupby_cols`.

        Parameters:

            dfs: list of dfs

            plot_cols: list
                Columns to plot, one from each df in dfs (required).
                If multiple dfs are passed, the values will
                be shown as groups on the plot

            groupby_cols: list
                Columns to use as x axis, one from each df. Use the
                same column if it has the same elements.
                Use None if x axis category not used

            df_cat: list of str
                Indicator of the category carried by the dfs
                (E.g. the dfs differ by housing type). None stands
                for no category label.

            outfile: str
                Filename including the extension, e.g. '.png' or '.pdf'

            boxmean: True, False, 'sd'
                Passed to each box trace

            colors: list of str
                Marker colors, one per df; reused cyclically if there
                are more dfs than colors. The default list is never
                modified.

            title: str
                Currently not used; the plot title is the one passed
                when creating the Plot object

            boxpoints: False, 'all', 'outliers', 'suspectedoutliers'
                See https://plot.ly/python/reference/#box

        Returns:

            fig: plotly figure
                if self.interactive else True

        Raises:

            TypeError
                If plot_cols is not provided
        """
        if plot_cols is None:
            msg = "Provide plot_cols: one column label for each df in dfs."
            log.error(msg)
            raise TypeError(msg)

        traces = []
        for position, df in enumerate(dfs):
            column = plot_cols[position]

            group = None if groupby_cols is None else groupby_cols[position]
            x_values = None if group is None else df[group].values.tolist()

            category = None if df_cat is None else df_cat[position]
            if category is None:
                category = ""

            # cycle through the palette rather than running out of colors
            color = colors[position % len(colors)] if colors else None

            traces.append(
                go.Box(
                    y=df[column].values.tolist(),
                    x=x_values,
                    name=str(column) + " - " + str(category),
                    boxpoints=boxpoints,
                    marker=dict(color=color),
                    boxmean=boxmean,
                )
            )

        fig = go.Figure(data=traces, layout=self.boxlayout)
        return self._finalize(fig, outfile)
# size and color modes:
# .........
# mode='markers',
# marker={'size': sz,
#         'color': colors,
#         'opacity': 0.6,
#         'colorscale': 'Viridis'}