Use Plotly to Stylize Charts and Graphics

First, pandas ExcelWriter provides quite a few Excel specific formatting methods through the xlsxwriter engine, it might not have the full flexibility offered by openpyxl, so the best is to use both by pd.to_excel then from openpyxl import load_workbook, wb = load_workbook(‘output.xlsx’), ws = wb[‘Sheet1’].

However, this might cause read in and out I/O operations requiring much longer time to run. One bypass is to still create an ExcelWriter object, write in the data, save the workbook to a variable without saving to disk, then apply modifications with openpyxl, and save to disk.

# Create a workbook
wb = Workbook()

# Create an ExcelWriter object with the existing workbook
ew = ExcelWriter('path_to_file.xlsx', engine='openpyxl')
ew.book = wb

# Add a DataFrame to the workbook
df.to_excel(ew, sheet_name='Sheet1')

# Save the workbook to a variable without saving to disk
wb = ew.book

# Apply modifications with openpyxl
ws = wb['Sheet1']
ws['A1'] = "This is a test"

# Save to disk
wb.save('path_to_file.xlsx')

Adding on additional to_excel settings in the fpe that can be incorporated before processing: Vontent-Type will be set to application/vnd.ms-excel.Parameters

  • df (DataFrame) – The DataFrame containing the values to output.
  • sheet_name (strdefault ‘Sheet1’) – Name of sheet which will contain DataFrame.
  • na_rep (strdefault ”) – Missing data representation.
  • float_format (stroptional) – Format string for floating point numbers. For example float_format="%.2f" will format 0.1234 to 0.12.
  • columns (sequence or list of stroptional) – Columns to write.
  • header (bool or list of strdefault True) – Write out the column names. If a list of string is given it is assumed to be aliases for the column names.
  • index (booldefault True) – Write row names (index).
  • index_label (str or sequenceoptional) – Column label for index column(s) if desired. If not specified, and header and index are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex.
  • startrow (intdefault 0) – Upper left cell row to dump data frame.
  • startcol (intdefault 0) – Upper left cell column to dump data frame.
  • merge_cells (booldefault True) – Write MultiIndex and Hierarchical Rows as merged cells.
  • inf_rep (strdefault ‘inf’) – Representation for infinity (there is no native representation for infinity in Excel).
  • verbose (booldefault True) – Display more information in the error logs.
  • freeze_panes (tuple of int (length 2)optional) – Specifies the one-based bottommost row and rightmost column that is to be frozen.

Attached the codes here and a sample chart:

# Author Naixian Carucci Index Solution on 20240409
# This is the py file as script to be executed by FPE API, to create corporate style charts


###########################################################################################################################################################
import openpyxl
import matplotlib.figure
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# plotting
import plotly
import plotly.express as px
# These two lines change plot background to dark.  Can omit if you are using a light background.
import plotly.io as pio
pio.templates.default = 'plotly_dark'
import plotly.express as px
import plotly.graph_objects as go
from io import BytesIO

import kaleido

# Function to create line plot
def create_line_plot(df, sheet_name):
    
    # preprocess df first
    first_row_list = [val for val in df.iloc[0].tolist() if not pd.isna(val)]
    df = df.iloc[1:, ]

    fig_performance = go.Figure()  # Creates the Plotly figure

    # Iterate over each column in the DataFrame
    for i, col in enumerate(df.columns[[2, 4]]):
        # Prepare the y-values - drop NaN values
        y_values = df[col].dropna()

        # Define legend label
        legend_label = f"{first_row_list[i]} {col}: {y_values.iloc[-1]:.4f}"

        # Add a trace to the figure for this column
        colors = ['blue', 'green']
        fig_performance.add_trace(
            go.Scatter(
                x=df.index[1:], 
                y=y_values[1:], 
                mode='lines',
                name=legend_label,
                line = dict(color = colors[i])
            )
        )

    fig_performance.update_layout(  # Customize the layout of the figure
        title=f"Cumulative Total Return",
        title_x=0.5,  # Center the title
        xaxis_title="",
        yaxis_title="",
        legend_title="",
        template="plotly_white", 
        legend=dict(
            font=dict(size=10),  # Decrease font size to accommodate larger labels
            orientation="v",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        xaxis=dict(
            title="",  # Assuming you want no specific title for the x-axis
            tickfont=dict(size=7.5),
            showline=True,
            showgrid=False,
            linecolor='black',
            tickangle=-45,  # Negative angle can sometimes be more visually appealing
            # tickformat="%Y-%m-%d",  # Example date format (customize as needed)
            # dtick="M1",  # Tick spacing by one month, adjust according to your data density
            tickmode="auto",  # Use automatic tick generation
            nticks=20,  # Limit the number of total ticks, adjust to suit your data density
        ),
        yaxis=dict(
            title="",  # Assuming you want no specific title for the x-axis
            tickfont=dict(size=8.5),
            showline=True,
            showgrid=False,
            # linecolor='black',
            # tickangle=-45,  # Negative angle can sometimes be more visually appealing
            # tickformat="%Y-%m-%d",  # Example date format (customize as needed)
            # dtick="M1",  # Tick spacing by one month, adjust according to your data density
            tickmode="auto",  # Use automatic tick generation
            # nticks=20,  # Limit the number of total ticks, adjust to suit your data density
        ),
    )
    # fig_performance.show()
    return fig_performance

def create_globe_plot(df, sheet_name):
    df.columns = df.iloc[0]
    df = df[1:]
    # color map options
    color_continuous_scales = ['aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose', 'balance',
                    'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg',
                    'brwnyl', 'bugn', 'bupu', 'burg', 'burgyl', 'cividis', 'curl',
                    'darkmint', 'deep', 'delta', 'dense', 'earth', 'edge', 'electric',
                    'emrld', 'fall', 'geyser', 'gnbu', 'gray', 'greens', 'greys',
                    'haline', 'hot', 'hsv', 'ice', 'icefire', 'inferno', 'jet',
                    'magenta', 'magma', 'matter', 'mint', 'mrybm', 'mygbm', 'oranges',
                    'orrd', 'oryel', 'oxy', 'peach', 'phase', 'picnic', 'pinkyl',
                    'piyg', 'plasma', 'plotly3', 'portland', 'prgn', 'pubu', 'pubugn',
                    'puor', 'purd', 'purp', 'purples', 'purpor', 'rainbow', 'rdbu',
                    'rdgy', 'rdpu', 'rdylbu', 'rdylgn', 'redor', 'reds', 'solar',
                    'spectral', 'speed', 'sunset', 'sunsetdark', 'teal', 'tealgrn',
                    'tealrose', 'tempo', 'temps', 'thermal', 'tropic', 'turbid',
                    'turbo', 'twilight', 'viridis', 'ylgn', 'ylgnbu', 'ylorbr',
                    'ylorrd']

    # different ways a globe can be represented in 2-D
    projections = ['equirectangular', 'mercator', 'orthographic', 'natural earth', 
                    'kavrayskiy7', 'miller', 'robinson', 'eckert4', 'azimuthal equal area', 'azimuthal equidistant', 
                    'conic equal area', 'conic conformal', 'conic equidistant', 'gnomonic', 'stereographic',
                    'mollweide', 'hammer', 'transverse mercator', 'albers usa',  'winkel tripel', 'aitoff', 'sinusoidal']

    # scope is `world` by default (as below)
    scopes =  ['world', 'usa', 'europe', 'asia', 'africa', 'north america', 'south america']
    # active country weights
    fig_domicile = px.choropleth(df, locations=df.index, locationmode='country names', color='Active Weight', 
                        title='Active Country Weights (%)', height=650, hover_name=df.index, color_continuous_scale='temps_r', projection='natural earth') 

    # fig_domicile.show()
    return fig_domicile



def create_under_over_weight_plot(df, sheet_name, level=0):
    # Replace the header
    df.columns = df.iloc[0]
    df = df[1:]

    colors = ['blue', 'green', 'yellow']
    
    # Define the column names corresponding to each level
    columns = ['Economic Sector - FactSet', 'Industry - FactSet', 'company_name']

    df_grouped = df.groupby(columns[level]).sum().reset_index()
    last_three_cols = df.columns[-3:].tolist()
    # Melt the DataFrame to a long format
    df_melted = df_grouped.melt(id_vars=columns[level], 
                                 value_vars=last_three_cols, 
                                 var_name='Weight Category', 
                                 value_name='Value')
    df_sorted = df_melted.sort_values(by='Value', ascending=True)
    # Create the plot
    fig = px.bar(df_sorted, 
                 x='Value', 
                 y=columns[level], 
                 color='Weight Category',
                 orientation='h',
                 title="Sector Portfolio Weights",
                 color_discrete_sequence=colors)

    # Update axises labels
    fig.update_yaxes(title_text=columns[level], tickfont=dict(size=12))
    fig.update_xaxes(title_text="Port. Weight", tickfont=dict(size=12))
    fig.show()    
    return fig   
    
    
    
    

# Mapping of sheet names to plot types
plot_mapper = {
    'performance total return': lambda df, sheet_name, level=None: create_line_plot(df, sheet_name),
    'domicile country exposure': lambda df, sheet_name, level=None: create_globe_plot(df, sheet_name),
    'sector weights exposure': lambda df, sheet_name, level=0: create_under_over_weight_plot(df, sheet_name, level),
    # More sheets can be added
}


# Open excel file
xls = pd.ExcelFile('./Sample_PA_Output.xlsx')
wb = load_workbook(xls)  # Load workbook
# Iterate over each sheet in the excel file
# sn_list = [x for x in xls.sheet_names]
# sn_list = ['performance total return']
# sn_list = ['domicile country exposure']
# sn_list = ['sector weights exposure']
sn_list = ['performance total return',  'sector weights exposure']
for sheet_name in sn_list:
    df = pd.read_excel(xls, sheet_name=sheet_name, header=None)
    # Drop the row full of NaNs
    df.dropna(how='all', inplace=True)
    # Set the first column (date) as index
    df.set_index(df.columns[0], inplace=True)
    # print(df)
    # df.columns
    level=1 #modify to make it an input in actual script
    plot = plot_mapper[sheet_name](df, sheet_name, level)
    type(plot)
    # Get the sheet to add image to
    ws = wb[sheet_name]

    # img_bytes = plot.to_image(format="png")
    # image_data = BytesIO(img_bytes)  # Convert bytes to a BytesIO object
    # img = Image(image_data)
    # ws.add_image(img, 'K2')
            
    max_col = len(df.columns)
    start_row = 0  # Excel rows are indexed starting at 0, but row index 0 is the header
    start_col = max_col + 2  # Start from the column after the DataFrame table, Indexed from zero
    img_bytes = plot.to_image(format="png")
    image_data = BytesIO(img_bytes)  # Convert bytes to a BytesIO object
    img = Image(image_data)
    ws.add_image(img, ws.cell(row=start_row+2, column=start_col).coordinate)

# Save with a new name

wb.save("Modified.xlsx")







Leave a comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.