First, pandas ExcelWriter provides quite a few Excel specific formatting methods through the xlsxwriter engine, it might not have the full flexibility offered by openpyxl, so the best is to use both by pd.to_excel then from openpyxl import load_workbook, wb = load_workbook(‘output.xlsx’), ws = wb[‘Sheet1’].
However, this might cause read in and out I/O operations requiring much longer time to run. One bypass is to still create an ExcelWriter object, write in the data, save the workbook to a variable without saving to disk, then apply modifications with openpyxl, and save to disk.
# Create a workbook
wb = Workbook()
# Create an ExcelWriter object with the existing workbook
ew = ExcelWriter('path_to_file.xlsx', engine='openpyxl')
ew.book = wb
# Add a DataFrame to the workbook
df.to_excel(ew, sheet_name='Sheet1')
# Save the workbook to a variable without saving to disk
wb = ew.book
# Apply modifications with openpyxl
ws = wb['Sheet1']
ws['A1'] = "This is a test"
# Save to disk
wb.save('path_to_file.xlsx')
Adding on additional to_excel settings in the fpe that can be incorporated before processing: Vontent-Type will be set to application/vnd.ms-excel.Parameters
- df (DataFrame) – The DataFrame containing the values to output.
- sheet_name (str, default ‘Sheet1’) – Name of sheet which will contain DataFrame.
- na_rep (str, default ”) – Missing data representation.
- float_format (str, optional) – Format string for floating point numbers. For example
float_format="%.2f"will format 0.1234 to 0.12. - columns (sequence or list of str, optional) – Columns to write.
- header (bool or list of str, default True) – Write out the column names. If a list of string is given it is assumed to be aliases for the column names.
- index (bool, default True) – Write row names (index).
- index_label (str or sequence, optional) – Column label for index column(s) if desired. If not specified, and header and index are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex.
- startrow (int, default 0) – Upper left cell row to dump data frame.
- startcol (int, default 0) – Upper left cell column to dump data frame.
- merge_cells (bool, default True) – Write MultiIndex and Hierarchical Rows as merged cells.
- inf_rep (str, default ‘inf’) – Representation for infinity (there is no native representation for infinity in Excel).
- verbose (bool, default True) – Display more information in the error logs.
- freeze_panes (tuple of int (length 2), optional) – Specifies the one-based bottommost row and rightmost column that is to be frozen.
Attached the codes here and a sample chart:
# Author Naixian Carucci Index Solution on 20240409
# This is the py file as script to be executed by FPE API, to create corporate style charts
###########################################################################################################################################################
import openpyxl
import matplotlib.figure
from openpyxl import load_workbook
from openpyxl.drawing.image import Image
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# plotting
import plotly
import plotly.express as px
# These two lines change plot background to dark. Can omit if you are using a light background.
import plotly.io as pio
pio.templates.default = 'plotly_dark'
import plotly.express as px
import plotly.graph_objects as go
from io import BytesIO
import kaleido
# Function to create line plot
def create_line_plot(df, sheet_name):
# preprocess df first
first_row_list = [val for val in df.iloc[0].tolist() if not pd.isna(val)]
df = df.iloc[1:, ]
fig_performance = go.Figure() # Creates the Plotly figure
# Iterate over each column in the DataFrame
for i, col in enumerate(df.columns[[2, 4]]):
# Prepare the y-values - drop NaN values
y_values = df[col].dropna()
# Define legend label
legend_label = f"{first_row_list[i]} {col}: {y_values.iloc[-1]:.4f}"
# Add a trace to the figure for this column
colors = ['blue', 'green']
fig_performance.add_trace(
go.Scatter(
x=df.index[1:],
y=y_values[1:],
mode='lines',
name=legend_label,
line = dict(color = colors[i])
)
)
fig_performance.update_layout( # Customize the layout of the figure
title=f"Cumulative Total Return",
title_x=0.5, # Center the title
xaxis_title="",
yaxis_title="",
legend_title="",
template="plotly_white",
legend=dict(
font=dict(size=10), # Decrease font size to accommodate larger labels
orientation="v",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1
),
xaxis=dict(
title="", # Assuming you want no specific title for the x-axis
tickfont=dict(size=7.5),
showline=True,
showgrid=False,
linecolor='black',
tickangle=-45, # Negative angle can sometimes be more visually appealing
# tickformat="%Y-%m-%d", # Example date format (customize as needed)
# dtick="M1", # Tick spacing by one month, adjust according to your data density
tickmode="auto", # Use automatic tick generation
nticks=20, # Limit the number of total ticks, adjust to suit your data density
),
yaxis=dict(
title="", # Assuming you want no specific title for the x-axis
tickfont=dict(size=8.5),
showline=True,
showgrid=False,
# linecolor='black',
# tickangle=-45, # Negative angle can sometimes be more visually appealing
# tickformat="%Y-%m-%d", # Example date format (customize as needed)
# dtick="M1", # Tick spacing by one month, adjust according to your data density
tickmode="auto", # Use automatic tick generation
# nticks=20, # Limit the number of total ticks, adjust to suit your data density
),
)
# fig_performance.show()
return fig_performance
def create_globe_plot(df, sheet_name):
df.columns = df.iloc[0]
df = df[1:]
# color map options
color_continuous_scales = ['aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose', 'balance',
'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg',
'brwnyl', 'bugn', 'bupu', 'burg', 'burgyl', 'cividis', 'curl',
'darkmint', 'deep', 'delta', 'dense', 'earth', 'edge', 'electric',
'emrld', 'fall', 'geyser', 'gnbu', 'gray', 'greens', 'greys',
'haline', 'hot', 'hsv', 'ice', 'icefire', 'inferno', 'jet',
'magenta', 'magma', 'matter', 'mint', 'mrybm', 'mygbm', 'oranges',
'orrd', 'oryel', 'oxy', 'peach', 'phase', 'picnic', 'pinkyl',
'piyg', 'plasma', 'plotly3', 'portland', 'prgn', 'pubu', 'pubugn',
'puor', 'purd', 'purp', 'purples', 'purpor', 'rainbow', 'rdbu',
'rdgy', 'rdpu', 'rdylbu', 'rdylgn', 'redor', 'reds', 'solar',
'spectral', 'speed', 'sunset', 'sunsetdark', 'teal', 'tealgrn',
'tealrose', 'tempo', 'temps', 'thermal', 'tropic', 'turbid',
'turbo', 'twilight', 'viridis', 'ylgn', 'ylgnbu', 'ylorbr',
'ylorrd']
# different ways a globe can be represented in 2-D
projections = ['equirectangular', 'mercator', 'orthographic', 'natural earth',
'kavrayskiy7', 'miller', 'robinson', 'eckert4', 'azimuthal equal area', 'azimuthal equidistant',
'conic equal area', 'conic conformal', 'conic equidistant', 'gnomonic', 'stereographic',
'mollweide', 'hammer', 'transverse mercator', 'albers usa', 'winkel tripel', 'aitoff', 'sinusoidal']
# scope is `world` by default (as below)
scopes = ['world', 'usa', 'europe', 'asia', 'africa', 'north america', 'south america']
# active country weights
fig_domicile = px.choropleth(df, locations=df.index, locationmode='country names', color='Active Weight',
title='Active Country Weights (%)', height=650, hover_name=df.index, color_continuous_scale='temps_r', projection='natural earth')
# fig_domicile.show()
return fig_domicile
def create_under_over_weight_plot(df, sheet_name, level=0):
# Replace the header
df.columns = df.iloc[0]
df = df[1:]
colors = ['blue', 'green', 'yellow']
# Define the column names corresponding to each level
columns = ['Economic Sector - FactSet', 'Industry - FactSet', 'company_name']
df_grouped = df.groupby(columns[level]).sum().reset_index()
last_three_cols = df.columns[-3:].tolist()
# Melt the DataFrame to a long format
df_melted = df_grouped.melt(id_vars=columns[level],
value_vars=last_three_cols,
var_name='Weight Category',
value_name='Value')
df_sorted = df_melted.sort_values(by='Value', ascending=True)
# Create the plot
fig = px.bar(df_sorted,
x='Value',
y=columns[level],
color='Weight Category',
orientation='h',
title="Sector Portfolio Weights",
color_discrete_sequence=colors)
# Update axises labels
fig.update_yaxes(title_text=columns[level], tickfont=dict(size=12))
fig.update_xaxes(title_text="Port. Weight", tickfont=dict(size=12))
fig.show()
return fig
# Mapping of sheet names to plot types
plot_mapper = {
'performance total return': lambda df, sheet_name, level=None: create_line_plot(df, sheet_name),
'domicile country exposure': lambda df, sheet_name, level=None: create_globe_plot(df, sheet_name),
'sector weights exposure': lambda df, sheet_name, level=0: create_under_over_weight_plot(df, sheet_name, level),
# More sheets can be added
}
# Open excel file
xls = pd.ExcelFile('./Sample_PA_Output.xlsx')
wb = load_workbook(xls) # Load workbook
# Iterate over each sheet in the excel file
# sn_list = [x for x in xls.sheet_names]
# sn_list = ['performance total return']
# sn_list = ['domicile country exposure']
# sn_list = ['sector weights exposure']
sn_list = ['performance total return', 'sector weights exposure']
for sheet_name in sn_list:
df = pd.read_excel(xls, sheet_name=sheet_name, header=None)
# Drop the row full of NaNs
df.dropna(how='all', inplace=True)
# Set the first column (date) as index
df.set_index(df.columns[0], inplace=True)
# print(df)
# df.columns
level=1 #modify to make it an input in actual script
plot = plot_mapper[sheet_name](df, sheet_name, level)
type(plot)
# Get the sheet to add image to
ws = wb[sheet_name]
# img_bytes = plot.to_image(format="png")
# image_data = BytesIO(img_bytes) # Convert bytes to a BytesIO object
# img = Image(image_data)
# ws.add_image(img, 'K2')
max_col = len(df.columns)
start_row = 0 # Excel rows are indexed starting at 0, but row index 0 is the header
start_col = max_col + 2 # Start from the column after the DataFrame table, Indexed from zero
img_bytes = plot.to_image(format="png")
image_data = BytesIO(img_bytes) # Convert bytes to a BytesIO object
img = Image(image_data)
ws.add_image(img, ws.cell(row=start_row+2, column=start_col).coordinate)
# Save with a new name
wb.save("Modified.xlsx")
