I am learning a great deal from AI!
Even with the world’s best intelligence tools, we must continue to learn along the way. In the future, we will be able to converse with AI and ask it to tackle the most challenging problems. This means we need to “KNOW” more, but we don’t necessarily have to spend time on tedious details to make things work; that responsibility should be handed over to AI. Instead, we should focus on mastering the highest forms of intelligence, which involve reasoning, inferring, and designing, all supported by a deep understanding.
For example, the function below contains three clever design ideas: 1. split the work into nested functions — one part that does the recursive filtering work, and another that checks whether more of it is needed; 2. duplicate checking and treatment; 3. crafty use of dictionaries in Python.
def filter_entities(suppliers, partners_direct, eligible_ids, aiid_dic, step3_df, n=45):
    """Select up to (50 - len(aiid_dic)) unique supplier/partner entities for
    the AI companies in ``aiid_dic``, topping up from ``step3_df`` when needed.

    Parameters
    ----------
    suppliers : DataFrame
        Supplier relationships with columns ``factset_entity_id``,
        ``related_entity_id`` and ``mkt_usd``.
    partners_direct : DataFrame
        Direct-partner relationships with the same columns as ``suppliers``.
    eligible_ids : set-like
        Related-entity ids that are allowed to appear in the output.
    aiid_dic : dict
        Maps AI-company id -> ordering rank. MUTATED in place when extra
        companies are pulled in from ``step3_df``.
    step3_df : DataFrame
        Candidate companies with ``factset_entity_id`` and ``CKS_Rank``.
    n : int, default 45
        NOTE(review): currently unused — the total budget is hard-coded to 50
        below; presumably ``n`` was meant to parameterize it. TODO confirm.

    Returns
    -------
    (DataFrame, dict)
        Selected entities (sorted by Order, then mkt_usd descending) and the
        possibly-extended ``aiid_dic``.
    """
    def get_top_entities(ai_companies, existing_entities=None):
        """For each AI company, keep its top-10 suppliers and top-3 direct
        partners (by mkt_usd) that are eligible and not already claimed."""
        # BUG FIX: the original used a mutable default (existing_entities=set()),
        # which is shared across *calls* to filter_entities and silently carried
        # exclusions from one invocation into the next.
        if existing_entities is None:
            existing_entities = set()
        filtered_results = []
        for ai_company_id in ai_companies:
            # Top suppliers for this AI company, restricted to eligible ids
            # that have not been claimed by a previously processed company.
            suppliers_for_company = suppliers[
                (suppliers.factset_entity_id == ai_company_id)
                & (suppliers.related_entity_id.isin(eligible_ids))
                & (~suppliers.related_entity_id.isin(existing_entities))
            ].sort_values('mkt_usd', ascending=False)
            top_suppliers = suppliers_for_company.head(10)
            # Same filter for direct partners, but keep only the top 3.
            partners_for_company = partners_direct[
                (partners_direct.factset_entity_id == ai_company_id)
                & (partners_direct.related_entity_id.isin(eligible_ids))
                & (~partners_direct.related_entity_id.isin(existing_entities))
            ].sort_values('mkt_usd', ascending=False)
            top_partners = partners_for_company.head(3)
            combined_results = pd.concat([top_suppliers, top_partners])
            combined_results['AI_Company'] = ai_company_id
            combined_results['Order'] = aiid_dic[ai_company_id]
            filtered_results.append(combined_results)
            # Claim these entities so later companies cannot reuse them.
            existing_entities.update(combined_results.related_entity_id.tolist())
        final_results = pd.concat(filtered_results)
        final_results = final_results.sort_values(['Order', 'mkt_usd'],
                                                  ascending=[True, False])
        return final_results, existing_entities

    # Initial pass over the companies already in aiid_dic.
    top_entities, existing_entities = get_top_entities(aiid_dic.keys())
    top_n = top_entities.drop_duplicates(subset='related_entity_id')
    # Budget: 50 total slots shared between AI companies and related entities.
    target_n = 50 - len(aiid_dic)
    while len(top_n) < target_n:
        # Pull in the next-best company (lowest CKS_Rank) not yet in aiid_dic.
        new_companies = step3_df[
            ~step3_df.factset_entity_id.isin(aiid_dic.keys())
        ].sort_values('CKS_Rank')
        if len(new_companies) == 0:
            print(f"Warning: Ran out of new companies to add. Only found {len(top_n)} unique entities.")
            break
        next_highest = new_companies.iloc[0]
        new_ai_id = next_highest.factset_entity_id
        new_order = max(aiid_dic.values()) + 1
        aiid_dic[new_ai_id] = new_order
        # Entities contributed by the newly added AI company.
        new_entities, existing_entities = get_top_entities([new_ai_id], existing_entities)
        # Drop any overlap with the current selection before concatenating.
        overlap_ids = set(new_entities.related_entity_id) & set(top_n.related_entity_id)
        if overlap_ids:
            top_n = top_n[~top_n.related_entity_id.isin(overlap_ids)]
            existing_entities.difference_update(overlap_ids)
        top_n = pd.concat([top_n, new_entities])
        top_n = top_n.drop_duplicates(subset='related_entity_id')
        # The budget shrinks as aiid_dic grows, so the loop always terminates.
        target_n = 50 - len(aiid_dic)
    # Trim in case the last addition overshot the budget.
    top_n = top_n.head(target_n)
    return top_n, aiid_dic
AI is proficient in leveraging public APIs, such as Treasury yields and the OECD's CLI metrics:
# need to programmatically extract the data from the OECD website
from io import StringIO
import requests
import pandas as pd
import xml.etree.ElementTree as ET
def fetch_oecd_data(url):
    """Download an OECD SDMX-ML (generic) data message and tabulate it.

    Returns a DataFrame with columns 'Time period' (formatted as YYYY-Mon)
    and numeric 'Value', or None on HTTP failure or an empty payload.
    """
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None
    root = ET.fromstring(response.content)
    # Namespace of the SDMX 2.1 generic-data message format.
    ns = {'ns': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic'}
    rows = []
    for series_node in root.findall('.//ns:Series', ns):
        for obs_node in series_node.findall('.//ns:Obs', ns):
            dim = obs_node.find('.//ns:ObsDimension[@id="TIME_PERIOD"]', ns)
            val = obs_node.find('.//ns:ObsValue', ns)
            if dim is None or val is None:
                continue
            period = dim.attrib.get('value')
            amount = val.attrib.get('value')
            if period and amount:
                rows.append({'Time period': period, 'Value': amount})
    if not rows:
        print("No data found in the XML response.")
        return None
    frame = pd.DataFrame(rows)
    frame['Value'] = pd.to_numeric(frame['Value'], errors='coerce')
    # Reformat YYYY-MM periods as YYYY-Mon; unparseable rows become NaN
    # and are dropped below together with non-numeric values.
    frame['Time period'] = pd.to_datetime(frame['Time period'], format='%Y-%m',
                                          errors='coerce').dt.strftime('%Y-%b')
    return frame.dropna()
# Monthly G7 composite leading indicator (amplitude adjusted), 2014-01..2024-08.
url = "https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_CLI,/G7.M.LI...AA...H?startPeriod=2014-01&endPeriod=2024-08"
oecd_data = fetch_oecd_data(url)
if oecd_data is not None:
    # BUG FIX: 'Time period' holds strings like '2014-Apr', so a plain
    # sort_values is alphabetical within each year (Apr, Aug, Dec, Feb, ...).
    # Sort on the parsed dates instead to get true chronological order.
    oecd_data = oecd_data.sort_values(
        'Time period', key=lambda col: pd.to_datetime(col, format='%Y-%b'))
    print("Composite leading indicators")
    print("Reference area: G7 • Frequency of observation: Monthly • Measure: Composite leading indicator (CLI) • Calculation methodology: OECD harmonised")
    print("Unit of measure: Index, Amplitude adjusted")
    print("\nTime period\t\tValue")
    for _, row in oecd_data.iterrows():
        print(f"{row['Time period']}\t\t{row['Value']:.2f}")
    # NOTE(review): the bare expression below does nothing outside a notebook —
    # presumably a spot check of the Sep-2022 value. Confirm intent.
    oecd_data[oecd_data['Time period'] == '2022-Sep'] #98.99 but JZ's 98.56
# treasury data from the U.S. Treasury's Fiscal Data API
import requests
import pandas as pd
from datetime import datetime
def fetch_treasury_data(start_date, end_date, security_type):
    """Query the Treasury Fiscal Data average-interest-rates endpoint.

    Returns a DataFrame with record_date (datetime), security_desc and a
    numeric avg_interest_rate_amt for the requested security type, or None
    when the request fails or no rows come back.
    """
    base_url = ("https://api.fiscaldata.treasury.gov/services/api/"
                "fiscal_service/v2/accounting/od/avg_interest_rates")
    query = {
        "fields": "record_date,security_desc,avg_interest_rate_amt",
        "filter": (f"record_date:gte:{start_date},record_date:lte:{end_date},"
                   f"security_desc:eq:{security_type}"),
        "sort": "record_date",
        "format": "json",
        "page[size]": 10000,  # large page to fetch everything in one request
    }
    response = requests.get(base_url, params=query)
    if response.status_code != 200:
        print(f"Failed to retrieve data for {security_type}. Status code: {response.status_code}")
        return None
    payload = response.json()
    records = payload.get('data')
    if not records:
        print(f"No data returned for {security_type}. API response:")
        print(payload)
        return None
    frame = pd.DataFrame(records)
    frame['record_date'] = pd.to_datetime(frame['record_date'])
    frame['avg_interest_rate_amt'] = pd.to_numeric(frame['avg_interest_rate_amt'])
    return frame
def combine_treasury_yields(start_date, end_date):
    """Build a monthly table of bill (3-month) and bond (10-year) average
    interest rates between start_date and end_date.

    Returns a DataFrame with record_date plus whichever of 3_month_yield /
    10_year_yield could be fetched, or None when neither series is available.
    """
    bills = fetch_treasury_data(start_date, end_date, "Treasury Bills")
    bonds = fetch_treasury_data(start_date, end_date, "Treasury Bonds")
    if bills is None and bonds is None:
        return None
    # Rename and keep only the columns we publish, per series.
    if bills is not None:
        bills = bills.rename(columns={'avg_interest_rate_amt': '3_month_yield'})
        bills = bills[['record_date', '3_month_yield']]
    if bonds is not None:
        bonds = bonds.rename(columns={'avg_interest_rate_amt': '10_year_yield'})
        bonds = bonds[['record_date', '10_year_yield']]
    if bills is not None and bonds is not None:
        merged = bills.merge(bonds, on='record_date', how='outer')
    elif bills is not None:
        merged = bills
    else:
        merged = bonds
    merged = merged.sort_values('record_date')
    # Collapse to month-end frequency, keeping the last observation per month.
    monthly = merged.resample('M', on='record_date').last()
    return monthly.reset_index()
# Set date range: from 2004 through today (local date, ISO format).
start_date = "2004-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")
# Fetch and combine the bill/bond series into one monthly table.
treasury_yields = combine_treasury_yields(start_date, end_date)
if treasury_yields is not None:
    # Quick sanity dump: preview rows, dimensions, and column names.
    print("\nFirst few rows of the data:")
    print(treasury_yields.head())
    print(f"\nShape of the DataFrame: {treasury_yields.shape}")
    print("\nColumns in the DataFrame:")
    print(treasury_yields.columns)
else:
    # combine_treasury_yields returns None only when both API calls failed.
    print("Failed to retrieve treasury yield data.")
A new trick for applying conditions:
# Label each row's cycle phase with numpy.select: the condition masks are
# evaluated in order and the first True one wins per element.
# NOTE(review): recession_condition etc. are boolean masks defined elsewhere
# in the file, and tr is presumably a DataFrame — confirm upstream.
tr['tr_cycle'] = np.select(
    [recession_condition, expansion_condition, recovery_condition, retracement_condition],
    ['recession', 'expansion', 'recovery', 'retracement'],
    default='unknown'  # rows matching none of the conditions fall back to 'unknown'
)