Compact Function Design by AI

I am learning a great deal from AI!

Even with the world’s best intelligence tools, we must continue to learn along the way. In the future, we will be able to converse with AI and ask it to tackle the most challenging problems. This means we need to “KNOW” more, but we don’t necessarily have to spend time on tedious details to make things work; that responsibility should be handed over to AI. Instead, we should focus on mastering the highest forms of intelligence, which involve reasoning, inferring, and designing, all supported by a deep understanding.

For example, the function below contains three clever design choices: 1. splitting the logic into a nested helper that performs the repeated filtering work, while the enclosing loop decides whether another pass is needed; 2. duplicate checking and handling; 3. crafty use of Python dictionaries.

import pandas as pd

def filter_entities(suppliers, partners_direct, eligible_ids, aiid_dic, step3_df, n=45):
    def get_top_entities(ai_companies, existing_entities=None):
        # Default to None to avoid Python's mutable-default-argument pitfall
        if existing_entities is None:
            existing_entities = set()
        filtered_results = []
        for ai_company_id in ai_companies:
            # Filter suppliers for this AI company
            suppliers_for_company = suppliers[(suppliers.factset_entity_id == ai_company_id) & 
                                              (suppliers.related_entity_id.isin(eligible_ids)) & 
                                              (~suppliers.related_entity_id.isin(existing_entities))]
            suppliers_for_company = suppliers_for_company.sort_values('mkt_usd', ascending=False)
            top_suppliers = suppliers_for_company.head(10)

            # Filter direct partners for this AI company
            partners_for_company = partners_direct[(partners_direct.factset_entity_id == ai_company_id) & 
                                                   (partners_direct.related_entity_id.isin(eligible_ids)) & 
                                                   (~partners_direct.related_entity_id.isin(existing_entities))]
            partners_for_company = partners_for_company.sort_values('mkt_usd', ascending=False)
            top_partners = partners_for_company.head(3)

            # Combine results
            combined_results = pd.concat([top_suppliers, top_partners])
            combined_results['AI_Company'] = ai_company_id
            combined_results['Order'] = aiid_dic[ai_company_id]
            
            filtered_results.append(combined_results)
            existing_entities.update(combined_results.related_entity_id.tolist())

        # Combine all results and sort
        final_results = pd.concat(filtered_results)
        final_results = final_results.sort_values(['Order', 'mkt_usd'], ascending=[True, False])
        return final_results, existing_entities

    # Get initial top entities for the original aiid_dic
    top_entities, existing_entities = get_top_entities(aiid_dic.keys())
    top_n = top_entities.drop_duplicates(subset='related_entity_id')

    # Target enough related entities that, together with the AI companies, the list totals 50
    target_n = 50 - len(aiid_dic)

    while len(top_n) < target_n:
        # Add the next highest CKS rank from step3
        new_companies = step3_df[~step3_df.factset_entity_id.isin(aiid_dic.keys())].sort_values('CKS_Rank')
        
        if len(new_companies) == 0:
            print(f"Warning: Ran out of new companies to add. Only found {len(top_n)} unique entities.")
            break
        
        next_highest = new_companies.iloc[0]
        new_ai_id = next_highest.factset_entity_id
        new_order = max(aiid_dic.values()) + 1
        aiid_dic[new_ai_id] = new_order

        # Get entities for the new AI company
        new_entities, existing_entities = get_top_entities([new_ai_id], existing_entities)

        # Check for overlap and remove if necessary
        overlap_ids = set(new_entities.related_entity_id) & set(top_n.related_entity_id)
        if overlap_ids:
            top_n = top_n[~top_n.related_entity_id.isin(overlap_ids)]
            existing_entities.difference_update(overlap_ids)

        # Add new entities to top_n, prioritizing suppliers then partners
        top_n = pd.concat([top_n, new_entities])
        top_n = top_n.drop_duplicates(subset='related_entity_id')
        
        # Update target_n as aiid_dic has grown
        target_n = 50 - len(aiid_dic)

    # Ensure we have at most target_n entities
    top_n = top_n.head(target_n)

    return top_n, aiid_dic
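
To make the interface concrete, here is a minimal, hypothetical usage sketch with toy DataFrames. The column names (factset_entity_id, related_entity_id, mkt_usd, CKS_Rank) come from the function body; every ID and value below is made up:

# Toy relationship tables: each row links an AI company to a related entity
suppliers = pd.DataFrame({
    'factset_entity_id': ['AI1', 'AI1', 'AI2'],
    'related_entity_id': ['S1', 'S2', 'S3'],
    'mkt_usd': [500.0, 300.0, 400.0],
})
partners_direct = pd.DataFrame({
    'factset_entity_id': ['AI1', 'AI2'],
    'related_entity_id': ['P1', 'S1'],  # 'S1' overlaps with a supplier on purpose
    'mkt_usd': [250.0, 100.0],
})
eligible_ids = {'S1', 'S2', 'S3', 'P1'}
aiid_dic = {'AI1': 1, 'AI2': 2}  # AI companies mapped to display order
step3_df = pd.DataFrame({'factset_entity_id': ['AI3'], 'CKS_Rank': [1]})

top_n, aiid_dic = filter_entities(suppliers, partners_direct,
                                  eligible_ids, aiid_dic, step3_df)
print(top_n[['AI_Company', 'related_entity_id', 'mkt_usd']])

With this toy data the while loop quickly exhausts step3_df, prints the warning, and returns the four unique related entities along with the updated aiid_dic.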

AI is also proficient at leveraging public APIs, such as those serving Treasury yields and the OECD's CLI metrics:

# need to programmatically extract the data from the OECD website
from io import StringIO
import requests
import pandas as pd
import xml.etree.ElementTree as ET

def fetch_oecd_data(url):
    response = requests.get(url)
    
    if response.status_code == 200:
        root = ET.fromstring(response.content)
        ns = {'ns': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic'}
        
        data = []
        for series in root.findall('.//ns:Series', ns):
            for obs in series.findall('.//ns:Obs', ns):
                time_element = obs.find('.//ns:ObsDimension[@id="TIME_PERIOD"]', ns)
                value_element = obs.find('.//ns:ObsValue', ns)
                
                if time_element is not None and value_element is not None:
                    time = time_element.attrib.get('value')
                    value = value_element.attrib.get('value')
                    if time and value:
                        data.append({'Time period': time, 'Value': value})
        
        if not data:
            print("No data found in the XML response.")
            return None
        
        df = pd.DataFrame(data)
        df['Value'] = pd.to_numeric(df['Value'], errors='coerce')
        df['Time period'] = pd.to_datetime(df['Time period'], format='%Y-%m', errors='coerce').dt.strftime('%Y-%b')
        df = df.dropna()
        
        return df
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

url = "https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_CLI,/G7.M.LI...AA...H?startPeriod=2014-01&endPeriod=2024-08"

oecd_data = fetch_oecd_data(url)

if oecd_data is not None:
    oecd_data = oecd_data.sort_values('Time period')
    print("Composite leading indicators")
    print("Reference area: G7 • Frequency of observation: Monthly • Measure: Composite leading indicator (CLI) • Calculation methodology: OECD harmonised")
    print("Unit of measure: Index, Amplitude adjusted")
    print("\nTime period\t\tValue")
    for _, row in oecd_data.iterrows():
        print(f"{row['Time period']}\t\t{row['Value']:.2f}")
# Spot-check a single month: the API returns 98.99 for 2022-Sep, versus JZ's 98.56
if oecd_data is not None:
    print(oecd_data[oecd_data['Time period'] == '2022-Sep'])
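
The SDMX REST URL packs the dataflow and a dot-separated dimension key into the path, so the same fetcher can be pointed at other slices. Below is a minimal sketch, assuming the dataflow keeps this dimension layout; swapping 'G7' for another area code such as 'USA' is an untested assumption on my part:

def build_cli_url(area="G7", start="2014-01", end="2024-08"):
    # Same dataflow as the URL above; only the reference-area slot of the key changes
    base = "https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_CLI,"
    key = f"{area}.M.LI...AA...H"
    return f"{base}/{key}?startPeriod={start}&endPeriod={end}"

usa_cli = fetch_oecd_data(build_cli_url("USA"))  # hypothetical variant query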

# Treasury data from the U.S. Treasury's Fiscal Data API
import requests
import pandas as pd
from datetime import datetime

def fetch_treasury_data(start_date, end_date, security_type):
    base_url = "https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v2/accounting/od/avg_interest_rates"
    
    params = {
        "fields": "record_date,security_desc,avg_interest_rate_amt",
        "filter": f"record_date:gte:{start_date},record_date:lte:{end_date},"
                  f"security_desc:eq:{security_type}",
        "sort": "record_date",
        "format": "json",
        "page[size]": 10000
    }
    
    response = requests.get(base_url, params=params)
    
    if response.status_code == 200:
        json_data = response.json()
        if 'data' in json_data and json_data['data']:
            data = json_data['data']
            df = pd.DataFrame(data)
            df['record_date'] = pd.to_datetime(df['record_date'])
            df['avg_interest_rate_amt'] = pd.to_numeric(df['avg_interest_rate_amt'])
            return df
        else:
            print(f"No data returned for {security_type}. API response:")
            print(json_data)
            return None
    else:
        print(f"Failed to retrieve data for {security_type}. Status code: {response.status_code}")
        return None

def combine_treasury_yields(start_date, end_date):
    bills = fetch_treasury_data(start_date, end_date, "Treasury Bills")
    bonds = fetch_treasury_data(start_date, end_date, "Treasury Bonds")
    
    if bills is None and bonds is None:
        return None
    
    if bills is not None:
        bills = bills.rename(columns={'avg_interest_rate_amt': '3_month_yield'})
    
    if bonds is not None:
        bonds = bonds.rename(columns={'avg_interest_rate_amt': '10_year_yield'})
    
    if bills is not None and bonds is not None:
        combined = pd.merge(bills[['record_date', '3_month_yield']], 
                            bonds[['record_date', '10_year_yield']], 
                            on='record_date', 
                            how='outer')
    elif bills is not None:
        combined = bills[['record_date', '3_month_yield']]
    elif bonds is not None:
        combined = bonds[['record_date', '10_year_yield']]
    
    combined = combined.sort_values('record_date')
    # Month-end resample; newer pandas (>=2.2) prefers the 'ME' alias over 'M'
    combined = combined.resample('M', on='record_date').last()
    combined.reset_index(inplace=True)
    
    return combined

# Set date range
start_date = "2004-01-01"
end_date = datetime.now().strftime("%Y-%m-%d")

# Fetch and combine data
treasury_yields = combine_treasury_yields(start_date, end_date)

if treasury_yields is not None:
    print("\nFirst few rows of the data:")
    print(treasury_yields.head())
    print(f"\nShape of the DataFrame: {treasury_yields.shape}")
    print("\nColumns in the DataFrame:")
    print(treasury_yields.columns)
else:
    print("Failed to retrieve treasury yield data.")

A new trick for applying conditions: numpy.select maps a list of boolean masks to a list of labels in one vectorized call.

import numpy as np

# Apply conditions using numpy.select; the *_condition variables are
# boolean masks over `tr`, defined upstream in the analysis
tr['tr_cycle'] = np.select(
    [recession_condition, expansion_condition, recovery_condition, retracement_condition],
    ['recession', 'expansion', 'recovery', 'retracement'],
    default='unknown'  # fallback label for rows matching none of the conditions
)
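
Since the *_condition masks are defined elsewhere in the analysis, here is a self-contained toy version; the column names, thresholds, and labels below are made up purely for illustration:

import numpy as np
import pandas as pd

tr = pd.DataFrame({'gdp_growth': [-1.2, 2.5, 0.8, -0.3],
                   'above_prior_peak': [False, True, False, True]})

# Hypothetical boolean masks standing in for the original conditions
recession_condition = (tr.gdp_growth < 0) & ~tr.above_prior_peak
expansion_condition = (tr.gdp_growth > 0) & tr.above_prior_peak
recovery_condition = (tr.gdp_growth > 0) & ~tr.above_prior_peak
retracement_condition = (tr.gdp_growth < 0) & tr.above_prior_peak

tr['tr_cycle'] = np.select(
    [recession_condition, expansion_condition, recovery_condition, retracement_condition],
    ['recession', 'expansion', 'recovery', 'retracement'],
    default='unknown')
print(tr)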
