Architecture of a Multi-Index Agent

After successfully creating a single-index agent, I am now considering building a multi-index agent. The benefits are obvious:

Scalability: Add new indexes without changing core code
Maintainability: Each index config is isolated
Flexibility: Easy to modify methodology for one index without affecting others
Testability: Test each index independently
Reusability: Share common logic (QC, processing rules, weighting strategies)
Version Control: Track methodology changes in config files
User-Friendly: Users select which index to run from UI
Audit Trail: Track which config was used for each run

It also comes with the following drawbacks:

More complex initial setup
May be over-engineered if only 2-3 indexes
Requires good documentation
Config files need to be well-maintained

The steps are:

1. Configuration-Driven Methodology System: instead of hard-coding index rules, use configuration files:

config/
├── indexes/
│ ├── ev_index.yaml # Current EV Index
│ ├── renewable_energy_index.yaml
│ ├── tech_index.yaml
│ └── …
├── processing_rules/
│ ├── ev_filters.py
│ ├── renewable_filters.py
│ └── base_filters.py
└── weighting_strategies/
  ├── geographic_split.py
  ├── market_cap_weighted.py
  └── equal_weighted.py

2. Abstract Base Classes Pattern: create a hierarchy of abstract classes:

  # index_base.py
  from abc import ABC, abstractmethod

  class BaseIndex(ABC):
      """Base class for all indexes.

      A subclass provides the config loader, the three collaborator factories
      (processor, weighting engine, QC tools) and ``run_main_query``.
      ``execute_query`` then runs the shared pipeline on top of them.
      """

      def __init__(self, config_path: str):
          """Load the configuration, then build the collaborators.

          ``self.config`` is assigned before the ``get_*`` factories are called,
          so those factories may read it.

          Args:
              config_path: Location handed to ``load_config``.
          """
          self.config = self.load_config(config_path)
          self.processor = self.get_processor()
          self.weighting_engine = self.get_weighting_engine()
          self.qc_tools = self.get_qc_tools()

      @abstractmethod
      def load_config(self, config_path: str) -> dict:
          """Load index-specific configuration"""
          pass

      @abstractmethod
      def get_processor(self) -> 'BaseProcessor':
          """Return appropriate data processor"""
          pass

      @abstractmethod
      def get_weighting_engine(self) -> 'BaseWeighting':
          """Return appropriate weighting engine"""
          pass

      @abstractmethod
      def get_qc_tools(self) -> 'BaseQC':
          """Return appropriate QC tools"""
          pass

      def run_main_query(self, query_date: str):
          """Fetch the raw data for ``query_date``; subclasses must override.

          ``execute_query`` calls this hook, so it is declared here instead of
          surfacing later as an ``AttributeError``.  It is deliberately not an
          ``@abstractmethod`` so existing subclasses can still be instantiated.

          Raises:
              NotImplementedError: Always, unless a subclass overrides it.
          """
          raise NotImplementedError(
              f"{type(self).__name__} must implement run_main_query()"
          )

      def generate_additional_tabs(self, final_index, query_date: str) -> dict:
          """Return extra named outputs to merge into the result.

          Optional hook: the default contributes nothing.  Overrides receive the
          selected (unweighted) components and the query date.
          """
          return {}

      def execute_query(self, query_date: str):
          """Run the full pipeline for ``query_date``.

          Returns:
              dict with the keys ``raw``, ``processed``, ``final_index``,
              ``weighted_index`` and ``qc_results``, plus whatever
              ``generate_additional_tabs`` returns.  The extra entries are
              unpacked last, so a tab that reuses one of the fixed keys
              replaces it.
          """
          # 1. Run main SQL query
          raw_df = self.run_main_query(query_date)

          # 2. Process data using processor
          processed = self.processor.process_data(raw_df)

          # 3. Select components
          final_index = self.processor.apply_selection(processed)

          # 4. Apply weighting
          weighted_index = self.weighting_engine.apply_weighting(final_index)

          # 5. Run QC (on the weighted result)
          qc_results = self.qc_tools.run_checks(weighted_index)

          # 6. Generate additional tabs (from the unweighted selection)
          additional_tabs = self.generate_additional_tabs(final_index, query_date)

          return {
              'raw': raw_df,
              'processed': processed,
              'final_index': final_index,
              'weighted_index': weighted_index,
              'qc_results': qc_results,
              **additional_tabs
          }


  class BaseProcessor(ABC):
      """Interface for the data-processing stage of an index."""

      @abstractmethod
      def process_data(self, df: pd.DataFrame) -> pd.DataFrame:
          """Apply the processing rules to ``df`` and return the result."""
          ...

      @abstractmethod
      def apply_selection(self, df: pd.DataFrame) -> pd.DataFrame:
          """Select the index components from ``df`` and return them."""
          ...


  class BaseWeighting(ABC):
      """Interface for a weighting strategy."""

      @abstractmethod
      def apply_weighting(self, df: pd.DataFrame) -> pd.DataFrame:
          """Apply the weighting methodology to ``df`` and return the result."""
          ...


  class BaseQC(ABC):
      """Interface for a set of QC checks."""

      @abstractmethod
      def run_checks(self, df: pd.DataFrame) -> dict:
          """Run the QC checks against ``df`` and return their results."""
          ...

3. Index Registry / Factory Pattern to centralize index management

 # index_registry.py
  class IndexRegistry:
      """Class-level table mapping index codes to index classes."""

      _indexes = {}

      @classmethod
      def register(cls, index_code: str, index_class: type):
          """Store ``index_class`` under ``index_code``, replacing any earlier entry."""
          cls._indexes[index_code] = index_class

      @classmethod
      def get_index(cls, index_code: str, config_path: str = None):
          """Build an instance of the index registered under ``index_code``.

          When ``config_path`` is omitted it defaults to
          ``config/indexes/<lower-cased index_code>.yaml``.

          Raises:
              ValueError: If ``index_code`` has not been registered.
          """
          registered = cls._indexes
          if index_code not in registered:
              raise ValueError(f"Index {index_code} not registered")

          index_class = registered[index_code]
          resolved_path = (
              config_path
              if config_path is not None
              else f"config/indexes/{index_code.lower()}.yaml"
          )
          return index_class(resolved_path)

      @classmethod
      def list_indexes(cls) -> list:
          """Return the registered index codes as a new list."""
          return [*cls._indexes]


  # Register each index class with the registry under its index code.
  # NOTE(review): EVIndex, RenewableEnergyIndex and TechIndex are not defined in
  # this snippet; presumably they are imported from their own modules. Confirm.
  IndexRegistry.register("EV_IDX", EVIndex)
  IndexRegistry.register("RENEWABLE_IDX", RenewableEnergyIndex)
  IndexRegistry.register("TECH_IDX", TechIndex)

4. Modular Processing Rules: make the rules pluggable

 # processing_rules.py
  class ProcessingRule(ABC):
      """Interface for a single pluggable processing rule."""

      @abstractmethod
      def apply(self, df: pd.DataFrame) -> pd.DataFrame:
          """Apply the rule to ``df`` and return the resulting frame."""
          ...

      @abstractmethod
      def get_description(self) -> str:
          """Return a human-readable description of the rule."""
          ...


  class RemoveEntityRule(ProcessingRule):
      """Processing rule that drops the rows of specific entities.

      Rows are matched on the ``factset_entity_id`` column.
      """

      def __init__(self, entity_ids: list):
          """Remember the entity ids to exclude."""
          self.entity_ids = entity_ids

      def apply(self, df: pd.DataFrame) -> pd.DataFrame:
          """Return ``df`` without the rows whose ``factset_entity_id`` is excluded."""
          return df[~df['factset_entity_id'].isin(self.entity_ids)]

      def get_description(self) -> str:
          """Describe the rule, listing the excluded ids.

          Each id goes through ``str`` first: ``str.join`` raises ``TypeError``
          on non-string items, which would break the description for ids that
          are not strings (e.g. integers).
          """
          return f"Remove entities: {', '.join(map(str, self.entity_ids))}"


  class MinimumThresholdRule(ProcessingRule):
      """Processing rule that keeps only rows meeting per-field minimums."""

      def __init__(self, thresholds: dict):
          """Remember the ``{field: minimum value}`` mapping."""
          self.thresholds = thresholds

      def apply(self, df: pd.DataFrame) -> pd.DataFrame:
          """Return the rows of ``df`` that satisfy every threshold.

          The filters are applied one field at a time; ``df`` is rebound to
          each filtered result, so the frame passed in is not modified.
          """
          for field, min_value in self.thresholds.items():
              df = df[df[field] >= min_value]
          return df

      def get_description(self) -> str:
          """Describe the rule, listing each field with its minimum.

          ``ProcessingRule`` declares this method abstract; without it the
          class cannot be instantiated at all.
          """
          conditions = ", ".join(
              f"{field} >= {min_value}"
              for field, min_value in self.thresholds.items()
          )
          return f"Apply minimum thresholds: {conditions}"


  # Build processing pipeline from config
  def build_processor_from_config(config: dict) -> BaseProcessor:
      """Build a ``ConfigurableProcessor`` from ``config['processing_rules']``.

      Each entry's ``name`` is passed to ``RuleFactory.create`` as the rule
      type, together with the entry itself.  Rules keep their config order.
      """
      pipeline = [
          RuleFactory.create(entry['name'], entry)
          for entry in config['processing_rules']
      ]
      return ConfigurableProcessor(pipeline)

5. Generic QC Framework: generic QC checks configured with index-specific parameters:

# qc_framework.py
  class QCCheck(ABC):
      """Interface for a single QC check."""

      @abstractmethod
      def run(self, df: pd.DataFrame) -> dict:
          """Run the check against ``df`` and return its result."""
          ...


  class TotalWeightCheck(QCCheck):
      """QC check comparing the summed ``final_weight`` column with a target."""

      def __init__(self, target: float = 1.0, tolerance: float = 1e-6):
          """Store the expected total and the allowed absolute deviation."""
          self.target, self.tolerance = target, tolerance

      def run(self, df: pd.DataFrame) -> dict:
          """Sum ``final_weight`` and report how far it is from the target.

          The check passes only when the absolute deviation is strictly below
          the tolerance.
          """
          total = df['final_weight'].sum()
          gap = abs(total - self.target)

          return dict(
              name='Total Weight',
              passed=gap < self.tolerance,
              expected=self.target,
              actual=total,
              deviation=gap,
          )


  class ComponentCountCheck(QCCheck):
      """QC check comparing the number of rows with an expected count."""

      def __init__(self, expected_count: int):
          """Store the row count the frame is expected to have."""
          self.expected_count = expected_count

      def run(self, df: pd.DataFrame) -> dict:
          """Count the rows of ``df`` and report whether the count matches exactly."""
          row_count = len(df)

          return dict(
              name='Component Count',
              passed=row_count == self.expected_count,
              expected=self.expected_count,
              actual=row_count,
          )


  # Build QC suite from config
  def build_qc_from_config(config: dict) -> BaseQC:
      """Build a ``ConfigurableQC`` from ``config['qc']['checks']``.

      That mapping is read as ``{check name: params}``; each pair is passed to
      ``QCCheckFactory.create``.  Checks keep the mapping's iteration order.
      """
      suite = [
          QCCheckFactory.create(check_name, params)
          for check_name, params in config['qc']['checks'].items()
      ]
      return ConfigurableQC(suite)

6. Modified Agent Architecture

  # generic_agent.py
  class GenericIndexAgent:
      """Generic agent that runs whichever registered index is selected."""

      def __init__(self):
          """Create the Anthropic client and snapshot the registered index codes."""
          self.api_key = os.getenv('ANTHROPIC_API_KEY')
          self.client = anthropic.Anthropic(api_key=self.api_key)

          # Current index (set by user through set_index)
          self.current_index = None
          self.current_index_code = None

          # Codes known at construction time; an index registered afterwards
          # is not added to this list.
          self.available_indexes = IndexRegistry.list_indexes()

      def set_index(self, index_code: str):
          """Set the active index.

          The index instance is built before any attribute is updated, so a
          failed lookup leaves the previous selection in place.

          Raises:
              ValueError: Propagated from ``IndexRegistry.get_index`` when
                  ``index_code`` is not registered.
          """
          self.current_index = IndexRegistry.get_index(index_code)
          self.current_index_code = index_code
          logger.info("Switched to index: %s", index_code)

      def execute_query(self, query_date: str = None):
          """Execute the query for the current index and return its result.

          Raises:
              ValueError: If no index has been selected yet.
          """
          if self.current_index is None:
              raise ValueError("No index selected. Use set_index() first.")

          return self.current_index.execute_query(query_date)

      def list_available_indexes(self) -> list:
          """Return one ``{'code', 'name', 'description'}`` dict per available index.

          Each index is instantiated once per entry to read the ``index``
          section of its config.  Building it separately for the name and for
          the description would construct every index twice.
          """
          summaries = []
          for code in self.available_indexes:
              index_meta = IndexRegistry.get_index(code).config['index']
              summaries.append({
                  'code': code,
                  'name': index_meta['name'],
                  'description': index_meta['description'],
              })
          return summaries

7. Update UI

 # app.py additions
  def display_index_selector():
      """Display the index selection dropdown in the sidebar.

      Reads the available indexes from ``st.session_state.agent``.  When the
      chosen code differs from ``st.session_state.current_index_code``, the
      agent is switched to it, the new code is stored in the session state and
      the script is rerun.
      """
      with st.sidebar:
          st.markdown("### Select Index")

          indexes = st.session_state.agent.list_available_indexes()

          # With no options the selectbox has nothing to return, and the
          # name -> code lookup below would raise KeyError; stop early instead.
          if not indexes:
              st.warning("No indexes are available.")
              return

          # Display name -> index code.  Two indexes sharing a name collapse
          # into one entry, with the later code winning.
          index_options = {
              idx['name']: idx['code']
              for idx in indexes
          }

          selected_index_name = st.selectbox(
              "Choose index to run",
              options=list(index_options.keys()),
              help="Select which index methodology to use"
          )

          selected_code = index_options[selected_index_name]

          # Switch only on an actual change, so the rerun below does not loop.
          if st.session_state.get('current_index_code') != selected_code:
              st.session_state.agent.set_index(selected_code)
              st.session_state.current_index_code = selected_code
              st.success(f"Switched to: {selected_index_name}")
              # NOTE(review): the rerun restarts the script right after the
              # success message is issued, so it may not stay visible; confirm
              # this is the intended UX.
              st.rerun()

8. Database-Backed Configuration: once the design is official, store all configs in a database

 -- Index configuration table
  CREATE TABLE index_configurations (
      index_code    VARCHAR(50),
      index_name    VARCHAR(255),
      description   TEXT,
      config_json   TEXT,                  -- serialized YAML/JSON config
      active        BOOLEAN DEFAULT TRUE,
      created_date  DATE,
      last_modified DATE,
      PRIMARY KEY (index_code)             -- one row per index code
  );

  -- Index run history
  CREATE TABLE index_run_history (
      run_id           BIGINT,
      index_code       VARCHAR(50),
      query_date       DATE,
      run_timestamp    TIMESTAMP,
      components_count INT,
      qc_passed        BOOLEAN,
      output_file_path VARCHAR(500),
      PRIMARY KEY (run_id)                 -- one row per run
  );

Naixian Zhang

Architecture of a Multi-Index Agent

Leave a comment Cancel reply

Share this:

Related

Leave a comment Cancel reply