Finance Agent uses PostgreSQL with pgvector for storing embeddings and metadata, while full documents are stored in Railway S3. This hybrid approach balances search performance with cost-effective storage.
┌─────────────────────────────────────────────────────────────┐│ PostgreSQL + pgvector ││ • Embeddings (384-dim vectors) ││ • Metadata (ticker, year, quarter, section) ││ • Chunk text (truncated for search context) │└─────────────────────────────────────────────────────────────┘┌─────────────────────────────────────────────────────────────┐│ Railway S3 ││ • Full transcript text ││ • Complete SEC filing documents │└─────────────────────────────────────────────────────────────┘
Stores chunked earnings call transcripts with vector embeddings for semantic search.
CREATE TABLE transcript_chunks ( id SERIAL PRIMARY KEY, chunk_text TEXT NOT NULL, -- Transcript chunk (1000 chars, 200 overlap) embedding VECTOR(384), -- all-MiniLM-L6-v2 embedding ticker VARCHAR(10) NOT NULL, -- Company ticker (e.g., "AAPL") year INTEGER NOT NULL, -- Year (e.g., 2024) quarter INTEGER NOT NULL, -- Quarter (1-4) chunk_index INTEGER, -- Position in transcript speaker VARCHAR(255), -- Speaker name (if available) metadata JSONB, -- Additional metadata created_at TIMESTAMP DEFAULT NOW());-- Indexes for performanceCREATE INDEX idx_transcript_ticker ON transcript_chunks(ticker);CREATE INDEX idx_transcript_year_quarter ON transcript_chunks(year, quarter);CREATE INDEX idx_transcript_embedding ON transcript_chunks USING ivfflat (embedding vector_cosine_ops);
Stores chunked SEC 10-K annual filing text with section metadata for targeted retrieval.
CREATE TABLE ten_k_chunks ( id SERIAL PRIMARY KEY, chunk_text TEXT NOT NULL, -- Filing chunk (variable size) embedding VECTOR(384), -- all-MiniLM-L6-v2 embedding ticker VARCHAR(10) NOT NULL, -- Company ticker fiscal_year INTEGER NOT NULL, -- Fiscal year of filing sec_section VARCHAR(50), -- Section identifier (item_1, item_7, etc.) sec_section_title VARCHAR(255), -- Human-readable section name is_financial_statement BOOLEAN, -- True if chunk is from financial statements chunk_index INTEGER, -- Position within section metadata JSONB, -- Additional context created_at TIMESTAMP DEFAULT NOW());-- IndexesCREATE INDEX idx_10k_ticker_year ON ten_k_chunks(ticker, fiscal_year);CREATE INDEX idx_10k_section ON ten_k_chunks(sec_section);CREATE INDEX idx_10k_embedding ON ten_k_chunks USING ivfflat (embedding vector_cosine_ops);
Stores extracted financial tables from SEC 10-K filings in structured JSONB format.
CREATE TABLE ten_k_tables ( id SERIAL PRIMARY KEY, ticker VARCHAR(10) NOT NULL, fiscal_year INTEGER NOT NULL, table_index INTEGER, -- Position in filing content JSONB NOT NULL, -- Structured table data statement_type VARCHAR(100), -- income_statement, balance_sheet, cash_flow is_financial_statement BOOLEAN, -- True for core financial statements sec_section VARCHAR(50), -- Section where table appears metadata JSONB, created_at TIMESTAMP DEFAULT NOW());-- IndexesCREATE INDEX idx_tables_ticker_year ON ten_k_tables(ticker, fiscal_year);CREATE INDEX idx_tables_statement_type ON ten_k_tables(statement_type);CREATE INDEX idx_tables_content ON ten_k_tables USING gin (content);
-- Enable pgvector extensionCREATE EXTENSION IF NOT EXISTS vector;-- Create IVFFlat index for fast approximate nearest neighbor search-- Lists parameter: sqrt(total_rows) is a good heuristicCREATE INDEX idx_transcript_embeddingON transcript_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);CREATE INDEX idx_10k_embeddingON ten_k_chunks USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
-- Get balance sheet tables for Apple 2023SELECT contentFROM ten_k_tablesWHERE ticker = 'AAPL' AND fiscal_year = 2023 AND statement_type = 'balance_sheet' AND is_financial_statement = true;