Skip to content
Learn Agentic AI · 9 min read · 0 views

Building a Competitive Intelligence Agent: Monitoring Markets with AI

Design an AI agent that continuously monitors competitors through web scraping, news aggregation, and LLM-powered analysis to generate actionable competitive intelligence alerts.

Why Automate Competitive Intelligence

Tracking competitors manually does not scale. By the time someone on your team notices a competitor launched a new feature or changed their pricing, the information is days old. An AI competitive intelligence agent runs continuously, monitors multiple signal sources, analyzes changes with LLM reasoning, and delivers actionable alerts to your team in real time.

Signal Source Architecture

A robust CI agent monitors multiple data sources. Each source has its own collection mechanism but feeds into a unified analysis pipeline.

from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional


class SignalType(Enum):
    """Categories of competitive-intelligence signals the agent can emit.

    The string values are used verbatim in prompts and alert messages
    (see analyze_signal and process_and_alert).
    """

    PRICING_CHANGE = "pricing_change"      # competitor pricing page changed
    FEATURE_LAUNCH = "feature_launch"      # competitor blog/changelog changed
    HIRING_TREND = "hiring_trend"          # careers-page signal (collector not shown here)
    PRESS_MENTION = "press_mention"        # news article or RSS entry
    REVIEW_SENTIMENT = "review_sentiment"  # review-site signal (collector not shown here)
    FUNDING_EVENT = "funding_event"        # funding news (collector not shown here)


@dataclass
class CompetitorSignal:
    """One observed event about a competitor, before or after LLM analysis."""

    # Competitor display name (matches Competitor.name).
    competitor: str
    # Which monitoring channel produced this signal.
    signal_type: SignalType
    # URL of the page or article where the change was observed.
    source_url: str
    # Trimmed text snapshot fed to the LLM analysis step.
    raw_content: str
    # NOTE(review): datetime.utcnow() returns a naive datetime and is
    # deprecated since Python 3.12; consider datetime.now(timezone.utc),
    # but all other timestamps in this module are naive UTC too — change
    # them together.
    detected_at: datetime = field(default_factory=datetime.utcnow)
    # Filled in by analyze_signal(): executive summary from the LLM.
    analysis: Optional[str] = None
    severity: str = "low"  # low, medium, high, critical


@dataclass
class Competitor:
    """Configuration for one tracked competitor: which pages and terms to watch."""

    name: str
    website: str
    # Optional page URLs; monitoring of a page is skipped when the URL is None.
    pricing_url: Optional[str] = None
    blog_url: Optional[str] = None
    careers_url: Optional[str] = None
    # Extra search terms (product names, brand terms) OR-ed into news queries.
    keywords: list[str] = field(default_factory=list)

Web Scraping for Pricing and Feature Changes

The most valuable CI signal is when a competitor changes their pricing or ships a new feature. We scrape key pages and use content hashing to detect changes.

import hashlib
import httpx
from bs4 import BeautifulSoup


class PageMonitor:
    """Detects content changes on tracked competitor pages.

    Each page's visible text is reduced to a SHA-256 digest; a signal is
    emitted only when the digest differs from the one stored previously.
    """

    def __init__(self, db):
        # Persistence layer exposing async get_page_hash / update_page_hash.
        self.db = db

    async def check_page(
        self, url: str, competitor: str, signal_type: SignalType
    ) -> Optional[CompetitorSignal]:
        """Fetch *url* and compare its content hash against the stored one.

        Returns a CompetitorSignal when the page changed since the last
        check, otherwise None. The first visit only records a baseline
        hash and emits nothing.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(url, follow_redirects=True, timeout=15)
            response.raise_for_status()

        page = BeautifulSoup(response.text, "html.parser")
        # Strip boilerplate so cosmetic nav/footer/script edits don't trigger alerts.
        for noise in page.find_all(["nav", "footer", "script", "style"]):
            noise.decompose()
        text = page.get_text(separator="\n", strip=True)
        digest = hashlib.sha256(text.encode()).hexdigest()

        stored = await self.db.get_page_hash(url)
        if stored == digest:
            return None  # unchanged since last check
        await self.db.update_page_hash(url, digest)
        if not stored:
            return None  # baseline visit: remember the hash, no signal yet
        return CompetitorSignal(
            competitor=competitor,
            signal_type=signal_type,
            source_url=url,
            raw_content=text[:5000],
        )

    async def monitor_competitors(
        self, competitors: list[Competitor]
    ) -> list[CompetitorSignal]:
        """Run check_page over every tracked URL and collect the signals."""
        detected: list[CompetitorSignal] = []
        for target in competitors:
            # Pricing is checked before the blog, mirroring alert priority.
            for url, kind in (
                (target.pricing_url, SignalType.PRICING_CHANGE),
                (target.blog_url, SignalType.FEATURE_LAUNCH),
            ):
                if not url:
                    continue
                signal = await self.check_page(url, target.name, kind)
                if signal is not None:
                    detected.append(signal)
        return detected

News Monitoring with RSS and Search APIs

Complement direct scraping with news monitoring to catch press releases, analyst reports, and industry coverage.

import feedparser
from datetime import datetime, timedelta


class NewsMonitor:
    """Collects competitor press coverage from the NewsAPI search endpoint
    and from public RSS feeds, emitting PRESS_MENTION signals."""

    def __init__(self, news_api_key: str):
        # API key for https://newsapi.org; used by search_news only.
        self.api_key = news_api_key

    @staticmethod
    def _build_query(name: str, keywords: list[str]) -> str:
        """Build the boolean search query: the quoted competitor name
        OR-ed with each quoted keyword.

        Bug fix: the original unconditionally appended " OR " after the
        name, producing a malformed query with a trailing OR whenever
        the competitor had no keywords.
        """
        terms = [f'"{name}"'] + [f'"{kw}"' for kw in keywords]
        return " OR ".join(terms)

    async def search_news(
        self, competitor: Competitor, days_back: int = 1
    ) -> list[CompetitorSignal]:
        """Search NewsAPI for articles about *competitor* published in the
        last *days_back* days; wrap each hit in a CompetitorSignal.

        Raises:
            httpx.HTTPStatusError: on a non-2xx API response (the original
                silently parsed error payloads as if they were results).
        """
        query = self._build_query(competitor.name, competitor.keywords)
        # NOTE(review): utcnow() is naive and deprecated since 3.12; kept
        # for consistency with CompetitorSignal.detected_at.
        from_date = (
            datetime.utcnow() - timedelta(days=days_back)
        ).strftime("%Y-%m-%d")

        async with httpx.AsyncClient() as client:
            resp = await client.get(
                "https://newsapi.org/v2/everything",
                params={
                    "q": query,
                    "from": from_date,
                    "sortBy": "publishedAt",
                    "apiKey": self.api_key,
                    "pageSize": 20,
                },
                timeout=15,  # consistent with PageMonitor.check_page
            )
            resp.raise_for_status()  # fail loudly on auth/quota errors
            articles = resp.json().get("articles", [])

        signals = []
        for article in articles:
            url = article.get("url")
            if not url:
                continue  # skip malformed API entries instead of raising KeyError
            signals.append(CompetitorSignal(
                competitor=competitor.name,
                signal_type=SignalType.PRESS_MENTION,
                source_url=url,
                raw_content=(
                    f"{article.get('title', '')}\n{article.get('description', '')}"
                ),
            ))
        return signals

    async def check_rss_feeds(
        self, feeds: dict[str, str]
    ) -> list[CompetitorSignal]:
        """Check RSS feeds. feeds = {competitor_name: feed_url}.

        Emits a signal for every entry published within the last 24 hours.
        Bug fix: entries without a parse-able publication date are now
        skipped; the original crashed on them because
        entry.published_parsed is not guaranteed to exist.
        """
        signals = []
        cutoff = datetime.utcnow() - timedelta(hours=24)
        for comp_name, feed_url in feeds.items():
            feed = feedparser.parse(feed_url)
            for entry in feed.entries:
                parsed = entry.get("published_parsed")
                if not parsed:
                    continue  # feed omitted, or feedparser failed to parse, the date
                published = datetime(*parsed[:6])
                if published > cutoff:
                    signals.append(CompetitorSignal(
                        competitor=comp_name,
                        signal_type=SignalType.PRESS_MENTION,
                        source_url=entry.link,
                        raw_content=f"{entry.title}\n{entry.get('summary', '')}",
                    ))
        return signals

LLM-Powered Analysis and Severity Scoring

Raw signals are noise until analyzed. The LLM layer classifies each signal, assesses its strategic impact, and assigns a severity level.

See AI Voice Agents Handle Real Calls

Book a free demo or calculate how much you can save with AI voice automation.

from openai import AsyncOpenAI
import json

# Module-level async OpenAI client, shared by all analyze_signal calls.
client = AsyncOpenAI()

# Prompt template for the LLM analysis step. Placeholders (competitor,
# signal_type, source_url, content) are filled via str.format in
# analyze_signal. NOTE(review): the company description is hard-coded
# below; make it configurable before reusing this agent for another
# business.
ANALYSIS_PROMPT = """You are a competitive intelligence analyst.

Analyze this signal from competitor "{competitor}":

Signal type: {signal_type}
Source: {source_url}
Content:
{content}

Our company sells: AI-powered customer service tools for B2B SaaS.

Return JSON with:
- "summary": 2-3 sentence executive summary
- "severity": "low", "medium", "high", or "critical"
- "impact_areas": list of affected business areas
- "recommended_actions": list of 1-3 suggested responses
- "confidence": float 0-1 indicating analysis confidence
"""


async def analyze_signal(signal: CompetitorSignal) -> CompetitorSignal:
    """Enrich *signal* in place with an LLM-written summary and severity.

    Sends the (truncated) raw content to the model using ANALYSIS_PROMPT,
    parses the JSON reply, and stores the summary and severity back on the
    signal. Returns the same object for call-chaining.

    Robustness fixes over the original:
    - result.get("summary") instead of result["summary"], so a missing key
      leaves analysis as None rather than raising KeyError;
    - the model-reported severity is lowercased and clamped to the
      documented set (defaulting to "low"), so downstream routing — which
      checks membership in ("high", "critical") — stays predictable even
      when the model returns "High", "urgent", or omits the field.
    """
    response = await client.chat.completions.create(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": "Return valid JSON only."},
            {
                "role": "user",
                "content": ANALYSIS_PROMPT.format(
                    competitor=signal.competitor,
                    signal_type=signal.signal_type.value,
                    source_url=signal.source_url,
                    content=signal.raw_content[:3000],  # cap prompt size
                ),
            },
        ],
    )
    result = json.loads(response.choices[0].message.content)
    signal.analysis = result.get("summary")
    severity = str(result.get("severity", "low")).lower()
    if severity not in ("low", "medium", "high", "critical"):
        severity = "low"
    signal.severity = severity
    return signal

Alert Generation and Delivery

High-severity signals should trigger immediate notifications. Lower-severity signals get batched into a daily digest.

async def process_and_alert(
    signals: list[CompetitorSignal], notifier
):
    """Analyze each signal, then route it by severity.

    High/critical signals trigger an immediate notification on the
    competitive-intel channel; everything else is queued for the daily
    digest. The notifier must expose async send_urgent / queue_for_digest.
    """
    for raw in signals:
        enriched = await analyze_signal(raw)

        if enriched.severity not in ("high", "critical"):
            await notifier.queue_for_digest(enriched)
            continue

        alert_text = "\n".join([
            f"*{enriched.severity.upper()}* signal from *{enriched.competitor}*",
            f"Type: {enriched.signal_type.value}",
            f"Summary: {enriched.analysis}",
            f"Source: {enriched.source_url}",
        ])
        await notifier.send_urgent(
            channel="#competitive-intel",
            message=alert_text,
        )

FAQ

How do I avoid getting blocked when scraping competitor websites?

Use respectful scraping practices: honor robots.txt, add delays between requests (2-5 seconds), rotate user agents, and limit frequency to once or twice per day for each URL. For pricing pages that actively block scrapers, consider using a headless browser service or monitoring cached versions through search engine snapshots.

How do I reduce false positive alerts?

Set a confidence threshold in the LLM analysis step. Signals where the LLM reports confidence below 0.7 should be queued for human verification rather than triggering alerts. Also compare new page content against the previous version using a diff-based approach to pinpoint what actually changed rather than re-analyzing the entire page.

Is it legal to scrape competitor websites?

Legality varies by jurisdiction. In general, publicly available information on websites can be collected for competitive analysis, but you must respect terms of service. Do not scrape behind login walls, do not violate CFAA provisions, and consult legal counsel for your specific market. News APIs and public RSS feeds are generally the safest sources.


#CompetitiveIntelligence #MarketMonitoring #WebScraping #NewsAnalysis #Python #AgenticAI #LearnAI #AIEngineering

Share this article
C

CallSphere Team

Expert insights on AI voice agents and customer communication automation.

Try CallSphere AI Voice Agents

See how AI voice agents work for your industry. Live demo available -- no signup required.