stygian_graph/
lib.rs

1//! # Stygian Graph
2#![allow(clippy::multiple_crate_versions)]
3//!
4//! A high-performance, graph-based web scraping engine for Rust.
5//!
6//! ## Overview
7//!
8//! Stygian treats scraping pipelines as Directed Acyclic Graphs (DAGs) where each node
9//! is a pluggable service module (HTTP fetchers, AI extractors, headless browsers).
10//! Built for extreme concurrency and extensibility using hexagonal architecture.
11//!
12//! ## Quick Start
13//!
14//! ```no_run
15//! use stygian_graph::domain::graph::Pipeline;
16//! use stygian_graph::domain::pipeline::PipelineUnvalidated;
17//!
18//! #[tokio::main]
19//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
20//!     // Create a simple scraping pipeline
21//!     let config = serde_json::json!({
22//!         "nodes": [],
23//!         "edges": []
24//!     });
25//!     
26//!     let pipeline = PipelineUnvalidated::new(config)
27//!         .validate()?
28//!         .execute()
29//!         .complete(serde_json::json!({"status": "success"}));
30//!     
31//!     println!("Pipeline complete: {:?}", pipeline.results());
32//!     Ok(())
33//! }
34//! ```
35//!
36//! ## Architecture
37//!
38//! Stygian follows hexagonal (ports & adapters) architecture:
39//!
40//! - **Domain**: Core business logic (graph execution, pipeline orchestration)
41//! - **Ports**: Trait definitions (service interfaces, abstractions)
42//! - **Adapters**: Implementations (HTTP, AI providers, storage, caching)
43//! - **Application**: Orchestration (service registry, executor, CLI)
44//!
45//! ## Features
46//!
//! - 🕸️ **Graph-based execution**: DAG pipelines with petgraph
//! - 🤖 **Multi-AI support**: Claude, GPT, Gemini, Copilot, Ollama
//! - 🌐 **JavaScript rendering**: Optional browser automation via `stygian-browser`
//! - 📊 **Multi-modal extraction**: HTML, PDF, images, video, audio
//! - 🛡️ **Anti-bot handling**: User-Agent rotation, proxy support, rate limiting
//! - 🚀 **High concurrency**: Worker pools, backpressure, Tokio + Rayon
//! - 🔄 **Idempotent operations**: Safe retries with idempotency keys
//! - 📈 **Observability**: Metrics, tracing, monitoring
55//!
56//! ## Crate Features
57//!
58//! - `browser` (default): Include stygian-browser for JavaScript rendering
59//! - `full`: All features enabled
60//!
61//! ## Request Signing
62//!
63//! Use [`ports::signing::SigningPort`] + [`adapters::signing::HttpSigningAdapter`] to attach
64//! HMAC signatures, AWS Sig V4, OAuth 1.0a, or Frida RPC tokens to any outbound request.
//! No feature flag required — zero additional dependencies.
66
// Crate-wide lint policy.
// Warn when a public item has no documentation.
#![warn(missing_docs)]
// Warn when an intra-doc link in rustdoc does not resolve.
#![warn(rustdoc::broken_intra_doc_links)]
// Reject any use of `unsafe` code in this crate.
#![deny(unsafe_code)]
// When the `docsrs` cfg is set, turn on the unstable `doc_cfg` rustdoc feature.
#![cfg_attr(docsrs, feature(doc_cfg))]
70
71// ═══════════════════════════════════════════════════════════════════════════
72// Internal Module Organization (Hexagonal Architecture)
73// ═══════════════════════════════════════════════════════════════════════════
74
75/// Core domain logic - graph execution, pipelines, orchestration
76///
77/// **Hexagonal principle**: Domain never imports adapters, only ports (traits).
78pub mod domain;
79
80/// Port trait definitions - service abstractions
81///
82/// Defines interfaces that adapters must implement:
83/// - `ScrapingService`: HTTP fetchers, browser automation
84/// - `AIProvider`: LLM extraction services
85/// - `CachePort`: Caching abstractions
86/// - `CircuitBreaker`: Resilience patterns
87pub mod ports;
88
89/// Adapter implementations - infrastructure concerns
90///
91/// Concrete implementations of port traits:
92/// - HTTP client with anti-bot features
93/// - AI providers (Claude, GPT, Gemini, Ollama)
94/// - Storage backends (file, S3, database)
95/// - Cache backends (memory, Redis, file)
96pub mod adapters;
97
98/// Application layer - orchestration and coordination
99///
100/// High-level coordination logic:
101/// - Service registry with dependency injection
102/// - Pipeline executor
103/// - CLI interface
104/// - Configuration management
105pub mod application;
106
107// ═══════════════════════════════════════════════════════════════════════════
108// Public API
109// ═══════════════════════════════════════════════════════════════════════════
110
111/// Error types used throughout the crate
112pub mod error {
113    pub use crate::domain::error::*;
114}
115
116/// Re-exports for convenient imports
117///
118/// # Example
119///
120/// ```
121/// use stygian_graph::prelude::*;
122/// ```
123pub mod prelude {
124    pub use crate::domain::pipeline::*;
125    pub use crate::error::*;
126    pub use crate::ports::*;
127}
128
129// Re-export browser crate if feature is enabled
130#[cfg(feature = "browser")]
131#[cfg_attr(docsrs, doc(cfg(feature = "browser")))]
132pub use stygian_browser;
133
#[cfg(test)]
mod tests {
    /// Placeholder smoke test: checks only that the test harness builds and runs.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}