stygian_graph/lib.rs
1//! # Stygian Graph
2#![allow(clippy::multiple_crate_versions)]
3//!
4//! A high-performance, graph-based web scraping engine for Rust.
5//!
6//! ## Overview
7//!
8//! Stygian treats scraping pipelines as Directed Acyclic Graphs (DAGs) where each node
9//! is a pluggable service module (HTTP fetchers, AI extractors, headless browsers).
10//! Built for extreme concurrency and extensibility using hexagonal architecture.
11//!
12//! ## Quick Start
13//!
14//! ```no_run
15//! use stygian_graph::domain::graph::Pipeline;
16//! use stygian_graph::domain::pipeline::PipelineUnvalidated;
17//!
18//! #[tokio::main]
19//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
20//! // Create a simple scraping pipeline
21//! let config = serde_json::json!({
22//! "nodes": [],
23//! "edges": []
24//! });
25//!
26//! let pipeline = PipelineUnvalidated::new(config)
27//! .validate()?
28//! .execute()
29//! .complete(serde_json::json!({"status": "success"}));
30//!
31//! println!("Pipeline complete: {:?}", pipeline.results());
32//! Ok(())
33//! }
34//! ```
35//!
36//! ## Architecture
37//!
38//! Stygian follows hexagonal (ports & adapters) architecture:
39//!
40//! - **Domain**: Core business logic (graph execution, pipeline orchestration)
41//! - **Ports**: Trait definitions (service interfaces, abstractions)
42//! - **Adapters**: Implementations (HTTP, AI providers, storage, caching)
43//! - **Application**: Orchestration (service registry, executor, CLI)
44//!
45//! ## Features
46//!
//! - 🕸️ **Graph-based execution**: DAG pipelines with petgraph
//! - 🤖 **Multi-AI support**: Claude, GPT, Gemini, Copilot, Ollama
//! - 🌐 **JavaScript rendering**: Optional browser automation via `stygian-browser`
//! - 📄 **Multi-modal extraction**: HTML, PDF, images, video, audio
//! - 🛡️ **Anti-bot handling**: User-Agent rotation, proxy support, rate limiting
//! - 🚀 **High concurrency**: Worker pools, backpressure, Tokio + Rayon
//! - 🔄 **Idempotent operations**: Safe retries with idempotency keys
//! - 📊 **Observability**: Metrics, tracing, monitoring
55//!
56//! ## Crate Features
57//!
58//! - `browser` (default): Include stygian-browser for JavaScript rendering
59//! - `full`: All features enabled
60//!
61//! ## Request Signing
62//!
63//! Use [`ports::signing::SigningPort`] + [`adapters::signing::HttpSigningAdapter`] to attach
64//! HMAC signatures, AWS Sig V4, OAuth 1.0a, or Frida RPC tokens to any outbound request.
//! No feature flag required — zero additional dependencies.
66
67#![warn(missing_docs, rustdoc::broken_intra_doc_links)]
68#![deny(unsafe_code)]
69#![cfg_attr(docsrs, feature(doc_cfg))]
70
// ───────────────────────────────────────────────────────────────────────────
// Internal Module Organization (Hexagonal Architecture)
// ───────────────────────────────────────────────────────────────────────────
74
75/// Core domain logic - graph execution, pipelines, orchestration
76///
77/// **Hexagonal principle**: Domain never imports adapters, only ports (traits).
78pub mod domain;
79
80/// Port trait definitions - service abstractions
81///
82/// Defines interfaces that adapters must implement:
83/// - `ScrapingService`: HTTP fetchers, browser automation
84/// - `AIProvider`: LLM extraction services
85/// - `CachePort`: Caching abstractions
86/// - `CircuitBreaker`: Resilience patterns
87pub mod ports;
88
89/// Adapter implementations - infrastructure concerns
90///
91/// Concrete implementations of port traits:
92/// - HTTP client with anti-bot features
93/// - AI providers (Claude, GPT, Gemini, Ollama)
94/// - Storage backends (file, S3, database)
95/// - Cache backends (memory, Redis, file)
96pub mod adapters;
97
98/// Application layer - orchestration and coordination
99///
100/// High-level coordination logic:
101/// - Service registry with dependency injection
102/// - Pipeline executor
103/// - CLI interface
104/// - Configuration management
105pub mod application;
106
// ───────────────────────────────────────────────────────────────────────────
// Public API
// ───────────────────────────────────────────────────────────────────────────
110
111/// Error types used throughout the crate
112pub mod error {
113 pub use crate::domain::error::*;
114}
115
116/// Re-exports for convenient imports
117///
118/// # Example
119///
120/// ```
121/// use stygian_graph::prelude::*;
122/// ```
123pub mod prelude {
124 pub use crate::domain::pipeline::*;
125 pub use crate::error::*;
126 pub use crate::ports::*;
127}
128
// Re-export the browser crate when the `browser` feature is enabled.
// The `doc(cfg(...))` attribute is only applied when building with
// `--cfg docsrs`, so the feature gate is shown in the generated docs.
#[cfg(feature = "browser")]
#[cfg_attr(docsrs, doc(cfg(feature = "browser")))]
pub use stygian_browser;
133
#[cfg(test)]
mod tests {
    /// Placeholder smoke test: only confirms that the crate's test harness
    /// builds and runs; it does not exercise any crate functionality.
    #[test]
    fn it_works() {
        assert_eq!(2 + 2, 4);
    }
}