stygian_graph/
ports.rs

//! Port trait definitions: service abstractions.
//!
//! Defines the interfaces that adapters must implement.
//! In hexagonal-architecture terms, these are the "ports" that connect
//! domain logic to external infrastructure.
6
7use crate::domain::error::Result;
8use async_trait::async_trait;
9use serde_json::Value;
10
11/// Input to a scraping service
12///
13/// Contains the target URL and service-specific parameters that configure
14/// how the scraping operation should be performed.
15///
16/// # Example
17///
18/// ```
19/// use stygian_graph::ports::ServiceInput;
20/// use serde_json::json;
21///
22/// let input = ServiceInput {
23///     url: "https://example.com".to_string(),
24///     params: json!({
25///         "timeout_ms": 5000,
26///         "user_agent": "stygian/1.0"
27///     }),
28/// };
29/// ```
30#[derive(Debug, Clone)]
31pub struct ServiceInput {
32    /// Target URL to scrape
33    pub url: String,
34    /// Service-specific parameters (timeout, headers, etc.)
35    pub params: Value,
36}
37
38/// Output from a scraping service
39///
40/// Contains the raw scraped data and metadata about the operation
41/// for downstream processing and debugging.
42///
43/// # Example
44///
45/// ```
46/// use stygian_graph::ports::ServiceOutput;
47/// use serde_json::json;
48///
49/// let output = ServiceOutput {
50///     data: "<html>...</html>".to_string(),
51///     metadata: json!({
52///         "status_code": 200,
53///         "content_type": "text/html",
54///         "response_time_ms": 145
55///     }),
56/// };
57/// ```
58#[derive(Debug, Clone)]
59pub struct ServiceOutput {
60    /// Raw scraped data (HTML, JSON, binary, etc.)
61    pub data: String,
62    /// Metadata about the operation (status, timing, headers)
63    pub metadata: Value,
64}
65
66/// Primary port: `ScrapingService` trait
67///
68/// All scraping modules (HTTP, browser, JavaScript rendering) implement this trait.
69/// Uses `async_trait` for dyn compatibility with service registry.
70///
71/// # Example Implementation
72///
73/// ```no_run
74/// use stygian_graph::ports::{ScrapingService, ServiceInput, ServiceOutput};
75/// use stygian_graph::error::Result;
76/// use async_trait::async_trait;
77/// use serde_json::json;
78///
79/// struct MyService;
80///
81/// #[async_trait]
82/// impl ScrapingService for MyService {
83///     async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
84///         Ok(ServiceOutput {
85///             data: format!("Scraped: {}", input.url),
86///             metadata: json!({"status": "ok"}),
87///         })
88///     }
89///     
90///     fn name(&self) -> &'static str {
91///         "my-service"
92///     }
93/// }
94/// ```
95#[async_trait]
96pub trait ScrapingService: Send + Sync {
97    /// Execute the scraping operation
98    ///
99    /// # Arguments
100    ///
101    /// * `input` - Service input containing URL and parameters
102    ///
103    /// # Returns
104    ///
105    /// * `Ok(ServiceOutput)` - Successful scraping result
106    /// * `Err(StygianError)` - Service error, timeout, or network failure
107    ///
108    /// # Example
109    ///
110    /// ```no_run
111    /// # use stygian_graph::ports::{ScrapingService, ServiceInput};
112    /// # use serde_json::json;
113    /// # async fn example(service: impl ScrapingService) {
114    /// let input = ServiceInput {
115    ///     url: "https://example.com".to_string(),
116    ///     params: json!({}),
117    /// };
118    /// let output = service.execute(input).await.unwrap();
119    /// println!("Data: {}", output.data);
120    /// # }
121    /// ```
122    async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput>;
123
124    /// Service name for identification in logs and metrics
125    ///
126    /// # Example
127    ///
128    /// ```no_run
129    /// # use stygian_graph::ports::ScrapingService;
130    /// # fn example(service: impl ScrapingService) {
131    /// println!("Using service: {}", service.name());
132    /// # }
133    /// ```
134    fn name(&self) -> &'static str;
135}
136
/// Feature flags advertised by an AI provider.
///
/// Each field is an independent boolean switch; the derived `Default`
/// leaves every flag turned off.
///
/// # Example
///
/// ```
/// use stygian_graph::ports::ProviderCapabilities;
///
/// let caps = ProviderCapabilities {
///     streaming: true,
///     vision: false,
///     tool_use: true,
///     json_mode: true,
/// };
/// assert!(caps.streaming, "Provider supports streaming");
/// ```
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct ProviderCapabilities {
    /// Streaming responses are supported
    pub streaming: bool,
    /// Image/video analysis is supported
    pub vision: bool,
    /// Function calling / tool use is supported
    pub tool_use: bool,
    /// A native JSON output mode is available
    pub json_mode: bool,
}
165
166/// AI Provider port for LLM-based extraction
167///
168/// All LLM providers (Claude, GPT, Gemini, GitHub Copilot, Ollama) implement this trait.
169/// Uses `async_trait` for dyn compatibility with service registry.
170///
171/// # Example Implementation
172///
173/// ```no_run
174/// use stygian_graph::ports::{AIProvider, ProviderCapabilities};
175/// use stygian_graph::domain::error::Result;
176/// use async_trait::async_trait;
177/// use serde_json::{json, Value};
178/// use futures::stream::{Stream, BoxStream};
179///
180/// struct MyProvider;
181///
182/// #[async_trait]
183/// impl AIProvider for MyProvider {
184///     async fn extract(&self, content: String, schema: Value) -> Result<Value> {
185///         Ok(json!({"extracted": "data"}))
186///     }
187///
188///     async fn stream_extract(
189///         &self,
190///         content: String,
191///         schema: Value,
192///     ) -> Result<BoxStream<'static, Result<Value>>> {
193///         unimplemented!("Streaming not supported")
194///     }
195///
196///     fn capabilities(&self) -> ProviderCapabilities {
197///         ProviderCapabilities {
198///             streaming: false,
199///             vision: false,
200///             tool_use: true,
201///             json_mode: true,
202///         }
203///     }
204///
205///     fn name(&self) -> &'static str {
206///         "my-provider"
207///     }
208/// }
209/// ```
210#[async_trait]
211pub trait AIProvider: Send + Sync {
212    /// Extract structured data from content using LLM
213    ///
214    /// # Arguments
215    ///
216    /// * `content` - Raw content to analyze (text, HTML, etc.)
217    /// * `schema` - JSON schema defining expected output structure
218    ///
219    /// # Returns
220    ///
221    /// * `Ok(Value)` - Extracted data matching the schema
222    /// * `Err(ProviderError)` - API error, token limit, or policy violation
223    ///
224    /// # Example
225    ///
226    /// ```no_run
227    /// # use stygian_graph::ports::AIProvider;
228    /// # use serde_json::json;
229    /// # async fn example(provider: impl AIProvider) {
230    /// let schema = json!({
231    ///     "type": "object",
232    ///     "properties": {
233    ///         "title": {"type": "string"},
234    ///         "price": {"type": "number"}
235    ///     }
236    /// });
237    /// let result = provider.extract("<html>...</html>".to_string(), schema).await.unwrap();
238    /// println!("Extracted: {}", result);
239    /// # }
240    /// ```
241    async fn extract(&self, content: String, schema: Value) -> Result<Value>;
242
243    /// Stream structured data extraction for real-time processing
244    ///
245    /// Returns a stream of partial results as they arrive from the LLM.
246    /// Only supported by providers with `capabilities().streaming == true`.
247    ///
248    /// # Arguments
249    ///
250    /// * `content` - Raw content to analyze
251    /// * `schema` - JSON schema defining expected output structure
252    ///
253    /// # Returns
254    ///
255    /// * `Ok(BoxStream)` - Stream of partial extraction results
256    /// * `Err(ProviderError)` - If streaming is not supported or API error
257    ///
258    /// # Example
259    ///
260    /// ```no_run
261    /// # use stygian_graph::ports::AIProvider;
262    /// # use serde_json::json;
263    /// # use futures::StreamExt;
264    /// # async fn example(provider: impl AIProvider) {
265    /// let schema = json!({"type": "object"});
266    /// let mut stream = provider.stream_extract("content".to_string(), schema).await.unwrap();
267    /// while let Some(result) = stream.next().await {
268    ///     match result {
269    ///         Ok(partial) => println!("Chunk: {}", partial),
270    ///         Err(e) => eprintln!("Stream error: {}", e),
271    ///     }
272    /// }
273    /// # }
274    /// ```
275    async fn stream_extract(
276        &self,
277        content: String,
278        schema: Value,
279    ) -> Result<futures::stream::BoxStream<'static, Result<Value>>>;
280
281    /// Get provider capabilities
282    ///
283    /// Returns a struct describing what features this provider supports.
284    ///
285    /// # Example
286    ///
287    /// ```no_run
288    /// # use stygian_graph::ports::AIProvider;
289    /// # fn example(provider: impl AIProvider) {
290    /// let caps = provider.capabilities();
291    /// if caps.streaming {
292    ///     println!("Provider supports streaming");
293    /// }
294    /// if caps.vision {
295    ///     println!("Provider supports image analysis");
296    /// }
297    /// # }
298    /// ```
299    fn capabilities(&self) -> ProviderCapabilities;
300
301    /// Provider name (claude, gpt, gemini, etc.)
302    ///
303    /// # Example
304    ///
305    /// ```no_run
306    /// # use stygian_graph::ports::AIProvider;
307    /// # fn example(provider: impl AIProvider) {
308    /// println!("Using provider: {}", provider.name());
309    /// # }
310    /// ```
311    fn name(&self) -> &'static str;
312}
313
314/// Cache port for storing/retrieving data with idempotency key support
315///
316/// Provides a key-value store interface for caching scraped content,
317/// API responses, and idempotency tracking.
318///
319/// # Example Implementation
320///
321/// ```no_run
322/// use stygian_graph::ports::CachePort;
323/// use stygian_graph::domain::error::Result;
324/// use async_trait::async_trait;
325/// use std::time::Duration;
326///
327/// struct MyCache;
328///
329/// #[async_trait]
330/// impl CachePort for MyCache {
331///     async fn get(&self, key: &str) -> Result<Option<String>> {
332///         // Fetch from cache backend
333///         Ok(Some("cached_value".to_string()))
334///     }
335///
336///     async fn set(&self, key: &str, value: String, ttl: Option<Duration>) -> Result<()> {
337///         // Store in cache backend
338///         Ok(())
339///     }
340///
341///     async fn invalidate(&self, key: &str) -> Result<()> {
342///         // Remove from cache
343///         Ok(())
344///     }
345///
346///     async fn exists(&self, key: &str) -> Result<bool> {
347///         // Check existence
348///         Ok(true)
349///     }
350/// }
351/// ```
352#[async_trait]
353pub trait CachePort: Send + Sync {
354    /// Get value from cache
355    ///
356    /// # Arguments
357    ///
358    /// * `key` - Cache key (URL, idempotency key, etc.)
359    ///
360    /// # Returns
361    ///
362    /// * `Ok(Some(String))` - Cache hit
363    /// * `Ok(None)` - Cache miss
364    /// * `Err(CacheError)` - Backend failure
365    ///
366    /// # Example
367    ///
368    /// ```no_run
369    /// # use stygian_graph::ports::CachePort;
370    /// # async fn example(cache: impl CachePort) {
371    /// match cache.get("page:123").await {
372    ///     Ok(Some(content)) => println!("Cache hit: {}", content),
373    ///     Ok(None) => println!("Cache miss"),
374    ///     Err(e) => eprintln!("Cache error: {}", e),
375    /// }
376    /// # }
377    /// ```
378    async fn get(&self, key: &str) -> Result<Option<String>>;
379
380    /// Set value in cache with optional TTL
381    ///
382    /// # Arguments
383    ///
384    /// * `key` - Cache key
385    /// * `value` - Value to store (JSON, HTML, etc.)
386    /// * `ttl` - Optional expiration duration
387    ///
388    /// # Returns
389    ///
390    /// * `Ok(())` - Successfully stored
391    /// * `Err(CacheError)` - Backend failure
392    ///
393    /// # Example
394    ///
395    /// ```no_run
396    /// # use stygian_graph::ports::CachePort;
397    /// # use std::time::Duration;
398    /// # async fn example(cache: impl CachePort) {
399    /// cache.set(
400    ///     "page:123",
401    ///     "<html>...</html>".to_string(),
402    ///     Some(Duration::from_secs(3600))
403    /// ).await.unwrap();
404    /// # }
405    /// ```
406    async fn set(&self, key: &str, value: String, ttl: Option<std::time::Duration>) -> Result<()>;
407
408    /// Invalidate cache entry
409    ///
410    /// # Arguments
411    ///
412    /// * `key` - Cache key to invalidate
413    ///
414    /// # Returns
415    ///
416    /// * `Ok(())` - Successfully invalidated (or key didn't exist)
417    /// * `Err(CacheError)` - Backend failure
418    ///
419    /// # Example
420    ///
421    /// ```no_run
422    /// # use stygian_graph::ports::CachePort;
423    /// # async fn example(cache: impl CachePort) {
424    /// cache.invalidate("page:123").await.unwrap();
425    /// # }
426    /// ```
427    async fn invalidate(&self, key: &str) -> Result<()>;
428
429    /// Check if key exists in cache
430    ///
431    /// # Arguments
432    ///
433    /// * `key` - Cache key to check
434    ///
435    /// # Returns
436    ///
437    /// * `Ok(true)` - Key exists
438    /// * `Ok(false)` - Key does not exist or expired
439    /// * `Err(CacheError)` - Backend failure
440    ///
441    /// # Example
442    ///
443    /// ```no_run
444    /// # use stygian_graph::ports::CachePort;
445    /// # async fn example(cache: impl CachePort) {
446    /// if cache.exists("page:123").await.unwrap() {
447    ///     println!("Page is cached");
448    /// }
449    /// # }
450    /// ```
451    async fn exists(&self, key: &str) -> Result<bool>;
452}
453
/// The three states of a circuit breaker.
///
/// Models the current position of a breaker in the standard circuit
/// breaker pattern.
///
/// # State Transitions
///
/// ```text
/// Closed ---(too many failures)---> Open
/// Open -----(timeout elapsed)-----> HalfOpen
/// HalfOpen --(success)-----------> Closed
/// HalfOpen --(failure)-----------> Open
/// ```
///
/// # Example
///
/// ```
/// use stygian_graph::ports::CircuitState;
///
/// let state = CircuitState::Closed;
/// assert!(matches!(state, CircuitState::Closed));
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CircuitState {
    /// Requests pass through; this is normal operation
    Closed,
    /// Requests fail fast because the circuit is open
    Open,
    /// A limited number of requests are let through to probe for recovery
    HalfOpen,
}
485
486/// Circuit breaker port for resilience
487///
488/// Implements the circuit breaker pattern to prevent cascading failures
489/// when external services are unavailable. Uses interior mutability
490/// for state management.
491///
492/// # Example Implementation
493///
494/// ```no_run
495/// use stygian_graph::ports::{CircuitBreaker, CircuitState};
496/// use stygian_graph::domain::error::Result;
497/// use parking_lot::RwLock;
498/// use std::sync::Arc;
499///
500/// struct MyCircuitBreaker {
501///     state: Arc<RwLock<CircuitState>>,
502/// }
503///
504/// impl CircuitBreaker for MyCircuitBreaker {
505///     fn state(&self) -> CircuitState {
506///         *self.state.read()
507///     }
508///
509///     fn record_success(&self) {
510///         let mut state = self.state.write();
511///         *state = CircuitState::Closed;
512///     }
513///
514///     fn record_failure(&self) {
515///         let mut state = self.state.write();
516///         *state = CircuitState::Open;
517///     }
518///
519///     fn attempt_reset(&self) -> bool {
520///         let mut state = self.state.write();
521///         if matches!(*state, CircuitState::Open) {
522///             *state = CircuitState::HalfOpen;
523///             true
524///         } else {
525///             false
526///         }
527///     }
528/// }
529/// ```
530pub trait CircuitBreaker: Send + Sync {
531    /// Get current circuit breaker state
532    ///
533    /// # Returns
534    ///
535    /// Current state (`Closed`, `Open`, or `HalfOpen`)
536    ///
537    /// # Example
538    ///
539    /// ```no_run
540    /// # use stygian_graph::ports::{CircuitBreaker, CircuitState};
541    /// # fn example(cb: impl CircuitBreaker) {
542    /// match cb.state() {
543    ///     CircuitState::Closed => println!("Normal operation"),
544    ///     CircuitState::Open => println!("Circuit is open, failing fast"),
545    ///     CircuitState::HalfOpen => println!("Testing recovery"),
546    /// }
547    /// # }
548    /// ```
549    fn state(&self) -> CircuitState;
550
551    /// Record successful operation
552    ///
553    /// Transitions `HalfOpen` -> `Closed`, maintains `Closed` state.
554    ///
555    /// # Example
556    ///
557    /// ```no_run
558    /// # use stygian_graph::ports::CircuitBreaker;
559    /// # fn example(cb: impl CircuitBreaker) {
560    /// // After successful API call
561    /// cb.record_success();
562    /// # }
563    /// ```
564    fn record_success(&self);
565
566    /// Record failed operation
567    ///
568    /// May transition `Closed` -> `Open` or `HalfOpen` -> `Open` depending on
569    /// failure threshold configuration.
570    ///
571    /// # Example
572    ///
573    /// ```no_run
574    /// # use stygian_graph::ports::CircuitBreaker;
575    /// # fn example(cb: impl CircuitBreaker) {
576    /// // After failed API call
577    /// cb.record_failure();
578    /// # }
579    /// ```
580    fn record_failure(&self);
581
582    /// Attempt to reset circuit from `Open` to `HalfOpen`
583    ///
584    /// Called after timeout period to test if service recovered.
585    ///
586    /// # Returns
587    ///
588    /// * `true` - Successfully transitioned to `HalfOpen`
589    /// * `false` - Already in `Closed` or `HalfOpen` state
590    ///
591    /// # Example
592    ///
593    /// ```no_run
594    /// # use stygian_graph::ports::CircuitBreaker;
595    /// # fn example(cb: impl CircuitBreaker) {
596    /// if cb.attempt_reset() {
597    ///     println!("Circuit breaker now in HalfOpen state");
598    /// }
599    /// # }
600    /// ```
601    fn attempt_reset(&self) -> bool;
602}
603
/// Rate limit configuration
///
/// Describes a request budget: at most `max_requests` requests within
/// each `window` of time. The struct only carries these two numbers; how
/// they are enforced is left to whatever limiter consumes the
/// configuration.
///
/// Derives `PartialEq`/`Eq` so configurations can be compared directly,
/// for example in tests or when detecting a configuration change.
///
/// # Example
///
/// ```
/// use stygian_graph::ports::RateLimitConfig;
/// use std::time::Duration;
///
/// // Allow 100 requests per minute
/// let config = RateLimitConfig {
///     max_requests: 100,
///     window: Duration::from_secs(60),
/// };
/// assert_eq!(config, config);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RateLimitConfig {
    /// Maximum number of requests allowed in the time window
    pub max_requests: u32,
    /// Time window over which `max_requests` applies
    pub window: std::time::Duration,
}
627
628/// Rate limiter port
629///
630/// Implements rate limiting to prevent overwhelming external services
631/// or exceeding API quotas. Supports per-key rate limiting for
632/// multi-tenant scenarios.
633///
634/// # Example Implementation
635///
636/// ```no_run
637/// use stygian_graph::ports::RateLimiter;
638/// use stygian_graph::domain::error::Result;
639/// use async_trait::async_trait;
640///
641/// struct MyRateLimiter;
642///
643/// #[async_trait]
644/// impl RateLimiter for MyRateLimiter {
645///     async fn check_rate_limit(&self, key: &str) -> Result<bool> {
646///         // Check if key is within rate limit
647///         Ok(true)
648///     }
649///
650///     async fn record_request(&self, key: &str) -> Result<()> {
651///         // Record request for rate limiting
652///         Ok(())
653///     }
654/// }
655/// ```
656#[async_trait]
657pub trait RateLimiter: Send + Sync {
658    /// Check if key is within rate limit
659    ///
660    /// # Arguments
661    ///
662    /// * `key` - Rate limit key (service name, API endpoint, user ID, etc.)
663    ///
664    /// # Returns
665    ///
666    /// * `Ok(true)` - Request allowed
667    /// * `Ok(false)` - Rate limit exceeded
668    /// * `Err(RateLimitError)` - Backend failure
669    ///
670    /// # Example
671    ///
672    /// ```no_run
673    /// # use stygian_graph::ports::RateLimiter;
674    /// # async fn example(limiter: impl RateLimiter) {
675    /// if limiter.check_rate_limit("api:openai").await.unwrap() {
676    ///     println!("Request allowed");
677    /// } else {
678    ///     println!("Rate limit exceeded, retry later");
679    /// }
680    /// # }
681    /// ```
682    async fn check_rate_limit(&self, key: &str) -> Result<bool>;
683
684    /// Record a request for rate limiting
685    ///
686    /// Should be called after successful operation to update the rate limit counter.
687    ///
688    /// # Arguments
689    ///
690    /// * `key` - Rate limit key
691    ///
692    /// # Returns
693    ///
694    /// * `Ok(())` - Request recorded
695    /// * `Err(RateLimitError)` - Backend failure
696    ///
697    /// # Example
698    ///
699    /// ```no_run
700    /// # use stygian_graph::ports::RateLimiter;
701    /// # async fn example(limiter: impl RateLimiter) {
702    /// // After making API call
703    /// limiter.record_request("api:openai").await.unwrap();
704    /// # }
705    /// ```
706    async fn record_request(&self, key: &str) -> Result<()>;
707}
708
709// ─────────────────────────────────────────────────────────────────────────────
710// GraphQL auth types
711// ─────────────────────────────────────────────────────────────────────────────
712
713/// Authentication strategy for a GraphQL request.
714///
715/// # Example
716///
717/// ```rust
718/// use stygian_graph::ports::{GraphQlAuth, GraphQlAuthKind};
719///
720/// let auth = GraphQlAuth {
721///     kind: GraphQlAuthKind::Bearer,
722///     token: "${env:MY_TOKEN}".to_string(),
723///     header_name: None,
724/// };
725/// ```
726#[derive(Debug, Clone)]
727pub struct GraphQlAuth {
728    /// The authentication strategy to apply
729    pub kind: GraphQlAuthKind,
730    /// The token value (supports `${env:VAR}` expansion)
731    pub token: String,
732    /// Custom header name (required when `kind == Header`)
733    pub header_name: Option<String>,
734}
735
/// Discriminant for `GraphQlAuth`
///
/// Selects which authentication strategy is applied to a request. The
/// enum carries no data, so it derives `Copy` (values can be passed and
/// compared without cloning) and `Hash` (values can serve as map or set
/// keys).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GraphQlAuthKind {
    /// `Authorization: Bearer <token>`
    Bearer,
    /// `X-Api-Key: <token>`
    ApiKey,
    /// Arbitrary header specified by `header_name`
    Header,
    /// No authentication
    None,
}
748
749/// GraphQL plugin sub-module
750pub mod graphql_plugin;
751
752/// Work queue port — distributed task execution
753pub mod work_queue;
754
755/// WASM plugin port — dynamic plugin loading
756pub mod wasm_plugin;
757
758/// Storage port — persist and retrieve pipeline results
759pub mod storage;
760
761/// Auth port — runtime token loading, expiry checking, and refresh.
762pub mod auth;
763
764/// Signing port — attach HMAC, OAuth 1.0a, AWS Sig V4, or Frida RPC signatures to requests.
765pub mod signing;