stygian_graph/ports.rs
1//! Port trait definitions - service abstractions
2//!
3//! Defines interfaces that adapters must implement.
4//! Following hexagonal architecture, these are the "ports" that connect
5//! domain logic to external infrastructure.
6
7use crate::domain::error::Result;
8use async_trait::async_trait;
9use serde_json::Value;
10
11/// Input to a scraping service
12///
13/// Contains the target URL and service-specific parameters that configure
14/// how the scraping operation should be performed.
15///
16/// # Example
17///
18/// ```
19/// use stygian_graph::ports::ServiceInput;
20/// use serde_json::json;
21///
22/// let input = ServiceInput {
23/// url: "https://example.com".to_string(),
24/// params: json!({
25/// "timeout_ms": 5000,
26/// "user_agent": "stygian/1.0"
27/// }),
28/// };
29/// ```
30#[derive(Debug, Clone)]
31pub struct ServiceInput {
32 /// Target URL to scrape
33 pub url: String,
34 /// Service-specific parameters (timeout, headers, etc.)
35 pub params: Value,
36}
37
38/// Output from a scraping service
39///
40/// Contains the raw scraped data and metadata about the operation
41/// for downstream processing and debugging.
42///
43/// # Example
44///
45/// ```
46/// use stygian_graph::ports::ServiceOutput;
47/// use serde_json::json;
48///
49/// let output = ServiceOutput {
50/// data: "<html>...</html>".to_string(),
51/// metadata: json!({
52/// "status_code": 200,
53/// "content_type": "text/html",
54/// "response_time_ms": 145
55/// }),
56/// };
57/// ```
58#[derive(Debug, Clone)]
59pub struct ServiceOutput {
60 /// Raw scraped data (HTML, JSON, binary, etc.)
61 pub data: String,
62 /// Metadata about the operation (status, timing, headers)
63 pub metadata: Value,
64}
65
66/// Primary port: `ScrapingService` trait
67///
68/// All scraping modules (HTTP, browser, JavaScript rendering) implement this trait.
69/// Uses `async_trait` for dyn compatibility with service registry.
70///
71/// # Example Implementation
72///
73/// ```no_run
74/// use stygian_graph::ports::{ScrapingService, ServiceInput, ServiceOutput};
75/// use stygian_graph::error::Result;
76/// use async_trait::async_trait;
77/// use serde_json::json;
78///
79/// struct MyService;
80///
81/// #[async_trait]
82/// impl ScrapingService for MyService {
83/// async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
84/// Ok(ServiceOutput {
85/// data: format!("Scraped: {}", input.url),
86/// metadata: json!({"status": "ok"}),
87/// })
88/// }
89///
90/// fn name(&self) -> &'static str {
91/// "my-service"
92/// }
93/// }
94/// ```
95#[async_trait]
96pub trait ScrapingService: Send + Sync {
97 /// Execute the scraping operation
98 ///
99 /// # Arguments
100 ///
101 /// * `input` - Service input containing URL and parameters
102 ///
103 /// # Returns
104 ///
105 /// * `Ok(ServiceOutput)` - Successful scraping result
106 /// * `Err(StygianError)` - Service error, timeout, or network failure
107 ///
108 /// # Example
109 ///
110 /// ```no_run
111 /// # use stygian_graph::ports::{ScrapingService, ServiceInput};
112 /// # use serde_json::json;
113 /// # async fn example(service: impl ScrapingService) {
114 /// let input = ServiceInput {
115 /// url: "https://example.com".to_string(),
116 /// params: json!({}),
117 /// };
118 /// let output = service.execute(input).await.unwrap();
119 /// println!("Data: {}", output.data);
120 /// # }
121 /// ```
122 async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput>;
123
124 /// Service name for identification in logs and metrics
125 ///
126 /// # Example
127 ///
128 /// ```no_run
129 /// # use stygian_graph::ports::ScrapingService;
130 /// # fn example(service: impl ScrapingService) {
131 /// println!("Using service: {}", service.name());
132 /// # }
133 /// ```
134 fn name(&self) -> &'static str;
135}
136
/// Capability flags advertised by an AI provider.
///
/// Each field is an independent boolean; the derived `Default` leaves every
/// capability switched off.
///
/// # Example
///
/// ```
/// use stygian_graph::ports::ProviderCapabilities;
///
/// let caps = ProviderCapabilities {
///     streaming: true,
///     vision: false,
///     tool_use: true,
///     json_mode: true,
/// };
/// assert!(caps.streaming, "Provider supports streaming");
/// ```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct ProviderCapabilities {
    /// Provider can stream responses.
    pub streaming: bool,
    /// Provider can analyse images/video.
    pub vision: bool,
    /// Provider supports function calling / tool use.
    pub tool_use: bool,
    /// Provider offers a native JSON output mode.
    pub json_mode: bool,
}
165
166/// AI Provider port for LLM-based extraction
167///
168/// All LLM providers (Claude, GPT, Gemini, GitHub Copilot, Ollama) implement this trait.
169/// Uses `async_trait` for dyn compatibility with service registry.
170///
171/// # Example Implementation
172///
173/// ```no_run
174/// use stygian_graph::ports::{AIProvider, ProviderCapabilities};
175/// use stygian_graph::domain::error::Result;
176/// use async_trait::async_trait;
177/// use serde_json::{json, Value};
178/// use futures::stream::{Stream, BoxStream};
179///
180/// struct MyProvider;
181///
182/// #[async_trait]
183/// impl AIProvider for MyProvider {
184/// async fn extract(&self, content: String, schema: Value) -> Result<Value> {
185/// Ok(json!({"extracted": "data"}))
186/// }
187///
188/// async fn stream_extract(
189/// &self,
190/// content: String,
191/// schema: Value,
192/// ) -> Result<BoxStream<'static, Result<Value>>> {
193/// unimplemented!("Streaming not supported")
194/// }
195///
196/// fn capabilities(&self) -> ProviderCapabilities {
197/// ProviderCapabilities {
198/// streaming: false,
199/// vision: false,
200/// tool_use: true,
201/// json_mode: true,
202/// }
203/// }
204///
205/// fn name(&self) -> &'static str {
206/// "my-provider"
207/// }
208/// }
209/// ```
210#[async_trait]
211pub trait AIProvider: Send + Sync {
212 /// Extract structured data from content using LLM
213 ///
214 /// # Arguments
215 ///
216 /// * `content` - Raw content to analyze (text, HTML, etc.)
217 /// * `schema` - JSON schema defining expected output structure
218 ///
219 /// # Returns
220 ///
221 /// * `Ok(Value)` - Extracted data matching the schema
222 /// * `Err(ProviderError)` - API error, token limit, or policy violation
223 ///
224 /// # Example
225 ///
226 /// ```no_run
227 /// # use stygian_graph::ports::AIProvider;
228 /// # use serde_json::json;
229 /// # async fn example(provider: impl AIProvider) {
230 /// let schema = json!({
231 /// "type": "object",
232 /// "properties": {
233 /// "title": {"type": "string"},
234 /// "price": {"type": "number"}
235 /// }
236 /// });
237 /// let result = provider.extract("<html>...</html>".to_string(), schema).await.unwrap();
238 /// println!("Extracted: {}", result);
239 /// # }
240 /// ```
241 async fn extract(&self, content: String, schema: Value) -> Result<Value>;
242
243 /// Stream structured data extraction for real-time processing
244 ///
245 /// Returns a stream of partial results as they arrive from the LLM.
246 /// Only supported by providers with `capabilities().streaming == true`.
247 ///
248 /// # Arguments
249 ///
250 /// * `content` - Raw content to analyze
251 /// * `schema` - JSON schema defining expected output structure
252 ///
253 /// # Returns
254 ///
255 /// * `Ok(BoxStream)` - Stream of partial extraction results
256 /// * `Err(ProviderError)` - If streaming is not supported or API error
257 ///
258 /// # Example
259 ///
260 /// ```no_run
261 /// # use stygian_graph::ports::AIProvider;
262 /// # use serde_json::json;
263 /// # use futures::StreamExt;
264 /// # async fn example(provider: impl AIProvider) {
265 /// let schema = json!({"type": "object"});
266 /// let mut stream = provider.stream_extract("content".to_string(), schema).await.unwrap();
267 /// while let Some(result) = stream.next().await {
268 /// match result {
269 /// Ok(partial) => println!("Chunk: {}", partial),
270 /// Err(e) => eprintln!("Stream error: {}", e),
271 /// }
272 /// }
273 /// # }
274 /// ```
275 async fn stream_extract(
276 &self,
277 content: String,
278 schema: Value,
279 ) -> Result<futures::stream::BoxStream<'static, Result<Value>>>;
280
281 /// Get provider capabilities
282 ///
283 /// Returns a struct describing what features this provider supports.
284 ///
285 /// # Example
286 ///
287 /// ```no_run
288 /// # use stygian_graph::ports::AIProvider;
289 /// # fn example(provider: impl AIProvider) {
290 /// let caps = provider.capabilities();
291 /// if caps.streaming {
292 /// println!("Provider supports streaming");
293 /// }
294 /// if caps.vision {
295 /// println!("Provider supports image analysis");
296 /// }
297 /// # }
298 /// ```
299 fn capabilities(&self) -> ProviderCapabilities;
300
301 /// Provider name (claude, gpt, gemini, etc.)
302 ///
303 /// # Example
304 ///
305 /// ```no_run
306 /// # use stygian_graph::ports::AIProvider;
307 /// # fn example(provider: impl AIProvider) {
308 /// println!("Using provider: {}", provider.name());
309 /// # }
310 /// ```
311 fn name(&self) -> &'static str;
312}
313
314/// Cache port for storing/retrieving data with idempotency key support
315///
316/// Provides a key-value store interface for caching scraped content,
317/// API responses, and idempotency tracking.
318///
319/// # Example Implementation
320///
321/// ```no_run
322/// use stygian_graph::ports::CachePort;
323/// use stygian_graph::domain::error::Result;
324/// use async_trait::async_trait;
325/// use std::time::Duration;
326///
327/// struct MyCache;
328///
329/// #[async_trait]
330/// impl CachePort for MyCache {
331/// async fn get(&self, key: &str) -> Result<Option<String>> {
332/// // Fetch from cache backend
333/// Ok(Some("cached_value".to_string()))
334/// }
335///
336/// async fn set(&self, key: &str, value: String, ttl: Option<Duration>) -> Result<()> {
337/// // Store in cache backend
338/// Ok(())
339/// }
340///
341/// async fn invalidate(&self, key: &str) -> Result<()> {
342/// // Remove from cache
343/// Ok(())
344/// }
345///
346/// async fn exists(&self, key: &str) -> Result<bool> {
347/// // Check existence
348/// Ok(true)
349/// }
350/// }
351/// ```
352#[async_trait]
353pub trait CachePort: Send + Sync {
354 /// Get value from cache
355 ///
356 /// # Arguments
357 ///
358 /// * `key` - Cache key (URL, idempotency key, etc.)
359 ///
360 /// # Returns
361 ///
362 /// * `Ok(Some(String))` - Cache hit
363 /// * `Ok(None)` - Cache miss
364 /// * `Err(CacheError)` - Backend failure
365 ///
366 /// # Example
367 ///
368 /// ```no_run
369 /// # use stygian_graph::ports::CachePort;
370 /// # async fn example(cache: impl CachePort) {
371 /// match cache.get("page:123").await {
372 /// Ok(Some(content)) => println!("Cache hit: {}", content),
373 /// Ok(None) => println!("Cache miss"),
374 /// Err(e) => eprintln!("Cache error: {}", e),
375 /// }
376 /// # }
377 /// ```
378 async fn get(&self, key: &str) -> Result<Option<String>>;
379
380 /// Set value in cache with optional TTL
381 ///
382 /// # Arguments
383 ///
384 /// * `key` - Cache key
385 /// * `value` - Value to store (JSON, HTML, etc.)
386 /// * `ttl` - Optional expiration duration
387 ///
388 /// # Returns
389 ///
390 /// * `Ok(())` - Successfully stored
391 /// * `Err(CacheError)` - Backend failure
392 ///
393 /// # Example
394 ///
395 /// ```no_run
396 /// # use stygian_graph::ports::CachePort;
397 /// # use std::time::Duration;
398 /// # async fn example(cache: impl CachePort) {
399 /// cache.set(
400 /// "page:123",
401 /// "<html>...</html>".to_string(),
402 /// Some(Duration::from_secs(3600))
403 /// ).await.unwrap();
404 /// # }
405 /// ```
406 async fn set(&self, key: &str, value: String, ttl: Option<std::time::Duration>) -> Result<()>;
407
408 /// Invalidate cache entry
409 ///
410 /// # Arguments
411 ///
412 /// * `key` - Cache key to invalidate
413 ///
414 /// # Returns
415 ///
416 /// * `Ok(())` - Successfully invalidated (or key didn't exist)
417 /// * `Err(CacheError)` - Backend failure
418 ///
419 /// # Example
420 ///
421 /// ```no_run
422 /// # use stygian_graph::ports::CachePort;
423 /// # async fn example(cache: impl CachePort) {
424 /// cache.invalidate("page:123").await.unwrap();
425 /// # }
426 /// ```
427 async fn invalidate(&self, key: &str) -> Result<()>;
428
429 /// Check if key exists in cache
430 ///
431 /// # Arguments
432 ///
433 /// * `key` - Cache key to check
434 ///
435 /// # Returns
436 ///
437 /// * `Ok(true)` - Key exists
438 /// * `Ok(false)` - Key does not exist or expired
439 /// * `Err(CacheError)` - Backend failure
440 ///
441 /// # Example
442 ///
443 /// ```no_run
444 /// # use stygian_graph::ports::CachePort;
445 /// # async fn example(cache: impl CachePort) {
446 /// if cache.exists("page:123").await.unwrap() {
447 /// println!("Page is cached");
448 /// }
449 /// # }
450 /// ```
451 async fn exists(&self, key: &str) -> Result<bool>;
452}
453
/// State of a circuit breaker.
///
/// Models the three states of the standard circuit breaker pattern.
///
/// # State Transitions
///
/// ```text
/// Closed ---(too many failures)---> Open
/// Open -----(timeout elapsed)-----> HalfOpen
/// HalfOpen --(success)-----------> Closed
/// HalfOpen --(failure)-----------> Open
/// ```
///
/// # Example
///
/// ```
/// use stygian_graph::ports::CircuitState;
///
/// let state = CircuitState::Closed;
/// assert!(matches!(state, CircuitState::Closed));
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CircuitState {
    /// Normal operation: requests pass through.
    Closed,
    /// Circuit is open: requests fail fast.
    Open,
    /// Probing whether the service recovered: limited requests allowed.
    HalfOpen,
}
485
486/// Circuit breaker port for resilience
487///
488/// Implements the circuit breaker pattern to prevent cascading failures
489/// when external services are unavailable. Uses interior mutability
490/// for state management.
491///
492/// # Example Implementation
493///
494/// ```no_run
495/// use stygian_graph::ports::{CircuitBreaker, CircuitState};
496/// use stygian_graph::domain::error::Result;
497/// use parking_lot::RwLock;
498/// use std::sync::Arc;
499///
500/// struct MyCircuitBreaker {
501/// state: Arc<RwLock<CircuitState>>,
502/// }
503///
504/// impl CircuitBreaker for MyCircuitBreaker {
505/// fn state(&self) -> CircuitState {
506/// *self.state.read()
507/// }
508///
509/// fn record_success(&self) {
510/// let mut state = self.state.write();
511/// *state = CircuitState::Closed;
512/// }
513///
514/// fn record_failure(&self) {
515/// let mut state = self.state.write();
516/// *state = CircuitState::Open;
517/// }
518///
519/// fn attempt_reset(&self) -> bool {
520/// let mut state = self.state.write();
521/// if matches!(*state, CircuitState::Open) {
522/// *state = CircuitState::HalfOpen;
523/// true
524/// } else {
525/// false
526/// }
527/// }
528/// }
529/// ```
530pub trait CircuitBreaker: Send + Sync {
531 /// Get current circuit breaker state
532 ///
533 /// # Returns
534 ///
535 /// Current state (`Closed`, `Open`, or `HalfOpen`)
536 ///
537 /// # Example
538 ///
539 /// ```no_run
540 /// # use stygian_graph::ports::{CircuitBreaker, CircuitState};
541 /// # fn example(cb: impl CircuitBreaker) {
542 /// match cb.state() {
543 /// CircuitState::Closed => println!("Normal operation"),
544 /// CircuitState::Open => println!("Circuit is open, failing fast"),
545 /// CircuitState::HalfOpen => println!("Testing recovery"),
546 /// }
547 /// # }
548 /// ```
549 fn state(&self) -> CircuitState;
550
551 /// Record successful operation
552 ///
553 /// Transitions `HalfOpen` -> `Closed`, maintains `Closed` state.
554 ///
555 /// # Example
556 ///
557 /// ```no_run
558 /// # use stygian_graph::ports::CircuitBreaker;
559 /// # fn example(cb: impl CircuitBreaker) {
560 /// // After successful API call
561 /// cb.record_success();
562 /// # }
563 /// ```
564 fn record_success(&self);
565
566 /// Record failed operation
567 ///
568 /// May transition `Closed` -> `Open` or `HalfOpen` -> `Open` depending on
569 /// failure threshold configuration.
570 ///
571 /// # Example
572 ///
573 /// ```no_run
574 /// # use stygian_graph::ports::CircuitBreaker;
575 /// # fn example(cb: impl CircuitBreaker) {
576 /// // After failed API call
577 /// cb.record_failure();
578 /// # }
579 /// ```
580 fn record_failure(&self);
581
582 /// Attempt to reset circuit from `Open` to `HalfOpen`
583 ///
584 /// Called after timeout period to test if service recovered.
585 ///
586 /// # Returns
587 ///
588 /// * `true` - Successfully transitioned to `HalfOpen`
589 /// * `false` - Already in `Closed` or `HalfOpen` state
590 ///
591 /// # Example
592 ///
593 /// ```no_run
594 /// # use stygian_graph::ports::CircuitBreaker;
595 /// # fn example(cb: impl CircuitBreaker) {
596 /// if cb.attempt_reset() {
597 /// println!("Circuit breaker now in HalfOpen state");
598 /// }
599 /// # }
600 /// ```
601 fn attempt_reset(&self) -> bool;
602}
603
/// Rate limit configuration.
///
/// Defines the rate limiting parameters (described by this port as a token
/// bucket approach): at most `max_requests` requests per `window`.
///
/// The type is `Copy` and compares by value (`PartialEq`/`Eq`), so two
/// configurations can be checked for equality directly, e.g. in assertions.
///
/// # Example
///
/// ```
/// use stygian_graph::ports::RateLimitConfig;
/// use std::time::Duration;
///
/// // Allow 100 requests per minute
/// let config = RateLimitConfig {
///     max_requests: 100,
///     window: Duration::from_secs(60),
/// };
/// assert_eq!(config, config);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RateLimitConfig {
    /// Maximum number of requests allowed in the time window
    pub max_requests: u32,
    /// Time window for rate limiting
    pub window: std::time::Duration,
}
627
628/// Rate limiter port
629///
630/// Implements rate limiting to prevent overwhelming external services
631/// or exceeding API quotas. Supports per-key rate limiting for
632/// multi-tenant scenarios.
633///
634/// # Example Implementation
635///
636/// ```no_run
637/// use stygian_graph::ports::RateLimiter;
638/// use stygian_graph::domain::error::Result;
639/// use async_trait::async_trait;
640///
641/// struct MyRateLimiter;
642///
643/// #[async_trait]
644/// impl RateLimiter for MyRateLimiter {
645/// async fn check_rate_limit(&self, key: &str) -> Result<bool> {
646/// // Check if key is within rate limit
647/// Ok(true)
648/// }
649///
650/// async fn record_request(&self, key: &str) -> Result<()> {
651/// // Record request for rate limiting
652/// Ok(())
653/// }
654/// }
655/// ```
656#[async_trait]
657pub trait RateLimiter: Send + Sync {
658 /// Check if key is within rate limit
659 ///
660 /// # Arguments
661 ///
662 /// * `key` - Rate limit key (service name, API endpoint, user ID, etc.)
663 ///
664 /// # Returns
665 ///
666 /// * `Ok(true)` - Request allowed
667 /// * `Ok(false)` - Rate limit exceeded
668 /// * `Err(RateLimitError)` - Backend failure
669 ///
670 /// # Example
671 ///
672 /// ```no_run
673 /// # use stygian_graph::ports::RateLimiter;
674 /// # async fn example(limiter: impl RateLimiter) {
675 /// if limiter.check_rate_limit("api:openai").await.unwrap() {
676 /// println!("Request allowed");
677 /// } else {
678 /// println!("Rate limit exceeded, retry later");
679 /// }
680 /// # }
681 /// ```
682 async fn check_rate_limit(&self, key: &str) -> Result<bool>;
683
684 /// Record a request for rate limiting
685 ///
686 /// Should be called after successful operation to update the rate limit counter.
687 ///
688 /// # Arguments
689 ///
690 /// * `key` - Rate limit key
691 ///
692 /// # Returns
693 ///
694 /// * `Ok(())` - Request recorded
695 /// * `Err(RateLimitError)` - Backend failure
696 ///
697 /// # Example
698 ///
699 /// ```no_run
700 /// # use stygian_graph::ports::RateLimiter;
701 /// # async fn example(limiter: impl RateLimiter) {
702 /// // After making API call
703 /// limiter.record_request("api:openai").await.unwrap();
704 /// # }
705 /// ```
706 async fn record_request(&self, key: &str) -> Result<()>;
707}
708
709// ─────────────────────────────────────────────────────────────────────────────
710// GraphQL auth types
711// ─────────────────────────────────────────────────────────────────────────────
712
713/// Authentication strategy for a GraphQL request.
714///
715/// # Example
716///
717/// ```rust
718/// use stygian_graph::ports::{GraphQlAuth, GraphQlAuthKind};
719///
720/// let auth = GraphQlAuth {
721/// kind: GraphQlAuthKind::Bearer,
722/// token: "${env:MY_TOKEN}".to_string(),
723/// header_name: None,
724/// };
725/// ```
726#[derive(Debug, Clone)]
727pub struct GraphQlAuth {
728 /// The authentication strategy to apply
729 pub kind: GraphQlAuthKind,
730 /// The token value (supports `${env:VAR}` expansion)
731 pub token: String,
732 /// Custom header name (required when `kind == Header`)
733 pub header_name: Option<String>,
734}
735
/// Discriminant for `GraphQlAuth`.
///
/// A field-less enum selecting how the token is attached to a request. It is
/// `Copy`, so the discriminant can be passed and compared by value without
/// cloning or moving it out of its owner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GraphQlAuthKind {
    /// `Authorization: Bearer <token>`
    Bearer,
    /// `X-Api-Key: <token>`
    ApiKey,
    /// Arbitrary header specified by `header_name`
    Header,
    /// No authentication (an enum variant, unrelated to `Option::None`)
    None,
}
748
749/// GraphQL plugin sub-module
750pub mod graphql_plugin;
751
752/// Work queue port — distributed task execution
753pub mod work_queue;
754
755/// WASM plugin port — dynamic plugin loading
756pub mod wasm_plugin;
757
758/// Storage port — persist and retrieve pipeline results
759pub mod storage;
760
761/// Auth port — runtime token loading, expiry checking, and refresh.
762pub mod auth;
763
764/// Signing port — attach HMAC, OAuth 1.0a, AWS Sig V4, or Frida RPC signatures to requests.
765pub mod signing;