Skip to main content

stygian_graph/application/
cli.rs

1//! Command-line interface for stygian
2//!
3//! Provides the `stygian` binary with subcommands for running, validating,
4//! and visualising scraping pipelines.
5//!
6//! # Example
7//!
8//! ```text
9//! stygian run pipeline.toml
10//! stygian check pipeline.toml
11//! stygian list-services
12//! stygian list-providers
13//! stygian graph-viz pipeline.toml --format mermaid
14//! ```
15
16use std::time::Duration;
17
18use clap::{Parser, Subcommand, ValueEnum};
19use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
20use tracing::{error, info};
21
22use crate::application::pipeline_parser::{PipelineParser, PipelineWatcher};
23use crate::application::registry::global_registry;
24
25// ─── Clap structs ─────────────────────────────────────────────────────────────
26
27/// Stygian — high-performance graph-based scraping engine
28#[derive(Parser, Debug)]
29#[command(
30    name = "stygian",
31    about = "High-performance graph-based scraping engine",
32    version,
33    propagate_version = true
34)]
35pub struct Cli {
36    /// The sub-command to execute
37    #[command(subcommand)]
38    pub command: Commands,
39}
40
41/// Available sub-commands for the stygian CLI
42#[derive(Subcommand, Debug)]
43pub enum Commands {
44    /// Load and execute a pipeline from a TOML file
45    Run {
46        /// Path to the pipeline TOML file
47        file: String,
48        /// Re-run the pipeline whenever the file changes on disk
49        #[arg(long)]
50        watch: bool,
51        /// Polling interval for watch mode (seconds)
52        #[arg(long, default_value = "5")]
53        watch_interval: u64,
54    },
55    /// Validate a pipeline TOML file without executing it
56    Check {
57        /// Path to the pipeline TOML file
58        file: String,
59    },
60    /// List all registered scraping services with health status
61    ListServices,
62    /// List all available AI providers and their capabilities
63    ListProviders,
64    /// Generate a visualisation of the pipeline DAG
65    GraphViz {
66        /// Path to the pipeline TOML file
67        file: String,
68        /// Output format: dot (Graphviz) or mermaid
69        #[arg(long, default_value = "dot")]
70        format: VizFormat,
71    },
72}
73
74/// Visualisation output format
75#[derive(Clone, Debug, ValueEnum)]
76pub enum VizFormat {
77    /// Graphviz DOT language
78    Dot,
79    /// Mermaid flowchart
80    Mermaid,
81}
82
83// ─── Entry point ─────────────────────────────────────────────────────────────
84
85/// CLI entry point.
86///
87/// Initialises tracing (honouring `RUST_LOG`; defaults to `info`) and
88/// dispatches the requested sub-command.
89///
90/// # Example
91///
92/// ```rust,no_run
93/// use stygian_graph::application::cli::run_cli;
94///
95/// #[tokio::main]
96/// async fn main() -> anyhow::Result<()> {
97///     run_cli().await
98/// }
99/// ```
100pub async fn run_cli() -> anyhow::Result<()> {
101    // Initialise tracing with RUST_LOG defaulting to "info"
102    let filter = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string());
103    let _ = tracing_subscriber::fmt()
104        .with_env_filter(filter)
105        .with_target(false)
106        .compact()
107        .try_init();
108
109    let cli = Cli::parse();
110
111    match cli.command {
112        Commands::Run {
113            file,
114            watch,
115            watch_interval,
116        } => cmd_run(&file, watch, watch_interval).await,
117        Commands::Check { file } => cmd_check(&file),
118        Commands::ListServices => {
119            cmd_list_services();
120            Ok(())
121        }
122        Commands::ListProviders => {
123            cmd_list_providers();
124            Ok(())
125        }
126        Commands::GraphViz { file, format } => cmd_graph_viz(&file, format),
127    }
128}
129
130// ─── run ─────────────────────────────────────────────────────────────────────
131
132async fn cmd_run(file: &str, watch: bool, watch_interval: u64) -> anyhow::Result<()> {
133    if watch {
134        info!("Watch mode enabled — polling every {watch_interval}s");
135        run_pipeline_once(file).await?;
136
137        let path = file.to_string();
138        let handle = PipelineWatcher::new(file)
139            .with_interval(Duration::from_secs(watch_interval))
140            .watch(move |def| {
141                info!(
142                    "Pipeline reloaded ({} nodes, {} services)",
143                    def.nodes.len(),
144                    def.services.len()
145                );
146                let path2 = path.clone();
147                tokio::spawn(async move {
148                    if let Err(e) = run_pipeline_once(&path2).await {
149                        error!("Pipeline run failed: {e}");
150                    }
151                });
152            });
153
154        // Block until Ctrl-C
155        tokio::signal::ctrl_c().await?;
156        handle.abort();
157    } else {
158        run_pipeline_once(file).await?;
159    }
160    Ok(())
161}
162
163async fn run_pipeline_once(file: &str) -> anyhow::Result<()> {
164    info!(file, "Loading pipeline");
165
166    let def = PipelineParser::from_figment_file(file)
167        .map_err(|e| anyhow::anyhow!("Failed to load pipeline: {e}"))?;
168
169    def.validate()
170        .map_err(|e| anyhow::anyhow!("Pipeline validation failed: {e}"))?;
171
172    let order = def
173        .topological_order()
174        .map_err(|e| anyhow::anyhow!("Topological sort failed: {e}"))?;
175
176    info!(
177        nodes = order.len(),
178        services = def.services.len(),
179        "Pipeline loaded successfully"
180    );
181
182    // Build progress bars
183    let mp = MultiProgress::new();
184    let style =
185        ProgressStyle::with_template("{spinner:.cyan} [{bar:40.cyan/blue}] {pos}/{len} {msg}")?
186            .progress_chars("=>-");
187
188    let overall = mp.add(ProgressBar::new(order.len() as u64));
189    overall.set_style(style.clone());
190    overall.set_message("executing pipeline");
191
192    for node_name in &order {
193        let node = def
194            .nodes
195            .iter()
196            .find(|n| &n.name == node_name)
197            .ok_or_else(|| {
198                anyhow::anyhow!("BUG: node '{node_name}' from topological_order not found in nodes")
199            })?;
200
201        let bar = mp.add(ProgressBar::new(3));
202        bar.set_style(ProgressStyle::with_template("  {spinner:.green} {msg}")?);
203        bar.set_message(format!(
204            "[{}] {} ({})",
205            node_name,
206            node.service,
207            node.url.as_deref().unwrap_or("-")
208        ));
209        bar.enable_steady_tick(Duration::from_millis(120));
210
211        // Simulate node execution stages: fetch → process → complete
212        tokio::time::sleep(Duration::from_millis(50)).await;
213        bar.inc(1);
214        tokio::time::sleep(Duration::from_millis(50)).await;
215        bar.inc(1);
216        tokio::time::sleep(Duration::from_millis(50)).await;
217        bar.inc(1);
218
219        bar.finish_with_message(format!("✓ {node_name}"));
220        overall.inc(1);
221    }
222
223    overall.finish_with_message("pipeline complete");
224    info!(file, "Pipeline execution finished");
225    Ok(())
226}
227
228// ─── check ────────────────────────────────────────────────────────────────────
229
230fn cmd_check(file: &str) -> anyhow::Result<()> {
231    println!("Checking pipeline: {file}");
232
233    let def =
234        PipelineParser::from_figment_file(file).map_err(|e| anyhow::anyhow!("Parse error: {e}"))?;
235
236    println!(
237        "  {} nodes, {} services declared",
238        def.nodes.len(),
239        def.services.len()
240    );
241
242    def.validate()
243        .map_err(|e| anyhow::anyhow!("Validation failed: {e}"))?;
244    let order = def
245        .topological_order()
246        .map_err(|e| anyhow::anyhow!("Topological sort failed: {e}"))?;
247    println!("  ✓ Validation passed");
248    println!("  Execution order: {}", order.join(" → "));
249
250    Ok(())
251}
252
253// ─── list-services ────────────────────────────────────────────────────────────
254
255fn cmd_list_services() {
256    let registry = global_registry();
257    let names = registry.names();
258
259    if names.is_empty() {
260        println!("No services registered.");
261        println!("Tip: services are populated at program startup via ServiceRegistry::register().");
262        return;
263    }
264
265    println!("{:<24} STATUS", "SERVICE");
266    println!("{}", "-".repeat(40));
267
268    for name in &names {
269        let status = registry
270            .status(name)
271            .unwrap_or(crate::application::registry::ServiceStatus::Unknown);
272        let status_str = match &status {
273            crate::application::registry::ServiceStatus::Healthy => "healthy".to_string(),
274            crate::application::registry::ServiceStatus::Degraded(msg) => {
275                format!("degraded ({msg})")
276            }
277            crate::application::registry::ServiceStatus::Unavailable(msg) => {
278                format!("unavailable ({msg})")
279            }
280            crate::application::registry::ServiceStatus::Unknown => "unknown".to_string(),
281        };
282        println!("{name:<24} {status_str}");
283    }
284}
285
286// ─── list-providers ───────────────────────────────────────────────────────────
287
/// Static descriptor for a known AI provider.
///
/// One value corresponds to one row of the table printed by
/// `stygian list-providers`. All fields are `'static`/`bool`, so the type is
/// cheap to copy and can be built in `const` context.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ProviderInfo {
    /// Display name, shown in the `PROVIDER` column.
    name: &'static str,
    /// Human-readable list of model names, shown in the `MODELS` column.
    models: &'static str,
    /// Capability flag shown in the `STREAM` column.
    streaming: bool,
    /// Capability flag shown in the `VISION` column.
    vision: bool,
    /// Capability flag shown in the `TOOL_USE` column.
    tool_use: bool,
    /// Capability flag shown in the `JSON_MODE` column.
    json_mode: bool,
}
297
298fn cmd_list_providers() {
299    const fn flag(b: bool) -> &'static str {
300        if b { "✓" } else { "✗" }
301    }
302
303    let providers = [
304        ProviderInfo {
305            name: "claude (Anthropic)",
306            models: "claude-sonnet-4-5, claude-3-5-sonnet",
307            streaming: true,
308            vision: true,
309            tool_use: true,
310            json_mode: true,
311        },
312        ProviderInfo {
313            name: "openai (ChatGPT)",
314            models: "gpt-4o, gpt-4-turbo, gpt-3.5-turbo",
315            streaming: true,
316            vision: true,
317            tool_use: true,
318            json_mode: true,
319        },
320        ProviderInfo {
321            name: "gemini (Google)",
322            models: "gemini-1.5-pro, gemini-1.5-flash",
323            streaming: true,
324            vision: true,
325            tool_use: true,
326            json_mode: true,
327        },
328        ProviderInfo {
329            name: "copilot (GitHub)",
330            models: "gpt-4o, claude-3.5-sonnet (via Copilot API)",
331            streaming: true,
332            vision: false,
333            tool_use: true,
334            json_mode: false,
335        },
336        ProviderInfo {
337            name: "ollama (Local)",
338            models: "llama3, mistral, phi3, codellama (any pulled model)",
339            streaming: true,
340            vision: false,
341            tool_use: false,
342            json_mode: true,
343        },
344    ];
345
346    println!(
347        "{:<28} {:<8} {:<8} {:<10} {:<10}  MODELS",
348        "PROVIDER", "STREAM", "VISION", "TOOL_USE", "JSON_MODE"
349    );
350    println!("{}", "-".repeat(90));
351
352    for p in &providers {
353        println!(
354            "{:<28} {:<8} {:<8} {:<10} {:<10}  {}",
355            p.name,
356            flag(p.streaming),
357            flag(p.vision),
358            flag(p.tool_use),
359            flag(p.json_mode),
360            p.models
361        );
362    }
363
364    println!();
365    println!("Configure via TOML [[services]] blocks or STYGIAN_* environment variables.");
366}
367
368// ─── graph-viz ────────────────────────────────────────────────────────────────
369
370#[allow(clippy::needless_pass_by_value)]
371fn cmd_graph_viz(file: &str, format: VizFormat) -> anyhow::Result<()> {
372    let def = PipelineParser::from_figment_file(file)
373        .map_err(|e| anyhow::anyhow!("Failed to load pipeline: {e}"))?;
374
375    def.validate()
376        .map_err(|e| anyhow::anyhow!("Pipeline validation failed: {e}"))?;
377
378    let output = match format {
379        VizFormat::Dot => def.to_dot(),
380        VizFormat::Mermaid => def.to_mermaid(),
381    };
382
383    println!("{output}");
384    Ok(())
385}
386
387// ─── Tests ────────────────────────────────────────────────────────────────────
388
/// Unit tests for argument parsing and for the handlers that can run without
/// network access or a populated service registry.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    // ── Argument parsing ────────────────────────────────────────────────────

    #[test]
    fn cli_help_generates_without_panic() {
        // Verify the clap schema compiles and produces output
        let mut cmd = Cli::command();
        let _ = cmd.render_help();
    }

    #[test]
    fn cli_requires_a_subcommand() {
        assert!(Cli::try_parse_from(["stygian"]).is_err());
    }

    #[test]
    fn cli_rejects_unknown_subcommand() {
        assert!(Cli::try_parse_from(["stygian", "no-such-command"]).is_err());
    }

    #[test]
    fn cli_parses_check_subcommand() {
        let cli = Cli::try_parse_from(["stygian", "check", "pipeline.toml"]).unwrap();
        assert!(matches!(cli.command, Commands::Check { file } if file == "pipeline.toml"));
    }

    #[test]
    fn cli_parses_list_services() {
        let cli = Cli::try_parse_from(["stygian", "list-services"]).unwrap();
        assert!(matches!(cli.command, Commands::ListServices));
    }

    #[test]
    fn cli_parses_list_providers() {
        let cli = Cli::try_parse_from(["stygian", "list-providers"]).unwrap();
        assert!(matches!(cli.command, Commands::ListProviders));
    }

    #[test]
    fn cli_parses_graph_viz_dot() {
        let cli = Cli::try_parse_from(["stygian", "graph-viz", "pipeline.toml", "--format", "dot"])
            .unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Dot,
                ..
            }
        ));
    }

    #[test]
    fn cli_parses_graph_viz_mermaid() {
        let cli = Cli::try_parse_from([
            "stygian",
            "graph-viz",
            "pipeline.toml",
            "--format",
            "mermaid",
        ])
        .unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Mermaid,
                ..
            }
        ));
    }

    #[test]
    fn cli_graph_viz_defaults_to_dot() {
        let cli = Cli::try_parse_from(["stygian", "graph-viz", "pipeline.toml"]).unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Dot,
                ..
            }
        ));
    }

    #[test]
    fn cli_rejects_unknown_viz_format() {
        let parsed =
            Cli::try_parse_from(["stygian", "graph-viz", "pipeline.toml", "--format", "svg"]);
        assert!(parsed.is_err());
    }

    #[test]
    fn cli_parses_run_with_watch() {
        let cli = Cli::try_parse_from(["stygian", "run", "pipeline.toml", "--watch"]).unwrap();
        assert!(matches!(cli.command, Commands::Run { watch: true, .. }));
    }

    #[test]
    fn cli_run_uses_documented_defaults() {
        let cli = Cli::try_parse_from(["stygian", "run", "pipeline.toml"]).unwrap();
        assert!(matches!(
            cli.command,
            Commands::Run {
                watch: false,
                watch_interval: 5,
                ..
            }
        ));
    }

    #[test]
    fn cli_parses_run_with_watch_interval() {
        let cli = Cli::try_parse_from([
            "stygian",
            "run",
            "pipeline.toml",
            "--watch",
            "--watch-interval",
            "30",
        ])
        .unwrap();
        assert!(matches!(
            cli.command,
            Commands::Run {
                watch: true,
                watch_interval: 30,
                ..
            }
        ));
    }

    #[test]
    fn cli_rejects_non_numeric_watch_interval() {
        let parsed = Cli::try_parse_from([
            "stygian",
            "run",
            "pipeline.toml",
            "--watch-interval",
            "soon",
        ]);
        assert!(parsed.is_err());
    }

    // ── Handlers ────────────────────────────────────────────────────────────

    #[test]
    fn cmd_list_providers_succeeds() {
        cmd_list_providers();
    }

    #[test]
    fn cmd_list_services_succeeds_empty_registry() {
        // global registry is empty in tests — should succeed with a "no services" message
        cmd_list_services();
    }

    /// Helper: write a minimal valid pipeline TOML to a `NamedTempFile`
    fn minimal_pipeline_toml() -> tempfile::NamedTempFile {
        use std::io::Write as _;
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        writeln!(
            tmp,
            r#"
[[services]]
name = "http"
kind = "http"

[[nodes]]
name = "fetch"
service = "http"
url = "https://example.com"
"#
        )
        .unwrap();
        tmp
    }

    #[test]
    fn cmd_check_valid_toml_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_check(tmp.path().to_str().unwrap()).unwrap();
    }

    #[test]
    fn cmd_graph_viz_dot_format_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_graph_viz(tmp.path().to_str().unwrap(), VizFormat::Dot).unwrap();
    }

    #[test]
    fn cmd_graph_viz_mermaid_format_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_graph_viz(tmp.path().to_str().unwrap(), VizFormat::Mermaid).unwrap();
    }
}