// stygian_graph/application/cli.rs

//! Command-line interface for stygian
//!
//! Provides the `stygian` binary with subcommands for running, validating,
//! and visualising scraping pipelines.
//!
//! # Example
//!
//! ```text
//! stygian run pipeline.toml
//! stygian check pipeline.toml
//! stygian list-services
//! stygian list-providers
//! stygian graph-viz pipeline.toml --format mermaid
//! ```
15
16use std::time::Duration;
17
18use clap::{Parser, Subcommand, ValueEnum};
19use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
20use tracing::{error, info};
21
22use crate::application::pipeline_parser::{PipelineParser, PipelineWatcher};
23use crate::application::registry::global_registry;
24
25// ─── Clap structs ─────────────────────────────────────────────────────────────
26
/// Stygian — high-performance graph-based scraping engine
///
/// Top-level clap parser: the binary takes exactly one sub-command
/// (see [`Commands`]) and no global flags beyond clap's built-ins.
#[derive(Parser, Debug)]
#[command(
    name = "stygian",
    about = "High-performance graph-based scraping engine",
    version,
    // propagate_version makes `--version` work on every sub-command too.
    propagate_version = true
)]
pub struct Cli {
    /// The sub-command to execute
    #[command(subcommand)]
    pub command: Commands,
}
40
/// Available sub-commands for the stygian CLI
///
/// Each variant maps 1:1 to a `cmd_*` handler dispatched from `run_cli`.
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// Load and execute a pipeline from a TOML file
    Run {
        /// Path to the pipeline TOML file
        file: String,
        /// Re-run the pipeline whenever the file changes on disk
        #[arg(long)]
        watch: bool,
        /// Polling interval for watch mode (seconds)
        // Only meaningful together with --watch; clap parses the default "5".
        #[arg(long, default_value = "5")]
        watch_interval: u64,
    },
    /// Validate a pipeline TOML file without executing it
    Check {
        /// Path to the pipeline TOML file
        file: String,
    },
    /// List all registered scraping services with health status
    ListServices,
    /// List all available AI providers and their capabilities
    ListProviders,
    /// Generate a visualisation of the pipeline DAG
    GraphViz {
        /// Path to the pipeline TOML file
        file: String,
        /// Output format: dot (Graphviz) or mermaid
        #[arg(long, default_value = "dot")]
        format: VizFormat,
    },
}
73
/// Visualisation output format
///
/// Derives `ValueEnum`, so the CLI accepts the lowercase value names
/// (matching the `--format` default of "dot" declared on `GraphViz`).
#[derive(Clone, Debug, ValueEnum)]
pub enum VizFormat {
    /// Graphviz DOT language
    Dot,
    /// Mermaid flowchart
    Mermaid,
}
82
83// ─── Entry point ─────────────────────────────────────────────────────────────
84
85/// CLI entry point.
86///
87/// Initialises tracing (honouring `RUST_LOG`; defaults to `info`) and
88/// dispatches the requested sub-command.
89///
90/// # Example
91///
92/// ```rust,no_run
93/// use stygian_graph::application::cli::run_cli;
94///
95/// #[tokio::main]
96/// async fn main() -> anyhow::Result<()> {
97///     run_cli().await
98/// }
99/// ```
100pub async fn run_cli() -> anyhow::Result<()> {
101    // Initialise tracing with RUST_LOG defaulting to "info"
102    let filter = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string());
103    tracing_subscriber::fmt()
104        .with_env_filter(filter)
105        .with_target(false)
106        .compact()
107        .init();
108
109    let cli = Cli::parse();
110
111    match cli.command {
112        Commands::Run {
113            file,
114            watch,
115            watch_interval,
116        } => cmd_run(&file, watch, watch_interval).await,
117        Commands::Check { file } => cmd_check(&file),
118        Commands::ListServices => cmd_list_services(),
119        Commands::ListProviders => cmd_list_providers(),
120        Commands::GraphViz { file, format } => cmd_graph_viz(&file, format),
121    }
122}
123
124// ─── run ─────────────────────────────────────────────────────────────────────
125
126async fn cmd_run(file: &str, watch: bool, watch_interval: u64) -> anyhow::Result<()> {
127    if watch {
128        info!("Watch mode enabled — polling every {watch_interval}s");
129        run_pipeline_once(file).await?;
130
131        let path = file.to_string();
132        let handle = PipelineWatcher::new(file)
133            .with_interval(Duration::from_secs(watch_interval))
134            .watch(move |def| {
135                info!(
136                    "Pipeline reloaded ({} nodes, {} services)",
137                    def.nodes.len(),
138                    def.services.len()
139                );
140                let path2 = path.clone();
141                tokio::spawn(async move {
142                    if let Err(e) = run_pipeline_once(&path2).await {
143                        error!("Pipeline run failed: {e}");
144                    }
145                });
146            });
147
148        // Block until Ctrl-C
149        tokio::signal::ctrl_c().await?;
150        handle.abort();
151    } else {
152        run_pipeline_once(file).await?;
153    }
154    Ok(())
155}
156
157#[allow(clippy::expect_used)]
158async fn run_pipeline_once(file: &str) -> anyhow::Result<()> {
159    info!(file, "Loading pipeline");
160
161    let def = PipelineParser::from_figment_file(file)
162        .map_err(|e| anyhow::anyhow!("Failed to load pipeline: {e}"))?;
163
164    def.validate()
165        .map_err(|e| anyhow::anyhow!("Pipeline validation failed: {e}"))?;
166
167    let order = def
168        .topological_order()
169        .map_err(|e| anyhow::anyhow!("Topological sort failed: {e}"))?;
170
171    info!(
172        nodes = order.len(),
173        services = def.services.len(),
174        "Pipeline loaded successfully"
175    );
176
177    // Build progress bars
178    let mp = MultiProgress::new();
179    let style =
180        ProgressStyle::with_template("{spinner:.cyan} [{bar:40.cyan/blue}] {pos}/{len} {msg}")?
181            .progress_chars("=>-");
182
183    let overall = mp.add(ProgressBar::new(order.len() as u64));
184    overall.set_style(style.clone());
185    overall.set_message("executing pipeline");
186
187    for node_name in &order {
188        let node = def
189            .nodes
190            .iter()
191            .find(|n| &n.name == node_name)
192            .expect("node from topological_order must exist in nodes list");
193
194        let bar = mp.add(ProgressBar::new(3));
195        bar.set_style(ProgressStyle::with_template("  {spinner:.green} {msg}")?);
196        bar.set_message(format!(
197            "[{}] {} ({})",
198            node_name,
199            node.service,
200            node.url.as_deref().unwrap_or("-")
201        ));
202        bar.enable_steady_tick(Duration::from_millis(120));
203
204        // Simulate node execution stages: fetch → process → complete
205        tokio::time::sleep(Duration::from_millis(50)).await;
206        bar.inc(1);
207        tokio::time::sleep(Duration::from_millis(50)).await;
208        bar.inc(1);
209        tokio::time::sleep(Duration::from_millis(50)).await;
210        bar.inc(1);
211
212        bar.finish_with_message(format!("✓ {node_name}"));
213        overall.inc(1);
214    }
215
216    overall.finish_with_message("pipeline complete");
217    info!(file, "Pipeline execution finished");
218    Ok(())
219}
220
221// ─── check ────────────────────────────────────────────────────────────────────
222
223fn cmd_check(file: &str) -> anyhow::Result<()> {
224    println!("Checking pipeline: {file}");
225
226    let def = match PipelineParser::from_figment_file(file) {
227        Ok(d) => d,
228        Err(e) => {
229            eprintln!("  ✗ Parse error: {e}");
230            std::process::exit(1);
231        }
232    };
233
234    println!(
235        "  {} nodes, {} services declared",
236        def.nodes.len(),
237        def.services.len()
238    );
239
240    match def.validate() {
241        Ok(()) => {
242            let order = def
243                .topological_order()
244                .map_err(|e| anyhow::anyhow!("Topological sort failed: {e}"))?;
245            println!("  ✓ Validation passed");
246            println!("  Execution order: {}", order.join(" → "));
247        }
248        Err(e) => {
249            eprintln!("  ✗ Validation failed: {e}");
250            std::process::exit(1);
251        }
252    }
253
254    Ok(())
255}
256
257// ─── list-services ────────────────────────────────────────────────────────────
258
259#[allow(clippy::unnecessary_wraps)]
260fn cmd_list_services() -> anyhow::Result<()> {
261    let registry = global_registry();
262    let names = registry.names();
263
264    if names.is_empty() {
265        println!("No services registered.");
266        println!("Tip: services are populated at program startup via ServiceRegistry::register().");
267        return Ok(());
268    }
269
270    println!("{:<24} STATUS", "SERVICE");
271    println!("{}", "-".repeat(40));
272
273    for name in &names {
274        let status = registry
275            .status(name)
276            .unwrap_or(crate::application::registry::ServiceStatus::Unknown);
277        let status_str = match &status {
278            crate::application::registry::ServiceStatus::Healthy => "healthy".to_string(),
279            crate::application::registry::ServiceStatus::Degraded(msg) => {
280                format!("degraded ({msg})")
281            }
282            crate::application::registry::ServiceStatus::Unavailable(msg) => {
283                format!("unavailable ({msg})")
284            }
285            crate::application::registry::ServiceStatus::Unknown => "unknown".to_string(),
286        };
287        println!("{name:<24} {status_str}");
288    }
289
290    Ok(())
291}
292
293// ─── list-providers ───────────────────────────────────────────────────────────
294
/// Static descriptor for a known AI provider
///
/// Display-only metadata consumed by `cmd_list_providers` to render the
/// capability table; it is not live configuration.
struct ProviderInfo {
    /// Human-readable provider label, e.g. "claude (Anthropic)".
    name: &'static str,
    /// Comma-separated list of representative model names.
    models: &'static str,
    /// Whether the provider supports streaming responses.
    streaming: bool,
    /// Whether the provider supports vision (image) input.
    vision: bool,
    /// Whether the provider supports tool/function calling.
    tool_use: bool,
    /// Whether the provider offers a JSON output mode.
    json_mode: bool,
}
304
305#[allow(clippy::unnecessary_wraps)]
306fn cmd_list_providers() -> anyhow::Result<()> {
307    const fn flag(b: bool) -> &'static str {
308        if b { "✓" } else { "✗" }
309    }
310
311    let providers = [
312        ProviderInfo {
313            name: "claude (Anthropic)",
314            models: "claude-sonnet-4-5, claude-3-5-sonnet",
315            streaming: true,
316            vision: true,
317            tool_use: true,
318            json_mode: true,
319        },
320        ProviderInfo {
321            name: "openai (ChatGPT)",
322            models: "gpt-4o, gpt-4-turbo, gpt-3.5-turbo",
323            streaming: true,
324            vision: true,
325            tool_use: true,
326            json_mode: true,
327        },
328        ProviderInfo {
329            name: "gemini (Google)",
330            models: "gemini-1.5-pro, gemini-1.5-flash",
331            streaming: true,
332            vision: true,
333            tool_use: true,
334            json_mode: true,
335        },
336        ProviderInfo {
337            name: "copilot (GitHub)",
338            models: "gpt-4o, claude-3.5-sonnet (via Copilot API)",
339            streaming: true,
340            vision: false,
341            tool_use: true,
342            json_mode: false,
343        },
344        ProviderInfo {
345            name: "ollama (Local)",
346            models: "llama3, mistral, phi3, codellama (any pulled model)",
347            streaming: true,
348            vision: false,
349            tool_use: false,
350            json_mode: true,
351        },
352    ];
353
354    println!(
355        "{:<28} {:<8} {:<8} {:<10} {:<10}  MODELS",
356        "PROVIDER", "STREAM", "VISION", "TOOL_USE", "JSON_MODE"
357    );
358    println!("{}", "-".repeat(90));
359
360    for p in &providers {
361        println!(
362            "{:<28} {:<8} {:<8} {:<10} {:<10}  {}",
363            p.name,
364            flag(p.streaming),
365            flag(p.vision),
366            flag(p.tool_use),
367            flag(p.json_mode),
368            p.models
369        );
370    }
371
372    println!();
373    println!("Configure via TOML [[services]] blocks or STYGIAN_* environment variables.");
374    Ok(())
375}
376
377// ─── graph-viz ────────────────────────────────────────────────────────────────
378
379#[allow(clippy::needless_pass_by_value)]
380fn cmd_graph_viz(file: &str, format: VizFormat) -> anyhow::Result<()> {
381    let def = PipelineParser::from_figment_file(file)
382        .map_err(|e| anyhow::anyhow!("Failed to load pipeline: {e}"))?;
383
384    def.validate()
385        .map_err(|e| anyhow::anyhow!("Pipeline validation failed: {e}"))?;
386
387    let output = match format {
388        VizFormat::Dot => def.to_dot(),
389        VizFormat::Mermaid => def.to_mermaid(),
390    };
391
392    println!("{output}");
393    Ok(())
394}
395
396// ─── Tests ────────────────────────────────────────────────────────────────────
397
// Unit tests: clap schema/parse coverage for every sub-command, plus smoke
// tests for the handlers that can run without a real pipeline registry.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    #[test]
    fn cli_help_generates_without_panic() {
        // Verify the clap schema compiles and produces output
        let mut cmd = Cli::command();
        let _ = cmd.render_help();
    }

    #[test]
    fn cli_parses_check_subcommand() {
        // Positional file argument should land in Commands::Check.
        let cli = Cli::try_parse_from(["stygian", "check", "pipeline.toml"]).unwrap();
        assert!(matches!(cli.command, Commands::Check { file } if file == "pipeline.toml"));
    }

    #[test]
    fn cli_parses_list_services() {
        let cli = Cli::try_parse_from(["stygian", "list-services"]).unwrap();
        assert!(matches!(cli.command, Commands::ListServices));
    }

    #[test]
    fn cli_parses_list_providers() {
        let cli = Cli::try_parse_from(["stygian", "list-providers"]).unwrap();
        assert!(matches!(cli.command, Commands::ListProviders));
    }

    #[test]
    fn cli_parses_graph_viz_dot() {
        // --format dot must select the VizFormat::Dot value-enum variant.
        let cli = Cli::try_parse_from(["stygian", "graph-viz", "pipeline.toml", "--format", "dot"])
            .unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Dot,
                ..
            }
        ));
    }

    #[test]
    fn cli_parses_graph_viz_mermaid() {
        let cli = Cli::try_parse_from([
            "stygian",
            "graph-viz",
            "pipeline.toml",
            "--format",
            "mermaid",
        ])
        .unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Mermaid,
                ..
            }
        ));
    }

    #[test]
    fn cli_parses_run_with_watch() {
        // --watch is a bare flag; watch_interval falls back to its default.
        let cli = Cli::try_parse_from(["stygian", "run", "pipeline.toml", "--watch"]).unwrap();
        assert!(matches!(cli.command, Commands::Run { watch: true, .. }));
    }

    #[test]
    fn cmd_list_providers_succeeds() {
        // Static table — should always print and return Ok.
        cmd_list_providers().unwrap();
    }

    #[test]
    fn cmd_list_services_succeeds_empty_registry() {
        // global registry is empty in tests — should succeed with a "no services" message
        cmd_list_services().unwrap();
    }

    /// Helper: write a minimal valid pipeline TOML to a `NamedTempFile`
    fn minimal_pipeline_toml() -> tempfile::NamedTempFile {
        use std::io::Write as _;
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        writeln!(
            tmp,
            r#"
[[services]]
name = "http"
kind = "http"

[[nodes]]
name = "fetch"
service = "http"
url = "https://example.com"
"#
        )
        .unwrap();
        tmp
    }

    #[test]
    fn cmd_check_valid_toml_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_check(tmp.path().to_str().unwrap()).unwrap();
    }

    #[test]
    fn cmd_graph_viz_dot_format_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_graph_viz(tmp.path().to_str().unwrap(), VizFormat::Dot).unwrap();
    }

    #[test]
    fn cmd_graph_viz_mermaid_format_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_graph_viz(tmp.path().to_str().unwrap(), VizFormat::Mermaid).unwrap();
    }
}