// stygian_graph/application/cli.rs

1//! Command-line interface for stygian
2//!
3//! Provides the `stygian` binary with subcommands for running, validating,
4//! and visualising scraping pipelines.
5//!
6//! # Example
7//!
8//! ```text
9//! stygian run pipeline.toml
10//! stygian check pipeline.toml
11//! stygian list-services
12//! stygian list-providers
13//! stygian graph-viz pipeline.toml --format mermaid
14//! ```
15
16use std::time::Duration;
17
18use clap::{Parser, Subcommand, ValueEnum};
19use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
20use tracing::{error, info};
21
22use crate::application::pipeline_parser::{PipelineParser, PipelineWatcher};
23use crate::application::registry::global_registry;
24
25// ─── Clap structs ─────────────────────────────────────────────────────────────
26
/// Stygian — high-performance graph-based scraping engine
// Root of the argument parser: `#[derive(Parser)]` makes clap generate the
// parsing code for this struct. clap's derive macros turn `///` comments into
// help text, so treat the doc comments in this block as user-facing strings.
#[derive(Parser, Debug)]
#[command(
    name = "stygian",
    about = "High-performance graph-based scraping engine",
    // No explicit value: clap's derive fills the version in from the Cargo package.
    version,
    // Make the version flag available on the sub-commands as well.
    propagate_version = true
)]
pub struct Cli {
    /// The sub-command to execute
    // Exactly one `Commands` variant is selected per invocation; `run_cli`
    // matches on it to pick the handler.
    #[command(subcommand)]
    pub command: Commands,
}
40
41/// Available sub-commands for the stygian CLI
42#[derive(Subcommand, Debug)]
43pub enum Commands {
44    /// Load and execute a pipeline from a TOML file
45    Run {
46        /// Path to the pipeline TOML file
47        file: String,
48        /// Re-run the pipeline whenever the file changes on disk
49        #[arg(long)]
50        watch: bool,
51        /// Polling interval for watch mode (seconds)
52        #[arg(long, default_value = "5")]
53        watch_interval: u64,
54    },
55    /// Validate a pipeline TOML file without executing it
56    Check {
57        /// Path to the pipeline TOML file
58        file: String,
59    },
60    /// List all registered scraping services with health status
61    ListServices,
62    /// List all available AI providers and their capabilities
63    ListProviders,
64    /// Generate a visualisation of the pipeline DAG
65    GraphViz {
66        /// Path to the pipeline TOML file
67        file: String,
68        /// Output format: dot (Graphviz) or mermaid
69        #[arg(long, default_value = "dot")]
70        format: VizFormat,
71    },
72}
73
74/// Visualisation output format
75#[derive(Clone, Debug, ValueEnum)]
76pub enum VizFormat {
77    /// Graphviz DOT language
78    Dot,
79    /// Mermaid flowchart
80    Mermaid,
81}
82
83// ─── Entry point ─────────────────────────────────────────────────────────────
84
85/// CLI entry point.
86///
87/// Initialises tracing (honouring `RUST_LOG`; defaults to `info`) and
88/// dispatches the requested sub-command.
89///
90/// # Example
91///
92/// ```rust,no_run
93/// use stygian_graph::application::cli::run_cli;
94///
95/// #[tokio::main]
96/// async fn main() -> anyhow::Result<()> {
97///     run_cli().await
98/// }
99/// ```
100///
101/// # Errors
102///
103/// Returns `anyhow::Error` when the CLI encounters an invalid subcommand, a
104/// config file cannot be read or parsed, or the underlying pipeline execution
105/// fails. The `anyhow` wrapper is used here because CLI entry points are the
106/// only place in the workspace that may use `anyhow`.
107pub async fn run_cli() -> anyhow::Result<()> {
108    // Initialise tracing with RUST_LOG defaulting to "info"
109    let filter = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string());
110    let _ = tracing_subscriber::fmt()
111        .with_env_filter(filter)
112        .with_target(false)
113        .compact()
114        .try_init();
115
116    let cli = Cli::parse();
117
118    match cli.command {
119        Commands::Run {
120            file,
121            watch,
122            watch_interval,
123        } => cmd_run(&file, watch, watch_interval).await,
124        Commands::Check { file } => cmd_check(&file),
125        Commands::ListServices => {
126            cmd_list_services();
127            Ok(())
128        }
129        Commands::ListProviders => {
130            cmd_list_providers();
131            Ok(())
132        }
133        Commands::GraphViz { file, format } => cmd_graph_viz(&file, format),
134    }
135}
136
137// ─── run ─────────────────────────────────────────────────────────────────────
138
139async fn cmd_run(file: &str, watch: bool, watch_interval: u64) -> anyhow::Result<()> {
140    if watch {
141        info!("Watch mode enabled — polling every {watch_interval}s");
142        run_pipeline_once(file).await?;
143
144        let path = file.to_string();
145        let handle = PipelineWatcher::new(file)
146            .with_interval(Duration::from_secs(watch_interval))
147            .watch(move |def| {
148                info!(
149                    "Pipeline reloaded ({} nodes, {} services)",
150                    def.nodes.len(),
151                    def.services.len()
152                );
153                let path2 = path.clone();
154                tokio::spawn(async move {
155                    if let Err(e) = run_pipeline_once(&path2).await {
156                        // codeql[rust/unused-variable] - `e` is used via the captured format arg below.
157                        error!("Pipeline run failed: {e}");
158                    }
159                });
160            });
161
162        // Block until Ctrl-C
163        tokio::signal::ctrl_c().await?;
164        handle.abort();
165    } else {
166        run_pipeline_once(file).await?;
167    }
168    Ok(())
169}
170
171async fn run_pipeline_once(file: &str) -> anyhow::Result<()> {
172    info!(file, "Loading pipeline");
173
174    let def = PipelineParser::from_figment_file(file)
175        .map_err(|e| anyhow::anyhow!("Failed to load pipeline: {e}"))?;
176
177    def.validate()
178        .map_err(|e| anyhow::anyhow!("Pipeline validation failed: {e}"))?;
179
180    let order = def
181        .topological_order()
182        .map_err(|e| anyhow::anyhow!("Topological sort failed: {e}"))?;
183
184    info!(
185        nodes = order.len(),
186        services = def.services.len(),
187        "Pipeline loaded successfully"
188    );
189
190    // Build progress bars
191    let mp = MultiProgress::new();
192    let style =
193        ProgressStyle::with_template("{spinner:.cyan} [{bar:40.cyan/blue}] {pos}/{len} {msg}")?
194            .progress_chars("=>-");
195
196    let overall = mp.add(ProgressBar::new(order.len() as u64));
197    overall.set_style(style.clone());
198    overall.set_message("executing pipeline");
199
200    for node_name in &order {
201        let node = def
202            .nodes
203            .iter()
204            .find(|n| &n.name == node_name)
205            .ok_or_else(|| {
206                anyhow::anyhow!("BUG: node '{node_name}' from topological_order not found in nodes")
207            })?;
208
209        let bar = mp.add(ProgressBar::new(3));
210        bar.set_style(ProgressStyle::with_template("  {spinner:.green} {msg}")?);
211        bar.set_message(format!(
212            "[{}] {} ({})",
213            node_name,
214            node.service,
215            node.url.as_deref().unwrap_or("-")
216        ));
217        bar.enable_steady_tick(Duration::from_millis(120));
218
219        // Simulate node execution stages: fetch → process → complete
220        tokio::time::sleep(Duration::from_millis(50)).await;
221        bar.inc(1);
222        tokio::time::sleep(Duration::from_millis(50)).await;
223        bar.inc(1);
224        tokio::time::sleep(Duration::from_millis(50)).await;
225        bar.inc(1);
226
227        bar.finish_with_message(format!("✓ {node_name}"));
228        overall.inc(1);
229    }
230
231    overall.finish_with_message("pipeline complete");
232    info!(file, "Pipeline execution finished");
233    Ok(())
234}
235
236// ─── check ────────────────────────────────────────────────────────────────────
237
238fn cmd_check(file: &str) -> anyhow::Result<()> {
239    println!("Checking pipeline: {file}");
240
241    let def =
242        PipelineParser::from_figment_file(file).map_err(|e| anyhow::anyhow!("Parse error: {e}"))?;
243
244    println!(
245        "  {} nodes, {} services declared",
246        def.nodes.len(),
247        def.services.len()
248    );
249
250    def.validate()
251        .map_err(|e| anyhow::anyhow!("Validation failed: {e}"))?;
252    let order = def
253        .topological_order()
254        .map_err(|e| anyhow::anyhow!("Topological sort failed: {e}"))?;
255    println!("  ✓ Validation passed");
256    println!("  Execution order: {}", order.join(" → "));
257
258    Ok(())
259}
260
261// ─── list-services ────────────────────────────────────────────────────────────
262
263fn cmd_list_services() {
264    let registry = global_registry();
265    let names = registry.names();
266
267    if names.is_empty() {
268        println!("No services registered.");
269        println!("Tip: services are populated at program startup via ServiceRegistry::register().");
270        return;
271    }
272
273    println!("{:<24} STATUS", "SERVICE");
274    println!("{}", "-".repeat(40));
275
276    for name in &names {
277        let status = registry
278            .status(name)
279            .unwrap_or(crate::application::registry::ServiceStatus::Unknown);
280        let status_str = match &status {
281            crate::application::registry::ServiceStatus::Healthy => "healthy".to_string(),
282            crate::application::registry::ServiceStatus::Degraded(msg) => {
283                format!("degraded ({msg})")
284            }
285            crate::application::registry::ServiceStatus::Unavailable(msg) => {
286                format!("unavailable ({msg})")
287            }
288            crate::application::registry::ServiceStatus::Unknown => "unknown".to_string(),
289        };
290        println!("{name:<24} {status_str}");
291    }
292}
293
294// ─── list-providers ───────────────────────────────────────────────────────────
295
/// Capability summary for one AI provider, as printed by `list-providers`.
#[allow(clippy::struct_excessive_bools)] // four independent yes/no capabilities read better as plain bools than as a bitmask
struct ProviderInfo {
    /// Text shown in the PROVIDER column
    name: &'static str,
    /// Text shown in the MODELS column
    models: &'static str,
    /// Flag shown in the STREAM column
    streaming: bool,
    /// Flag shown in the VISION column
    vision: bool,
    /// Flag shown in the TOOL_USE column
    tool_use: bool,
    /// Flag shown in the JSON_MODE column
    json_mode: bool,
}

/// Handler for `stygian list-providers`: prints a fixed capability table,
/// followed by a configuration hint, to stdout.
fn cmd_list_providers() {
    /// Built-in provider table, in display order.
    const PROVIDERS: [ProviderInfo; 5] = [
        ProviderInfo {
            name: "claude (Anthropic)",
            models: "claude-sonnet-4-5, claude-3-5-sonnet",
            streaming: true,
            vision: true,
            tool_use: true,
            json_mode: true,
        },
        ProviderInfo {
            name: "openai (ChatGPT)",
            models: "gpt-4o, gpt-4-turbo, gpt-3.5-turbo",
            streaming: true,
            vision: true,
            tool_use: true,
            json_mode: true,
        },
        ProviderInfo {
            name: "gemini (Google)",
            models: "gemini-1.5-pro, gemini-1.5-flash",
            streaming: true,
            vision: true,
            tool_use: true,
            json_mode: true,
        },
        ProviderInfo {
            name: "copilot (GitHub)",
            models: "gpt-4o, claude-3.5-sonnet (via Copilot API)",
            streaming: true,
            vision: false,
            tool_use: true,
            json_mode: false,
        },
        ProviderInfo {
            name: "ollama (Local)",
            models: "llama3, mistral, phi3, codellama (any pulled model)",
            streaming: true,
            vision: false,
            tool_use: false,
            json_mode: true,
        },
    ];

    // Renders a capability flag as a check mark or a cross.
    let mark = |supported: bool| if supported { "✓" } else { "✗" };

    // Header and rows use the same column widths so they line up.
    println!(
        "{:<28} {:<8} {:<8} {:<10} {:<10}  MODELS",
        "PROVIDER", "STREAM", "VISION", "TOOL_USE", "JSON_MODE"
    );
    println!("{}", "-".repeat(90));

    for provider in &PROVIDERS {
        let ProviderInfo {
            name,
            models,
            streaming,
            vision,
            tool_use,
            json_mode,
        } = provider;
        println!(
            "{:<28} {:<8} {:<8} {:<10} {:<10}  {}",
            name,
            mark(*streaming),
            mark(*vision),
            mark(*tool_use),
            mark(*json_mode),
            models
        );
    }

    println!();
    println!("Configure via TOML [[services]] blocks or STYGIAN_* environment variables.");
}
376
377// ─── graph-viz ────────────────────────────────────────────────────────────────
378
379#[allow(clippy::needless_pass_by_value)]
380fn cmd_graph_viz(file: &str, format: VizFormat) -> anyhow::Result<()> {
381    let def = PipelineParser::from_figment_file(file)
382        .map_err(|e| anyhow::anyhow!("Failed to load pipeline: {e}"))?;
383
384    def.validate()
385        .map_err(|e| anyhow::anyhow!("Pipeline validation failed: {e}"))?;
386
387    let output = match format {
388        VizFormat::Dot => def.to_dot(),
389        VizFormat::Mermaid => def.to_mermaid(),
390    };
391
392    println!("{output}");
393    Ok(())
394}
395
396// ─── Tests ────────────────────────────────────────────────────────────────────
397
// Unit tests for the CLI: argument parsing via `Cli::try_parse_from`, plus
// smoke tests that call the synchronous command handlers directly.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    // Building the command and rendering help exercises the whole clap definition.
    #[test]
    fn cli_help_generates_without_panic() {
        // Verify the clap schema compiles and produces output
        let mut cmd = Cli::command();
        let _ = cmd.render_help();
    }

    // `check <file>` must parse into `Commands::Check` carrying the given path.
    #[test]
    fn cli_parses_check_subcommand() {
        let cli = Cli::try_parse_from(["stygian", "check", "pipeline.toml"]).unwrap();
        assert!(matches!(cli.command, Commands::Check { file } if file == "pipeline.toml"));
    }

    // The kebab-case name `list-services` maps to `Commands::ListServices`.
    #[test]
    fn cli_parses_list_services() {
        let cli = Cli::try_parse_from(["stygian", "list-services"]).unwrap();
        assert!(matches!(cli.command, Commands::ListServices));
    }

    // The kebab-case name `list-providers` maps to `Commands::ListProviders`.
    #[test]
    fn cli_parses_list_providers() {
        let cli = Cli::try_parse_from(["stygian", "list-providers"]).unwrap();
        assert!(matches!(cli.command, Commands::ListProviders));
    }

    // `--format dot` selects `VizFormat::Dot`.
    #[test]
    fn cli_parses_graph_viz_dot() {
        let cli = Cli::try_parse_from(["stygian", "graph-viz", "pipeline.toml", "--format", "dot"])
            .unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Dot,
                ..
            }
        ));
    }

    // `--format mermaid` selects `VizFormat::Mermaid`.
    #[test]
    fn cli_parses_graph_viz_mermaid() {
        let cli = Cli::try_parse_from([
            "stygian",
            "graph-viz",
            "pipeline.toml",
            "--format",
            "mermaid",
        ])
        .unwrap();
        assert!(matches!(
            cli.command,
            Commands::GraphViz {
                format: VizFormat::Mermaid,
                ..
            }
        ));
    }

    // `--watch` is a bare flag; its presence sets `watch` to true.
    #[test]
    fn cli_parses_run_with_watch() {
        let cli = Cli::try_parse_from(["stygian", "run", "pipeline.toml", "--watch"]).unwrap();
        assert!(matches!(cli.command, Commands::Run { watch: true, .. }));
    }

    // Smoke test: printing the provider table must not panic.
    #[test]
    fn cmd_list_providers_succeeds() {
        cmd_list_providers();
    }

    // Smoke test: listing services must not panic.
    #[test]
    fn cmd_list_services_succeeds_empty_registry() {
        // global registry is empty in tests — should succeed with a "no services" message
        cmd_list_services();
    }

    /// Helper: write a minimal valid pipeline TOML to a `NamedTempFile`
    // The returned handle is kept by each caller for the duration of its test
    // and its path is what gets passed to the command under test.
    fn minimal_pipeline_toml() -> tempfile::NamedTempFile {
        use std::io::Write as _;
        let mut tmp = tempfile::NamedTempFile::new().unwrap();
        // One service and one node that refers to it by name.
        writeln!(
            tmp,
            r#"
[[services]]
name = "http"
kind = "http"

[[nodes]]
name = "fetch"
service = "http"
url = "https://example.com"
"#
        )
        .unwrap();
        tmp
    }

    // `check` on the minimal pipeline must return `Ok`.
    #[test]
    fn cmd_check_valid_toml_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_check(tmp.path().to_str().unwrap()).unwrap();
    }

    // `graph-viz` in DOT format on the minimal pipeline must return `Ok`.
    #[test]
    fn cmd_graph_viz_dot_format_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_graph_viz(tmp.path().to_str().unwrap(), VizFormat::Dot).unwrap();
    }

    // `graph-viz` in Mermaid format on the minimal pipeline must return `Ok`.
    #[test]
    fn cmd_graph_viz_mermaid_format_succeeds() {
        let tmp = minimal_pipeline_toml();
        cmd_graph_viz(tmp.path().to_str().unwrap(), VizFormat::Mermaid).unwrap();
    }
}
516}