1use std::collections::HashMap;
29use std::sync::Arc;
30
31use parking_lot::RwLock;
32use serde_json::{Map, Value, json};
33use tracing::debug;
34
/// GraphQL introspection query sent by `infer_from_graphql`.
///
/// Requests the root operation type names, every type in the schema (through
/// the `FullType` fragment) and all directives. The `TypeRef` fragment unrolls
/// `ofType` six levels deep, which bounds how many nested `NON_NULL` / `LIST`
/// wrappers a single type reference can carry in the response.
const INTROSPECTION_QUERY: &str = r"
query IntrospectionQuery {
  __schema {
    queryType { name }
    mutationType { name }
    subscriptionType { name }
    types { ...FullType }
    directives {
      name description locations
      args { ...InputValue }
    }
  }
}
fragment FullType on __Type {
  kind name description
  fields(includeDeprecated: true) {
    name description
    args { ...InputValue }
    type { ...TypeRef }
    isDeprecated deprecationReason
  }
  inputFields { ...InputValue }
  interfaces { ...TypeRef }
  enumValues(includeDeprecated: true) {
    name description isDeprecated deprecationReason
  }
  possibleTypes { ...TypeRef }
}
fragment InputValue on __InputValue {
  name description
  type { ...TypeRef }
  defaultValue
}
fragment TypeRef on __Type {
  kind name
  ofType {
    kind name
    ofType {
      kind name
      ofType {
        kind name
        ofType {
          kind name
          ofType {
            kind name
            ofType { kind name }
          }
        }
      }
    }
  }
}
";
90
91use crate::domain::error::{ProviderError, Result, ServiceError, StygianError};
92use crate::ports::{AIProvider, GraphQlAuth, ScrapingService, ServiceInput};
93
94#[derive(Debug, Clone)]
96pub struct DiscoveredSchema {
97 pub schema: Value,
99 pub source: SchemaSource,
101 pub confidence: f32,
103 pub url_pattern: Option<String>,
105}
106
/// Origin of a `DiscoveredSchema`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaSource {
    /// Presumably a schema derived from an example value — not assigned
    /// anywhere in the visible code; TODO confirm against callers.
    ExampleInference,
    /// Assigned by `infer_from_html` and `infer_from_graphql` to freshly
    /// produced schemas.
    AiInference,
    /// Reported when a schema is served from the in-memory cache.
    Cached,
    /// Presumably a schema supplied or adjusted by a user — only passed to
    /// `cache_schema` by callers; TODO confirm.
    UserCorrection,
}
119
/// Tunables for `SchemaDiscoveryService`.
#[derive(Debug, Clone)]
pub struct SchemaDiscoveryConfig {
    /// Upper bound on how much of the HTML input is embedded in the AI
    /// prompt. `infer_from_html` compares it against the input's byte length.
    pub max_content_chars: usize,
    /// When `true`, the inference methods consult the in-memory cache before
    /// doing any work and store their results in it afterwards.
    pub cache_schemas: bool,
    /// NOTE(review): not read anywhere in the visible code; presumably the
    /// minimum confidence at which an AI-inferred schema should be accepted —
    /// confirm against callers.
    pub min_ai_confidence: f32,
}
130
131impl Default for SchemaDiscoveryConfig {
132 fn default() -> Self {
133 Self {
134 max_content_chars: 16_000,
135 cache_schemas: true,
136 min_ai_confidence: 0.6,
137 }
138 }
139}
140
/// Result of comparing the top-level `properties` of two JSON Schemas, as
/// produced by `detect_evolution`.
#[derive(Debug, Clone)]
pub struct SchemaDiff {
    /// Property names that exist only in the new schema.
    pub added_fields: Vec<String>,
    /// Property names that exist only in the old schema.
    pub removed_fields: Vec<String>,
    /// `(property name, old type, new type)` for every property present in
    /// both schemas whose `type` differs.
    pub type_changes: Vec<(String, String, String)>,
    /// `true` when no property was removed and no property changed type.
    pub is_compatible: bool,
}
153
154pub struct SchemaDiscoveryService {
160 config: SchemaDiscoveryConfig,
161 ai_provider: Option<Arc<dyn AIProvider>>,
163 cache: Arc<RwLock<HashMap<String, DiscoveredSchema>>>,
165 corrections: Arc<RwLock<HashMap<String, Value>>>,
167 graphql_service: Option<Arc<dyn ScrapingService>>,
169}
170
171impl SchemaDiscoveryService {
172 pub fn new(config: SchemaDiscoveryConfig, ai_provider: Option<Arc<dyn AIProvider>>) -> Self {
182 Self {
183 config,
184 ai_provider,
185 cache: Arc::new(RwLock::new(HashMap::new())),
186 corrections: Arc::new(RwLock::new(HashMap::new())),
187 graphql_service: None,
188 }
189 }
190
191 #[must_use]
205 pub fn with_graphql_service(mut self, service: Arc<dyn ScrapingService>) -> Self {
206 self.graphql_service = Some(service);
207 self
208 }
209
210 pub fn infer_from_example(&self, example: &Value) -> Value {
229 Self::infer_schema_for_value(example)
230 }
231
232 fn infer_schema_for_value(value: &Value) -> Value {
234 match value {
235 Value::Object(map) => {
236 let mut properties = Map::new();
237 let mut required = Vec::new();
238 for (key, val) in map {
239 properties.insert(key.clone(), Self::infer_schema_for_value(val));
240 required.push(json!(key));
241 }
242 json!({
243 "type": "object",
244 "properties": Value::Object(properties),
245 "required": required
246 })
247 }
248 Value::Array(items) => {
249 let item_schema = items
251 .first()
252 .map_or_else(|| json!({}), Self::infer_schema_for_value);
253 json!({
254 "type": "array",
255 "items": item_schema
256 })
257 }
258 Value::String(_) => json!({"type": "string"}),
259 Value::Number(n) => {
260 if n.is_i64() || n.is_u64() {
261 json!({"type": "integer"})
262 } else {
263 json!({"type": "number"})
264 }
265 }
266 Value::Bool(_) => json!({"type": "boolean"}),
267 Value::Null => json!({"type": "null"}),
268 }
269 }
270
271 pub async fn infer_from_html(
288 &self,
289 html: &str,
290 url_pattern: Option<&str>,
291 ) -> Result<DiscoveredSchema> {
292 if self.config.cache_schemas
294 && let Some(pattern) = url_pattern
295 {
296 let cache = self.cache.read();
297 if let Some(cached) = cache.get(pattern) {
298 debug!(pattern, "Schema cache hit");
299 return Ok(DiscoveredSchema {
300 source: SchemaSource::Cached,
301 ..cached.clone()
302 });
303 }
304 }
305
306 let provider = self.ai_provider.as_ref().ok_or_else(|| {
307 StygianError::Provider(ProviderError::ApiError(
308 "No AI provider configured for HTML schema discovery".to_string(),
309 ))
310 })?;
311
312 let truncated = if html.len() > self.config.max_content_chars {
313 &html[..self.config.max_content_chars]
314 } else {
315 html
316 };
317
318 let discovery_schema = json!({
319 "type": "object",
320 "properties": {
321 "schema": {
322 "type": "object",
323 "description": "A valid JSON Schema object describing the structured data in the HTML"
324 },
325 "confidence": {
326 "type": "number",
327 "minimum": 0.0,
328 "maximum": 1.0,
329 "description": "Confidence in the inferred schema [0.0, 1.0]"
330 },
331 "description": {
332 "type": "string",
333 "description": "Human-readable explanation of what data was detected"
334 }
335 },
336 "required": ["schema", "confidence"]
337 });
338
339 let prompt = format!(
340 "Analyze the following HTML page and infer a JSON Schema for the main structured \
341 data it contains (e.g. product listings, articles, search results). \
342 Return the schema definition.\n\nHTML:\n{truncated}"
343 );
344
345 let result = provider.extract(prompt, discovery_schema).await?;
346
347 let schema = result
348 .get("schema")
349 .cloned()
350 .unwrap_or_else(|| json!({"type": "object"}));
351
352 #[allow(clippy::cast_possible_truncation)]
353 let confidence = result
354 .get("confidence")
355 .and_then(Value::as_f64)
356 .unwrap_or(0.7) as f32;
357
358 let schema = self.apply_corrections(schema);
360
361 let discovered = DiscoveredSchema {
362 schema,
363 source: SchemaSource::AiInference,
364 confidence,
365 url_pattern: url_pattern.map(str::to_string),
366 };
367
368 if self.config.cache_schemas
370 && let Some(pattern) = url_pattern
371 {
372 self.cache
373 .write()
374 .insert(pattern.to_string(), discovered.clone());
375 }
376
377 Ok(discovered)
378 }
379
380 fn apply_corrections(&self, mut schema: Value) -> Value {
382 let corrections = self.corrections.read();
383 if corrections.is_empty() {
384 return schema;
385 }
386
387 if let Some(props) = schema.get_mut("properties").and_then(Value::as_object_mut) {
388 for (field, correction) in corrections.iter() {
389 if props.contains_key(field) {
390 props.insert(field.clone(), correction.clone());
391 }
392 }
393 }
394 schema
395 }
396
397 pub fn add_correction(&self, field_name: String, schema_fragment: Value) {
410 self.corrections.write().insert(field_name, schema_fragment);
411 }
412
413 pub fn cache_schema(
430 &self,
431 url_pattern: String,
432 schema: Value,
433 source: SchemaSource,
434 confidence: f32,
435 ) {
436 let discovered = DiscoveredSchema {
437 schema,
438 source,
439 confidence,
440 url_pattern: Some(url_pattern.clone()),
441 };
442 self.cache.write().insert(url_pattern, discovered);
443 }
444
445 pub fn detect_evolution(&self, old_schema: &Value, new_schema: &Value) -> SchemaDiff {
471 let old_props = old_schema
472 .get("properties")
473 .and_then(Value::as_object)
474 .cloned()
475 .unwrap_or_default();
476 let new_props = new_schema
477 .get("properties")
478 .and_then(Value::as_object)
479 .cloned()
480 .unwrap_or_default();
481
482 let added_fields: Vec<String> = new_props
483 .keys()
484 .filter(|k| !old_props.contains_key(*k))
485 .cloned()
486 .collect();
487
488 let removed_fields: Vec<String> = old_props
489 .keys()
490 .filter(|k| !new_props.contains_key(*k))
491 .cloned()
492 .collect();
493
494 let type_changes: Vec<(String, String, String)> = old_props
495 .iter()
496 .filter_map(|(k, old_v)| {
497 let new_v = new_props.get(k)?;
498 let old_t = old_v.get("type").and_then(Value::as_str).unwrap_or("any");
499 let new_t = new_v.get("type").and_then(Value::as_str).unwrap_or("any");
500 (old_t != new_t).then(|| (k.clone(), old_t.to_string(), new_t.to_string()))
501 })
502 .collect();
503
504 let is_compatible = removed_fields.is_empty() && type_changes.is_empty();
505
506 SchemaDiff {
507 added_fields,
508 removed_fields,
509 type_changes,
510 is_compatible,
511 }
512 }
513
514 #[allow(clippy::indexing_slicing)]
541 pub async fn infer_from_graphql(
542 &self,
543 endpoint: &str,
544 auth: Option<GraphQlAuth>,
545 ) -> Result<DiscoveredSchema> {
546 if self.config.cache_schemas {
548 let cache = self.cache.read();
549 if let Some(cached) = cache.get(endpoint) {
550 debug!(endpoint, "GraphQL introspection cache hit");
551 return Ok(DiscoveredSchema {
552 source: SchemaSource::Cached,
553 ..cached.clone()
554 });
555 }
556 }
557
558 let service = self.graphql_service.as_ref().ok_or_else(|| {
560 StygianError::Service(ServiceError::Unavailable(
561 "No GraphQL service configured for introspection".to_string(),
562 ))
563 })?;
564
565 let mut params = json!({
567 "query": INTROSPECTION_QUERY,
568 });
569 if let Some(a) = auth {
570 params["auth"] = json!({
571 "kind": match a.kind {
572 crate::ports::GraphQlAuthKind::Bearer => "bearer",
573 crate::ports::GraphQlAuthKind::ApiKey => "api_key",
574 crate::ports::GraphQlAuthKind::Header => "header",
575 crate::ports::GraphQlAuthKind::None => "none",
576 },
577 "token": a.token,
578 });
579 if let Some(header) = a.header_name {
580 params["auth"]["header_name"] = json!(header);
581 }
582 }
583
584 let input = ServiceInput {
585 url: endpoint.to_string(),
586 params,
587 };
588
589 let output = service.execute(input).await?;
591
592 let response_json: Value = serde_json::from_str(&output.data)
594 .map_err(|e| StygianError::Service(ServiceError::InvalidResponse(e.to_string())))?;
595 let introspection = response_json.get("__schema").ok_or_else(|| {
596 StygianError::Service(ServiceError::InvalidResponse(
597 "introspection response missing __schema key".to_string(),
598 ))
599 })?;
600
601 let query_type_name = introspection
603 .get("queryType")
604 .and_then(|qt| qt.get("name"))
605 .and_then(Value::as_str)
606 .unwrap_or("Query")
607 .to_string();
608
609 let types = introspection
610 .get("types")
611 .and_then(Value::as_array)
612 .cloned()
613 .unwrap_or_default();
614
615 let mut definitions = Map::new();
616 for gql_type in &types {
617 let name = gql_type.get("name").and_then(Value::as_str).unwrap_or("");
618 if name.is_empty() || name.starts_with('_') {
620 continue;
621 }
622 let def = Self::convert_type_to_definition(gql_type);
623 definitions.insert(name.to_string(), def);
624 }
625
626 let schema = json!({
627 "type": "object",
628 "description": format!("GraphQL schema for {endpoint}"),
629 "definitions": Value::Object(definitions),
630 "queryType": query_type_name,
631 });
632
633 let discovered = DiscoveredSchema {
634 schema,
635 source: SchemaSource::AiInference,
636 confidence: 1.0,
637 url_pattern: Some(endpoint.to_string()),
638 };
639
640 if self.config.cache_schemas {
642 self.cache
643 .write()
644 .insert(endpoint.to_string(), discovered.clone());
645 }
646
647 Ok(discovered)
648 }
649
650 #[allow(clippy::indexing_slicing)]
652 fn convert_type_to_definition(gql_type: &Value) -> Value {
653 let kind = gql_type.get("kind").and_then(Value::as_str).unwrap_or("");
654 match kind {
655 "OBJECT" | "INTERFACE" | "INPUT_OBJECT" => {
656 let fields: Vec<Value> = gql_type
657 .get("fields")
658 .or_else(|| gql_type.get("inputFields"))
659 .and_then(Value::as_array)
660 .cloned()
661 .unwrap_or_default();
662
663 let mut properties = Map::new();
664 let mut required: Vec<Value> = Vec::new();
665
666 for field in &fields {
667 let field_name = field.get("name").and_then(Value::as_str).unwrap_or("");
668 if field_name.is_empty() {
669 continue;
670 }
671 let type_ref = &field["type"];
672 if type_ref.get("kind").and_then(Value::as_str) == Some("NON_NULL") {
674 required.push(json!(field_name));
675 }
676 properties.insert(field_name.to_string(), Self::convert_type_ref(type_ref));
677 }
678
679 let mut obj = json!({
680 "type": "object",
681 "properties": Value::Object(properties),
682 });
683 if !required.is_empty() {
684 obj["required"] = json!(required);
685 }
686 obj
687 }
688 "ENUM" => {
689 let values: Vec<Value> = gql_type
690 .get("enumValues")
691 .and_then(Value::as_array)
692 .map(|vals| {
693 vals.iter()
694 .filter_map(|v| v.get("name").and_then(Value::as_str))
695 .map(|s| json!(s))
696 .collect()
697 })
698 .unwrap_or_default();
699 json!({ "type": "string", "enum": values })
700 }
701 "SCALAR" => {
702 let name = gql_type.get("name").and_then(Value::as_str).unwrap_or("");
703 Self::scalar_to_json_schema(name)
704 }
705 _ => json!({}),
706 }
707 }
708
709 fn convert_type_ref(type_ref: &Value) -> Value {
711 let kind = type_ref.get("kind").and_then(Value::as_str).unwrap_or("");
712 match kind {
713 "NON_NULL" => Self::convert_type_ref(&type_ref["ofType"]),
714 "LIST" => json!({
715 "type": "array",
716 "items": Self::convert_type_ref(&type_ref["ofType"]),
717 }),
718 "OBJECT" | "INTERFACE" | "INPUT_OBJECT" | "ENUM" => {
719 let name = type_ref.get("name").and_then(Value::as_str).unwrap_or("");
720 json!({ "$ref": format!("#/definitions/{name}") })
721 }
722 "SCALAR" => {
723 let name = type_ref.get("name").and_then(Value::as_str).unwrap_or("");
724 Self::scalar_to_json_schema(name)
725 }
726 _ => json!({}),
727 }
728 }
729
730 fn scalar_to_json_schema(name: &str) -> Value {
732 match name {
733 "String" | "ID" => json!({ "type": "string" }),
734 "Int" => json!({ "type": "integer" }),
735 "Float" => json!({ "type": "number" }),
736 "Boolean" => json!({ "type": "boolean" }),
737 s if s.to_ascii_lowercase().contains("datetime")
738 || s.to_ascii_lowercase().contains("date") =>
739 {
740 json!({ "type": "string", "format": "date-time" })
741 }
742 _ => json!({}),
743 }
744 }
745
746 pub fn is_valid_schema(schema: &Value) -> bool {
760 if !schema.is_object() {
761 return false;
762 }
763 schema.get("type").is_some() || schema.get("properties").is_some()
764 }
765}
766
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::indexing_slicing,
    clippy::approx_constant,
    clippy::significant_drop_tightening,
    clippy::float_cmp,
    clippy::unnecessary_map_or
)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Service with the default configuration, no AI provider and no GraphQL
    /// backend.
    fn svc() -> SchemaDiscoveryService {
        let config = SchemaDiscoveryConfig::default();
        SchemaDiscoveryService::new(config, None)
    }

    // --- inference from example values ---

    #[test]
    fn test_infer_object_schema() {
        let example = json!({"name": "Alice", "age": 30, "active": true});
        let schema = svc().infer_from_example(&example);

        assert_eq!(schema["type"], "object");
        let expectations = [("name", "string"), ("age", "integer"), ("active", "boolean")];
        for (field, expected) in expectations {
            assert_eq!(
                schema["properties"][field]["type"], expected,
                "unexpected type for field `{field}`"
            );
        }
    }

    #[test]
    fn test_infer_array_schema() {
        let example = json!([{"id": 1}, {"id": 2}]);
        let schema = svc().infer_from_example(&example);

        assert_eq!(schema["type"], "array");
        assert_eq!(schema["items"]["type"], "object");
    }

    #[test]
    fn test_infer_string_schema() {
        let schema = svc().infer_from_example(&json!("hello"));
        assert_eq!(schema["type"], "string");
    }

    #[test]
    fn test_infer_number_float() {
        let schema = svc().infer_from_example(&json!(3.14));
        assert_eq!(schema["type"], "number");
    }

    #[test]
    fn test_infer_null() {
        let schema = svc().infer_from_example(&Value::Null);
        assert_eq!(schema["type"], "null");
    }

    // --- schema plausibility check ---

    #[test]
    fn test_valid_schema_with_type() {
        let candidate = json!({"type": "object"});
        assert!(SchemaDiscoveryService::is_valid_schema(&candidate));
    }

    #[test]
    fn test_valid_schema_with_properties() {
        let candidate = json!({"properties": {"x": {"type": "string"}}});
        assert!(SchemaDiscoveryService::is_valid_schema(&candidate));
    }

    #[test]
    fn test_invalid_schema_string() {
        let candidate = json!("not schema");
        assert!(!SchemaDiscoveryService::is_valid_schema(&candidate));
    }

    #[test]
    fn test_invalid_schema_empty_object() {
        let candidate = json!({});
        assert!(!SchemaDiscoveryService::is_valid_schema(&candidate));
    }

    // --- schema evolution ---

    #[test]
    fn test_evolution_added_field() {
        let old = json!({"type": "object", "properties": {"name": {"type": "string"}}});
        let new = json!({
            "type": "object",
            "properties": {"name": {"type": "string"}, "email": {"type": "string"}}
        });

        let diff = svc().detect_evolution(&old, &new);

        assert!(diff.added_fields.iter().any(|field| field == "email"));
        assert!(diff.removed_fields.is_empty());
        assert!(diff.is_compatible);
    }

    #[test]
    fn test_evolution_removed_field() {
        let old = json!({
            "type": "object",
            "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}
        });
        let new = json!({"type": "object", "properties": {"name": {"type": "string"}}});

        let diff = svc().detect_evolution(&old, &new);

        assert!(diff.removed_fields.iter().any(|field| field == "age"));
        assert!(!diff.is_compatible);
    }

    #[test]
    fn test_evolution_type_change() {
        let old = json!({"type": "object", "properties": {"id": {"type": "string"}}});
        let new = json!({"type": "object", "properties": {"id": {"type": "integer"}}});

        let diff = svc().detect_evolution(&old, &new);

        let expected = vec![(
            "id".to_string(),
            "string".to_string(),
            "integer".to_string(),
        )];
        assert_eq!(diff.type_changes, expected);
        assert!(!diff.is_compatible);
    }

    #[test]
    fn test_evolution_no_change() {
        let schema = json!({"type": "object", "properties": {"x": {"type": "string"}}});

        let diff = svc().detect_evolution(&schema, &schema);

        assert!(diff.added_fields.is_empty());
        assert!(diff.removed_fields.is_empty());
        assert!(diff.is_compatible);
    }

    // --- corrections ---

    #[test]
    fn test_correction_overrides_inferred_type() {
        let service = svc();
        service.add_correction("id".to_string(), json!({"type": "integer"}));

        let inferred = json!({
            "type": "object",
            "properties": {
                "id": {"type": "string"},
                "name": {"type": "string"}
            }
        });
        let corrected = service.apply_corrections(inferred);

        // The corrected field is replaced, the untouched one is preserved.
        assert_eq!(corrected["properties"]["id"]["type"], "integer");
        assert_eq!(corrected["properties"]["name"]["type"], "string");
    }

    // --- cache ---

    #[test]
    fn test_cache_and_retrieve() {
        let service = svc();
        let schema = json!({"type": "object", "properties": {"title": {"type": "string"}}});
        service.cache_schema(
            "example.com".to_string(),
            schema.clone(),
            SchemaSource::UserCorrection,
            1.0,
        );

        let cache = service.cache.read();
        let entry = cache.get("example.com").unwrap();
        assert_eq!(entry.schema, schema);
        assert_eq!(entry.source, SchemaSource::UserCorrection);
    }

    // --- HTML inference ---

    #[tokio::test]
    async fn test_infer_from_html_no_provider() {
        let service = svc();

        let outcome = service
            .infer_from_html("<html><body>test</body></html>", None)
            .await;

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("No AI provider"));
    }

    #[tokio::test]
    async fn test_infer_from_html_cache_hit() {
        let service = svc();
        let schema = json!({"type": "object"});
        service.cache_schema(
            "example.com".to_string(),
            schema.clone(),
            SchemaSource::Cached,
            0.9,
        );

        // No provider is configured, so success proves the cache was used.
        let discovered = service
            .infer_from_html("<html/>", Some("example.com"))
            .await
            .unwrap();

        assert_eq!(discovered.source, SchemaSource::Cached);
        assert_eq!(discovered.schema, schema);
    }

    // --- GraphQL introspection ---

    /// Scraping service stub that answers every request with a fixed JSON
    /// document.
    struct MockGraphQlService(Value);

    #[async_trait::async_trait]
    impl ScrapingService for MockGraphQlService {
        fn name(&self) -> &'static str {
            "mock_graphql"
        }

        async fn execute(
            &self,
            _input: crate::ports::ServiceInput,
        ) -> Result<crate::ports::ServiceOutput> {
            let data = serde_json::to_string(&self.0).unwrap_or_default();
            Ok(crate::ports::ServiceOutput {
                data,
                metadata: Value::default(),
            })
        }
    }

    impl SchemaDiscoveryService {
        /// Service without an AI provider whose GraphQL backend always
        /// replies with `response`.
        fn with_mock_graphql(config: SchemaDiscoveryConfig, response: Value) -> Self {
            let backend: Arc<dyn ScrapingService> = Arc::new(MockGraphQlService(response));
            Self::new(config, None).with_graphql_service(backend)
        }
    }

    /// Minimal introspection payload: a `Query` root exposing a `User`
    /// object, plus the `String` and `ID` scalars.
    fn make_introspection_response_simple() -> Value {
        let scalar = |name: &str| {
            json!({
                "kind": "SCALAR",
                "name": name,
                "fields": null,
                "inputFields": null,
                "interfaces": null,
                "enumValues": null,
                "possibleTypes": null
            })
        };
        let object = |name: &str, fields: Value| {
            json!({
                "kind": "OBJECT",
                "name": name,
                "fields": fields,
                "inputFields": null,
                "interfaces": [],
                "enumValues": null,
                "possibleTypes": null
            })
        };

        let query_fields = json!([
            {
                "name": "user",
                "type": { "kind": "OBJECT", "name": "User", "ofType": null }
            }
        ]);
        let user_fields = json!([
            {
                "name": "id",
                "type": {
                    "kind": "NON_NULL",
                    "name": null,
                    "ofType": { "kind": "SCALAR", "name": "ID", "ofType": null }
                }
            },
            {
                "name": "name",
                "type": { "kind": "SCALAR", "name": "String", "ofType": null }
            }
        ]);

        json!({
            "__schema": {
                "queryType": { "name": "Query" },
                "mutationType": null,
                "subscriptionType": null,
                "types": [
                    object("Query", query_fields),
                    object("User", user_fields),
                    scalar("String"),
                    scalar("ID")
                ],
                "directives": []
            }
        })
    }

    #[tokio::test]
    async fn introspection_no_graphql_service() {
        let service = svc();

        let outcome = service
            .infer_from_graphql("https://api.example.com/graphql", None)
            .await;

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("No GraphQL service"));
    }

    #[test]
    fn graphql_type_to_json_schema_scalar() {
        let cases = [
            ("String", json!({"type": "string"})),
            ("Int", json!({"type": "integer"})),
            ("Float", json!({"type": "number"})),
            ("Boolean", json!({"type": "boolean"})),
            ("ID", json!({"type": "string"})),
            ("DateTime", json!({"type": "string", "format": "date-time"})),
            // Unknown custom scalars map to the unconstrained schema.
            ("JSON", json!({})),
        ];

        for (scalar, expected) in cases {
            assert_eq!(
                SchemaDiscoveryService::scalar_to_json_schema(scalar),
                expected,
                "unexpected schema for scalar `{scalar}`"
            );
        }
    }

    #[test]
    fn graphql_type_to_json_schema_object() {
        let gql_type = json!({
            "kind": "OBJECT",
            "name": "User",
            "fields": [
                {
                    "name": "id",
                    "type": { "kind": "SCALAR", "name": "ID", "ofType": null }
                },
                {
                    "name": "email",
                    "type": { "kind": "SCALAR", "name": "String", "ofType": null }
                }
            ]
        });

        let schema = SchemaDiscoveryService::convert_type_to_definition(&gql_type);

        assert_eq!(schema["type"], "object");
        assert_eq!(schema["properties"]["id"]["type"], "string");
        assert_eq!(schema["properties"]["email"]["type"], "string");

        // Neither field is NON_NULL, so nothing may be listed as required.
        let required = schema.get("required").and_then(Value::as_array);
        assert!(required.is_none_or(Vec::is_empty));
    }

    #[test]
    fn graphql_non_null_adds_to_required() {
        let gql_type = json!({
            "kind": "OBJECT",
            "name": "Product",
            "fields": [
                {
                    "name": "sku",
                    "type": {
                        "kind": "NON_NULL",
                        "name": null,
                        "ofType": { "kind": "SCALAR", "name": "String", "ofType": null }
                    }
                },
                {
                    "name": "description",
                    "type": { "kind": "SCALAR", "name": "String", "ofType": null }
                }
            ]
        });

        let schema = SchemaDiscoveryService::convert_type_to_definition(&gql_type);

        let required = schema["required"].as_array().unwrap();
        assert!(required.contains(&json!("sku")));
        assert!(!required.contains(&json!("description")));
    }

    #[tokio::test]
    async fn introspection_result_cached() {
        let endpoint = "https://api.example.com/graphql";
        let service = SchemaDiscoveryService::with_mock_graphql(
            SchemaDiscoveryConfig::default(),
            make_introspection_response_simple(),
        );

        // First call goes through the mock backend.
        let first = service.infer_from_graphql(endpoint, None).await.unwrap();
        assert_eq!(first.source, SchemaSource::AiInference);
        assert_eq!(first.confidence, 1.0);

        // Second call for the same endpoint is answered from the cache.
        let second = service.infer_from_graphql(endpoint, None).await.unwrap();
        assert_eq!(second.source, SchemaSource::Cached);
        assert_eq!(second.schema["queryType"], "Query");
    }
}