1use std::collections::BTreeMap;
2
3use serde_json::Value;
4use thiserror::Error;
5
6use crate::types::TransactionView;
7
/// Largest accepted HAR input, in bytes of the raw JSON text (10 MiB).
const MAX_HAR_BYTES: usize = 10 * 1024 * 1024;
/// Largest accepted number of elements in the `log.entries` array.
const MAX_HAR_ENTRIES: usize = 10_000;
/// Largest accepted number of elements in a single entry's `response.headers` array.
const MAX_HEADERS_PER_ENTRY: usize = 256;
/// Largest accepted `request.url` length, in bytes (8 KiB).
const MAX_URL_BYTES: usize = 8 * 1024;
12
/// Errors returned while parsing a HAR document.
#[derive(Debug, Error)]
pub enum HarError {
    /// The input text could not be deserialized as JSON.
    #[error("invalid HAR json: {0}")]
    InvalidJson(#[from] serde_json::Error),
    /// The JSON was valid but a required HAR field was missing or had an unusable value.
    /// The payload is a short static description of what was wrong.
    #[error("invalid HAR structure: {0}")]
    InvalidStructure(&'static str),
    /// The input exceeded one of the parser's safety limits.
    /// The payload is a short static description of which limit was hit.
    #[error("har input exceeds safety limit: {0}")]
    LimitExceeded(&'static str),
}
26
/// Result of parsing a HAR document.
#[derive(Debug, Clone)]
pub struct ParsedHar {
    /// `title` of the first element of `log.pages`, when present and a string.
    pub page_title: Option<String>,
    /// One item per element of `log.entries`, in document order.
    pub requests: Vec<TransactionViewWithType>,
}
35
/// A [`TransactionView`] paired with the resource type recorded on its HAR entry.
#[derive(Debug, Clone)]
pub struct TransactionViewWithType {
    /// URL, status, response headers and body snippet extracted from the entry.
    pub transaction: TransactionView,
    /// Value of the entry's `_resourceType` field, when present and a string.
    pub resource_type: Option<String>,
}
44
45impl TransactionViewWithType {
46 #[allow(clippy::missing_const_for_fn)]
48 #[must_use]
49 pub fn url(&self) -> &str {
50 &self.transaction.url
51 }
52
53 #[must_use]
55 pub const fn status(&self) -> u16 {
56 self.transaction.status
57 }
58}
59
60impl From<TransactionViewWithType> for TransactionView {
61 fn from(value: TransactionViewWithType) -> Self {
62 value.transaction
63 }
64}
65
66pub fn parse_har_transactions(har_json: &str) -> Result<ParsedHar, HarError> {
74 if har_json.len() > MAX_HAR_BYTES {
75 return Err(HarError::LimitExceeded("har payload too large"));
76 }
77
78 let root: Value = serde_json::from_str(har_json)?;
79
80 let log = root
81 .get("log")
82 .ok_or(HarError::InvalidStructure("missing log object"))?;
83
84 let page_title = log
85 .get("pages")
86 .and_then(Value::as_array)
87 .and_then(|pages| pages.first())
88 .and_then(|page| page.get("title"))
89 .and_then(Value::as_str)
90 .map(str::to_owned);
91
92 let entries = log
93 .get("entries")
94 .and_then(Value::as_array)
95 .ok_or(HarError::InvalidStructure("missing entries array"))?;
96
97 if entries.len() > MAX_HAR_ENTRIES {
98 return Err(HarError::LimitExceeded("too many HAR entries"));
99 }
100
101 let mut requests: Vec<TransactionViewWithType> = Vec::new();
102
103 for entry in entries {
104 let request = entry
105 .get("request")
106 .ok_or(HarError::InvalidStructure("entry missing request"))?;
107 let response = entry
108 .get("response")
109 .ok_or(HarError::InvalidStructure("entry missing response"))?;
110
111 let url = request
112 .get("url")
113 .and_then(Value::as_str)
114 .map(str::to_owned)
115 .ok_or(HarError::InvalidStructure("entry request missing url"))?;
116
117 if url.len() > MAX_URL_BYTES {
118 return Err(HarError::LimitExceeded("request url too large"));
119 }
120
121 let status = response
122 .get("status")
123 .and_then(Value::as_u64)
124 .and_then(|x| u16::try_from(x).ok())
125 .ok_or(HarError::InvalidStructure("entry response missing status"))?;
126
127 let headers = match response.get("headers").and_then(Value::as_array) {
128 Some(headers) => {
129 if headers.len() > MAX_HEADERS_PER_ENTRY {
130 return Err(HarError::LimitExceeded("too many response headers"));
131 }
132 extract_headers(headers)
133 }
134 None => BTreeMap::new(),
135 };
136
137 let body_snippet = response
138 .get("content")
139 .and_then(|content| content.get("text"))
140 .and_then(Value::as_str)
141 .map(|text| text.chars().take(2_048).collect::<String>());
142
143 let tx = TransactionView {
144 url,
145 status,
146 response_headers: headers,
147 response_body_snippet: body_snippet,
148 };
149
150 requests.push(TransactionViewWithType {
151 transaction: tx,
152 resource_type: entry
153 .get("_resourceType")
154 .and_then(Value::as_str)
155 .map(str::to_owned),
156 });
157 }
158
159 Ok(ParsedHar {
160 page_title,
161 requests,
162 })
163}
164
165fn extract_headers(headers: &[Value]) -> BTreeMap<String, String> {
166 let mut out = BTreeMap::new();
167 for header in headers {
168 let name = header
169 .get("name")
170 .and_then(Value::as_str)
171 .map(str::to_owned);
172 let value = header
173 .get("value")
174 .and_then(Value::as_str)
175 .map(str::to_owned);
176
177 if let (Some(k), Some(v)) = (name, value) {
178 let _prev = out.insert(k, v);
179 }
180 }
181 out
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Happy path: a single well-formed entry is parsed with all of its optional fields.
    #[test]
    fn parses_minimal_har() {
        let json = r#"{
            "log": {
                "pages": [{"title": "https://example.com"}],
                "entries": [
                    {
                        "_resourceType": "document",
                        "request": {"url": "https://example.com"},
                        "response": {
                            "status": 403,
                            "headers": [{"name": "server", "value": "cloudflare"}],
                            "content": {"text": "Attention Required! | Cloudflare"}
                        }
                    }
                ]
            }
        }"#;

        let parsed_result = parse_har_transactions(json);
        assert!(parsed_result.is_ok(), "parse should succeed");

        let Ok(parsed) = parsed_result else {
            return;
        };

        assert_eq!(parsed.page_title.as_deref(), Some("https://example.com"));
        assert_eq!(parsed.requests.len(), 1);

        let first = parsed.requests.first();
        assert!(first.is_some(), "parsed requests unexpectedly empty");
        if let Some(first) = first {
            assert_eq!(first.status(), 403);
            assert_eq!(first.url(), "https://example.com");
            assert_eq!(first.resource_type.as_deref(), Some("document"));
            assert_eq!(
                first
                    .transaction
                    .response_headers
                    .get("server")
                    .map(String::as_str),
                Some("cloudflare")
            );
            assert_eq!(
                first.transaction.response_body_snippet.as_deref(),
                Some("Attention Required! | Cloudflare")
            );

            // The `From` conversion must hand back the same transaction data.
            let plain = TransactionView::from(first.clone());
            assert_eq!(plain.url, "https://example.com");
            assert_eq!(plain.status, 403);
        }
    }

    /// Input that is not JSON at all is reported as `InvalidJson`.
    #[test]
    fn rejects_invalid_json() {
        let result = parse_har_transactions("not json");

        assert!(matches!(result, Err(HarError::InvalidJson(_))));
    }

    /// A JSON document without a `log` member is a structural error.
    #[test]
    fn rejects_missing_log() {
        let result = parse_har_transactions("{}");

        assert!(matches!(
            result,
            Err(HarError::InvalidStructure("missing log object"))
        ));
    }

    /// A `log` without an `entries` array is a structural error.
    #[test]
    fn rejects_missing_entries() {
        let result = parse_har_transactions(r#"{"log":{}}"#);

        assert!(matches!(
            result,
            Err(HarError::InvalidStructure("missing entries array"))
        ));
    }

    /// The size limit is enforced before any JSON parsing takes place.
    #[test]
    fn rejects_oversized_har_payload() {
        let oversized = " ".repeat(MAX_HAR_BYTES + 1);

        let result = parse_har_transactions(&oversized);

        assert!(matches!(
            result,
            Err(HarError::LimitExceeded("har payload too large"))
        ));
    }

    /// One entry more than the limit is rejected.
    #[test]
    fn rejects_too_many_entries() {
        let entries = std::iter::repeat_n(
            r#"{"request":{"url":"https://example.com"},"response":{"status":200}}"#,
            MAX_HAR_ENTRIES + 1,
        )
        .collect::<Vec<_>>()
        .join(",");
        let json = format!(r#"{{"log":{{"entries":[{entries}]}}}}"#);

        let result = parse_har_transactions(&json);

        assert!(matches!(
            result,
            Err(HarError::LimitExceeded("too many HAR entries"))
        ));
    }

    /// A request URL longer than `MAX_URL_BYTES` is rejected.
    #[test]
    fn rejects_oversized_request_url() {
        let url = format!("https://example.com/{}", "a".repeat(MAX_URL_BYTES));
        let json = format!(
            r#"{{"log":{{"entries":[{{"request":{{"url":"{url}"}},"response":{{"status":200}}}}]}}}}"#
        );

        let result = parse_har_transactions(&json);

        assert!(matches!(
            result,
            Err(HarError::LimitExceeded("request url too large"))
        ));
    }

    /// One response header more than the limit is rejected.
    #[test]
    fn rejects_too_many_response_headers() {
        let headers = std::iter::repeat_n(
            r#"{"name":"server","value":"cloudflare"}"#,
            MAX_HEADERS_PER_ENTRY + 1,
        )
        .collect::<Vec<_>>()
        .join(",");
        let json = format!(
            r#"{{"log":{{"entries":[{{"request":{{"url":"https://example.com"}},"response":{{"status":403,"headers":[{headers}]}}}}]}}}}"#
        );

        let result = parse_har_transactions(&json);

        assert!(matches!(
            result,
            Err(HarError::LimitExceeded("too many response headers"))
        ));
    }
}