1use std::collections::BTreeMap;
2
3use serde_json::Value;
4use thiserror::Error;
5
6use crate::types::TransactionView;
7
/// Upper bound on the byte length of the raw HAR JSON text accepted by the parser.
const MAX_HAR_BYTES: usize = 10 * 1024 * 1024;
/// Upper bound on the number of elements in `log.entries`.
const MAX_HAR_ENTRIES: usize = 10_000;
/// Upper bound on the number of elements in a single response's `headers` array.
const MAX_HEADERS_PER_ENTRY: usize = 256;
/// Upper bound on the byte length of a single request URL.
const MAX_URL_BYTES: usize = 8 * 1024;
12
/// Errors returned while parsing a HAR document.
#[derive(Debug, Error)]
pub enum HarError {
    /// The input could not be parsed as JSON; wraps the underlying `serde_json` error.
    #[error("invalid HAR json: {0}")]
    InvalidJson(#[from] serde_json::Error),
    /// The JSON parsed, but a required HAR field was missing or had an unexpected type.
    /// The payload names the offending field.
    #[error("invalid HAR structure: {0}")]
    InvalidStructure(&'static str),
    /// The input exceeded one of the parser's size limits. The payload names which one.
    #[error("har input exceeds safety limit: {0}")]
    LimitExceeded(&'static str),
}
26
/// Result of parsing a HAR document.
#[derive(Debug, Clone)]
pub struct ParsedHar {
    /// Title of the first element of `log.pages`, when it is present and a string.
    pub page_title: Option<String>,
    /// One element per `log.entries` item, in document order.
    pub requests: Vec<TransactionViewWithType>,
}
35
/// A [`TransactionView`] paired with the resource type recorded on its HAR entry.
#[derive(Debug, Clone)]
pub struct TransactionViewWithType {
    /// URL, status, response headers and body snippet extracted from the entry.
    pub transaction: TransactionView,
    /// Value of the entry's `_resourceType` field, when it is present and a string.
    pub resource_type: Option<String>,
}
44
45impl TransactionViewWithType {
46 #[allow(clippy::missing_const_for_fn)]
48 #[must_use]
49 pub fn url(&self) -> &str {
50 &self.transaction.url
51 }
52
53 #[must_use]
55 pub const fn status(&self) -> u16 {
56 self.transaction.status
57 }
58}
59
60impl From<TransactionViewWithType> for TransactionView {
61 fn from(value: TransactionViewWithType) -> Self {
62 value.transaction
63 }
64}
65
66pub fn parse_har_transactions(har_json: &str) -> Result<ParsedHar, HarError> {
74 if har_json.len() > MAX_HAR_BYTES {
75 return Err(HarError::LimitExceeded("har payload too large"));
76 }
77
78 let root: Value = serde_json::from_str(har_json)?;
79
80 let log = root
81 .get("log")
82 .ok_or(HarError::InvalidStructure("missing log object"))?;
83
84 let page_title = log
85 .get("pages")
86 .and_then(Value::as_array)
87 .and_then(|pages| pages.first())
88 .and_then(|page| page.get("title"))
89 .and_then(Value::as_str)
90 .map(str::to_owned);
91
92 let entries = log
93 .get("entries")
94 .and_then(Value::as_array)
95 .ok_or(HarError::InvalidStructure("missing entries array"))?;
96
97 if entries.len() > MAX_HAR_ENTRIES {
98 return Err(HarError::LimitExceeded("too many HAR entries"));
99 }
100
101 let mut requests: Vec<TransactionViewWithType> = Vec::new();
102
103 for entry in entries {
104 let request = entry
105 .get("request")
106 .ok_or(HarError::InvalidStructure("entry missing request"))?;
107 let response = entry
108 .get("response")
109 .ok_or(HarError::InvalidStructure("entry missing response"))?;
110
111 let url = request
112 .get("url")
113 .and_then(Value::as_str)
114 .map(str::to_owned)
115 .ok_or(HarError::InvalidStructure("entry request missing url"))?;
116
117 if url.len() > MAX_URL_BYTES {
118 return Err(HarError::LimitExceeded("request url too large"));
119 }
120
121 let status = response
122 .get("status")
123 .and_then(Value::as_u64)
124 .and_then(|x| u16::try_from(x).ok())
125 .ok_or(HarError::InvalidStructure("entry response missing status"))?;
126
127 let headers = match response.get("headers").and_then(Value::as_array) {
128 Some(headers) => {
129 if headers.len() > MAX_HEADERS_PER_ENTRY {
130 return Err(HarError::LimitExceeded("too many response headers"));
131 }
132 extract_headers(headers)
133 }
134 None => BTreeMap::new(),
135 };
136
137 let body_snippet = response
138 .get("content")
139 .and_then(|content| content.get("text"))
140 .and_then(Value::as_str)
141 .map(|text| text.chars().take(2_048).collect::<String>());
142
143 let tx = TransactionView {
144 url,
145 status,
146 response_headers: headers,
147 response_body_snippet: body_snippet,
148 };
149
150 requests.push(TransactionViewWithType {
151 transaction: tx,
152 resource_type: entry
153 .get("_resourceType")
154 .and_then(Value::as_str)
155 .map(str::to_owned),
156 });
157 }
158
159 Ok(ParsedHar {
160 page_title,
161 requests,
162 })
163}
164
165fn extract_headers(headers: &[Value]) -> BTreeMap<String, String> {
166 let mut out = BTreeMap::new();
167 for header in headers {
168 let name = header
169 .get("name")
170 .and_then(Value::as_str)
171 .map(str::to_owned);
172 let value = header
173 .get("value")
174 .and_then(Value::as_str)
175 .map(str::to_owned);
176
177 if let (Some(k), Some(v)) = (name, value) {
178 let _prev = out.insert(k, v);
179 }
180 }
181 out
182}
183
#[cfg(test)]
// Relaxes the listed clippy lints for test code only.
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::indexing_slicing
)]
mod tests {
    use super::*;

    /// A single well-formed entry yields the page title, URL and status.
    #[test]
    fn parses_minimal_har() {
        let json = r#"{
            "log": {
                "pages": [{"title": "https://example.com"}],
                "entries": [
                    {
                        "_resourceType": "document",
                        "request": {"url": "https://example.com"},
                        "response": {
                            "status": 403,
                            "headers": [{"name": "server", "value": "cloudflare"}],
                            "content": {"text": "Attention Required! | Cloudflare"}
                        }
                    }
                ]
            }
        }"#;

        let parsed_result = parse_har_transactions(json);
        assert!(parsed_result.is_ok(), "parse should succeed");

        // The assertion above already fails the test on `Err`; this only unwraps the value.
        let Ok(parsed) = parsed_result else {
            return;
        };

        assert_eq!(parsed.page_title.as_deref(), Some("https://example.com"));
        assert_eq!(parsed.requests.len(), 1);

        let first = parsed.requests.first();
        assert!(first.is_some(), "parsed requests unexpectedly empty");
        if let Some(first) = first {
            assert_eq!(first.status(), 403);
            assert_eq!(first.url(), "https://example.com");
        }
    }

    /// Input one byte over `MAX_HAR_BYTES` is rejected.
    #[test]
    fn rejects_oversized_har_payload() {
        // Whitespace is enough: the size check runs before any JSON parsing.
        let oversized = " ".repeat(MAX_HAR_BYTES + 1);

        let result = parse_har_transactions(&oversized);

        assert!(matches!(
            result,
            Err(HarError::LimitExceeded("har payload too large"))
        ));
    }

    /// A `log.entries` array one element over `MAX_HAR_ENTRIES` is rejected.
    #[test]
    fn rejects_too_many_entries() {
        let entries = std::iter::repeat_n(
            r#"{"request":{"url":"https://example.com"},"response":{"status":200}}"#,
            MAX_HAR_ENTRIES + 1,
        )
        .collect::<Vec<_>>()
        .join(",");
        let json = format!(r#"{{"log":{{"entries":[{entries}]}}}}"#);

        let result = parse_har_transactions(&json);

        assert!(matches!(
            result,
            Err(HarError::LimitExceeded("too many HAR entries"))
        ));
    }

    /// A response with one header over `MAX_HEADERS_PER_ENTRY` is rejected.
    #[test]
    fn rejects_too_many_response_headers() {
        let headers = std::iter::repeat_n(
            r#"{"name":"server","value":"cloudflare"}"#,
            MAX_HEADERS_PER_ENTRY + 1,
        )
        .collect::<Vec<_>>()
        .join(",");
        let json = format!(
            r#"{{"log":{{"entries":[{{"request":{{"url":"https://example.com"}},"response":{{"status":403,"headers":[{headers}]}}}}]}}}}"#
        );

        let result = parse_har_transactions(&json);

        assert!(matches!(
            result,
            Err(HarError::LimitExceeded("too many response headers"))
        ));
    }
}