ck3_history_extractor/parser/
section.rs

1use derive_more::{Display, From};
2use jomini::{
3    binary::{ReaderError as BinaryReaderError, Token as BinaryToken, TokenResolver},
4    text::{ReaderError as TextReaderError, Token as TextToken},
5    Scalar, ScalarError,
6};
7
8use super::{
9    super::types::HashMap,
10    game_object::ConversionError,
11    tokens::TOKEN_TRANSLATOR,
12    types::{Tape, TapeError},
13    SaveFileObject, SaveFileValue,
14};
15
16use std::{
17    error,
18    fmt::{self, Debug},
19    num::ParseIntError,
20    string::FromUtf8Error,
21};
22
23/// An error that occured while processing a specific section
24#[derive(Debug, From, Display)]
25pub enum SectionError {
26    /// An error occured while converting a value
27    ConversionError(ConversionError),
28    /// An error occured while parsing a scalar
29    ScalarError(ScalarError),
30    /// An unknown token was encountered
31    UnknownToken(u16),
32    /// An error occured while reading from the tape
33    TapeError(TapeError),
34    /// An error occured while decoding bytes
35    DecodingError(FromUtf8Error),
36}
37
38impl From<TextReaderError> for SectionError {
39    fn from(value: TextReaderError) -> Self {
40        Self::TapeError(value.into())
41    }
42}
43
44impl From<BinaryReaderError> for SectionError {
45    fn from(value: BinaryReaderError) -> Self {
46        Self::TapeError(value.into())
47    }
48}
49
50impl From<ParseIntError> for SectionError {
51    fn from(value: ParseIntError) -> Self {
52        Self::ConversionError(value.into())
53    }
54}
55
56impl error::Error for SectionError {
57    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
58        match self {
59            Self::ConversionError(err) => Some(err),
60            Self::ScalarError(err) => Some(err),
61            Self::TapeError(err) => Some(err),
62            Self::DecodingError(err) => Some(err),
63            _ => None,
64        }
65    }
66}
67
/// Unquoted markers that precede a color value in the text format, as in
/// `color=rgb { 255 255 255 }`. The text token loop skips these markers so
/// that only the braces and the components after them are processed.
const COLOR_HEADERS: [&[u8]; 2] = [
    b"rgb",
    b"hsv",
];
70
71/// A stack entry for the section parser.
72/// It serves two very important functions. First: it stores the name it should
73/// be saved under, or 'None' if it should be saved in parent as if the parent
74/// was an array. Second: it stores the values that are being parsed,
75/// as if the object was simultaneously an array and a map. This is then
76/// lazily evaluated into a homogeneous object. The object internals are lazily
77/// evaluated so performance cost for homogenous objects should be minimal
78#[derive(Debug, Clone)]
79struct StackEntry {
80    name: Option<String>,
81    array: Option<Vec<SaveFileValue>>,
82    map: Option<HashMap<String, SaveFileValue>>,
83}
84
85impl StackEntry {
86    /// Create a new stack entry with an optional name.
87    fn new(name: Option<String>) -> Self {
88        StackEntry {
89            name,
90            array: None,
91            map: None,
92        }
93    }
94
95    /// Push a value into the stack entry.
96    fn push(&mut self, value: SaveFileValue) {
97        if self.array.is_none() {
98            self.array = Some(Vec::new());
99        }
100        self.array.as_mut().unwrap().push(value);
101    }
102
103    /// Insert a key-value pair into the stack entry.
104    fn insert(&mut self, key: String, value: SaveFileValue) {
105        if self.map.is_none() {
106            self.map = Some(HashMap::new());
107        }
108        let map = self.map.as_mut().unwrap();
109        if let Some(val) = map.get_mut(&key) {
110            if let SaveFileValue::Object(ob) = val {
111                if let SaveFileObject::Array(arr) = ob {
112                    arr.push(value);
113                    return;
114                }
115            }
116            let arr = vec![val.clone(), value];
117            map.insert(key, SaveFileValue::Object(SaveFileObject::Array(arr)));
118        } else {
119            map.insert(key, value);
120        }
121    }
122}
123
124impl Into<SaveFileObject> for StackEntry {
125    fn into(self) -> SaveFileObject {
126        if self.map.is_none() {
127            return SaveFileObject::Array(self.array.unwrap_or(Vec::new()));
128        } else if self.array.is_none() {
129            return SaveFileObject::Map(self.map.unwrap());
130        } else {
131            let mut map = self.map.unwrap();
132            let mut array = self.array.unwrap();
133            // now we have to somehow combine universally a hashmap and an array
134            if map.keys().all(|k| k.chars().all(|k| k.is_digit(10))) {
135                // the map keys are all numerical, means probably we want to treat them as indices into the array
136                let mut keys = map
137                    .keys()
138                    .map(|k| (k.parse::<usize>().unwrap(), k.clone()))
139                    .collect::<Vec<_>>();
140                keys.sort();
141                for (index, key) in keys {
142                    let value = map.remove(&key).unwrap();
143                    if index > array.len() {
144                        array.push(value);
145                    } else {
146                        array.insert(index, value);
147                    }
148                }
149                return SaveFileObject::Array(array);
150            } else {
151                unimplemented!(
152                    "combining a hashmap and an array is not yet implemented, {:?}, {:?}",
153                    map,
154                    array
155                );
156            }
157        }
158    }
159}
160
161/// Process a scalar into a string.
162/// The [ToString] implementation of [Scalar] will be used if the scalar is ASCII.
163/// This implementation is weird overall because it will not handle non-ASCII characters correctly.
164fn scalar_to_string(scalar: Scalar) -> Result<String, SectionError> {
165    if scalar.is_ascii() {
166        Ok(scalar.to_string())
167    } else {
168        // TODO optimalization in which you can avoid parsing the string if the string is non ascii
169        Ok(String::from_utf8(scalar.as_bytes().to_vec())?)
170    }
171}
172
173/// A section of the save file.
174/// It directly maps to a [SaveFileObject] and is the largest unit of data in the save file.
175/// Since [Tape] holds state, it must be mutable for the section to be parsable.
176pub struct Section<'tape, 'data> {
177    tape: &'tape mut Tape<'data>,
178    name: String,
179}
180
181impl<'tape, 'data> Section<'tape, 'data> {
182    /// Create a new section from a tape.
183    /// The section will be named `name` and will start at `offset` and end at `end`.
184    /// The first token of the section (pointed at by `offset`) is expected to an object or array token.
185    /// The end token is not included in the section.
186    pub fn new(tape: &'tape mut Tape<'data>, name: String) -> Self {
187        Section { tape, name }
188    }
189
190    /// Get the name of the section.
191    pub fn get_name(&self) -> &str {
192        &self.name
193    }
194
195    /// Skip the section. This must be called if the section is not going to be parsed.
196    pub fn skip(&mut self) -> Result<(), SectionError> {
197        Ok(self.tape.skip_container()?)
198    }
199
200    /// Parse the section into a [SaveFileObject]. This will consume the section.
201    pub fn parse(&mut self) -> Result<SaveFileObject, SectionError> {
202        let mut stack: Vec<StackEntry> = vec![StackEntry::new(Some(self.name.clone()))];
203        let mut key = None;
204        let mut past_eq = false;
205        /// Blanket implementation that handles a new token, assuming that the token cannot be a key
206        fn add_value_quoted<T: Into<SaveFileValue>>(
207            stack: &mut Vec<StackEntry>,
208            key: &mut Option<String>,
209            past_eq: &mut bool,
210            token: T,
211        ) {
212            if *past_eq {
213                stack
214                    .last_mut()
215                    .unwrap()
216                    .insert(key.take().unwrap(), token.into());
217                *past_eq = false;
218            } else {
219                stack.last_mut().unwrap().push(token.into());
220            }
221        }
222        /// Blanket implementation that handles a new token while making no assumptions
223        fn add_value_unquoted<T: Into<SaveFileValue> + ToString>(
224            stack: &mut Vec<StackEntry>,
225            key: &mut Option<String>,
226            past_eq: &mut bool,
227            token: T,
228        ) {
229            if *past_eq {
230                stack
231                    .last_mut()
232                    .unwrap()
233                    .insert(key.take().unwrap(), token.into());
234                *past_eq = false;
235            } else {
236                if let Some(key) = key.replace(token.to_string()) {
237                    stack.last_mut().unwrap().push(key.into());
238                }
239            }
240        }
241        match self.tape {
242            Tape::Text(text) => {
243                while let Some(result) = text.next().transpose() {
244                    match result {
245                        Err(e) => return Err(e.into()),
246                        Ok(tok) => match tok {
247                            TextToken::Open => {
248                                stack.push(StackEntry::new(key.take()));
249                                if past_eq {
250                                    past_eq = false;
251                                }
252                            }
253                            TextToken::Close => {
254                                let mut last = stack.pop().unwrap();
255                                if let Some(key) = key.take() {
256                                    last.push(key.into());
257                                }
258                                let name = last.name.take();
259                                let value: SaveFileObject = last.into();
260                                if let Some(entry) = stack.last_mut() {
261                                    if name.is_some() {
262                                        entry.insert(name.unwrap(), value.into());
263                                    } else {
264                                        entry.push(value.into());
265                                    }
266                                } else {
267                                    return Ok(value);
268                                }
269                            }
270                            TextToken::Operator(_op) => {
271                                // here we have a problem, when parsing game code, there can be some instances of non = operators
272                                // MAYBE solve this here somehow
273                                /*
274                                if op == Operator::Equal {
275                                    past_eq = true;
276                                } else {
277                                    past_eq = false;
278                                } */
279                                past_eq = true;
280                            }
281                            TextToken::Quoted(token) => {
282                                add_value_quoted(
283                                    &mut stack,
284                                    &mut key,
285                                    &mut past_eq,
286                                    scalar_to_string(token)?,
287                                );
288                            }
289                            TextToken::Unquoted(token) => {
290                                // zero cost operation
291                                if COLOR_HEADERS.contains(&token.as_bytes()) {
292                                    continue; // we want to skip an unquoted token in situations like this: `color=rgb { 255 255 255 }`
293                                }
294                                add_value_unquoted(
295                                    &mut stack,
296                                    &mut key,
297                                    &mut past_eq,
298                                    scalar_to_string(token)?,
299                                );
300                            }
301                        },
302                    }
303                }
304            }
305            Tape::Binary(binary) => {
306                while let Some(result) = binary.next().transpose() {
307                    match result {
308                        Err(e) => return Err(e.into()),
309                        Ok(tok) => match tok {
310                            BinaryToken::Open => {
311                                stack.push(StackEntry::new(key.take()));
312                                if past_eq {
313                                    past_eq = false;
314                                }
315                            }
316                            BinaryToken::Close => {
317                                let mut last = stack.pop().unwrap();
318                                if let Some(key) = key.take() {
319                                    last.push(key.into());
320                                }
321                                let name = last.name.take();
322                                let value: SaveFileObject = last.into();
323                                if let Some(entry) = stack.last_mut() {
324                                    if name.is_some() {
325                                        entry.insert(name.unwrap(), value.into());
326                                    } else {
327                                        entry.push(value.into());
328                                    }
329                                } else {
330                                    return Ok(value.into());
331                                }
332                            }
333                            BinaryToken::Equal => {
334                                past_eq = true;
335                            }
336                            BinaryToken::Quoted(token) => {
337                                add_value_unquoted(
338                                    &mut stack,
339                                    &mut key,
340                                    &mut past_eq,
341                                    scalar_to_string(token)?,
342                                );
343                            }
344                            BinaryToken::Unquoted(token) => {
345                                add_value_unquoted(
346                                    &mut stack,
347                                    &mut key,
348                                    &mut past_eq,
349                                    scalar_to_string(token)?,
350                                );
351                            }
352                            BinaryToken::Bool(token) => {
353                                add_value_unquoted(&mut stack, &mut key, &mut past_eq, token);
354                            }
355                            BinaryToken::I32(token) => {
356                                add_value_unquoted(&mut stack, &mut key, &mut past_eq, token);
357                            }
358                            BinaryToken::I64(token) => {
359                                add_value_unquoted(&mut stack, &mut key, &mut past_eq, token);
360                            }
361                            BinaryToken::F32(token) => {
362                                add_value_quoted(&mut stack, &mut key, &mut past_eq, token);
363                            }
364                            BinaryToken::F64(token) => {
365                                add_value_quoted(&mut stack, &mut key, &mut past_eq, token);
366                            }
367                            BinaryToken::Id(token) => {
368                                let resolved = TOKEN_TRANSLATOR
369                                    .resolve(token)
370                                    .ok_or(SectionError::UnknownToken(token))?
371                                    .to_string();
372                                add_value_unquoted(&mut stack, &mut key, &mut past_eq, resolved);
373                            }
374                            BinaryToken::Rgb(token) => {
375                                let value = SaveFileObject::Array(vec![
376                                    token.r.into(),
377                                    token.g.into(),
378                                    token.b.into(),
379                                ]);
380                                add_value_quoted(&mut stack, &mut key, &mut past_eq, value);
381                            }
382                            BinaryToken::U32(token) => {
383                                add_value_unquoted(&mut stack, &mut key, &mut past_eq, token);
384                            }
385                            BinaryToken::U64(token) => {
386                                add_value_unquoted(&mut stack, &mut key, &mut past_eq, token);
387                            }
388                        },
389                    }
390                }
391            }
392        }
393        return Ok(stack.pop().unwrap().into());
394    }
395}
396
397impl Debug for Section<'_, '_> {
398    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
399        f.debug_struct("Section").field("name", &self.name).finish()
400    }
401}
402
#[cfg(test)]
mod tests {

    use jomini::text::TokenReader;

    use super::*;

    use super::super::types::Tape;

    /// An empty tape yields an (empty) array and keeps the section name.
    #[test]
    fn test_empty() {
        let mut tape = Tape::Text(TokenReader::from_slice(b""));
        let mut section = Section::new(&mut tape, "empty".to_string());
        assert_eq!(section.get_name(), "empty");
        let parsed = section.parse().unwrap();
        assert!(matches!(parsed, SaveFileObject::Array(_)));
    }

    /// Bare values mixed with numeric keys are merged into a single array.
    #[test]
    fn test_mixed_obj() {
        let mut tape = Tape::Text(TokenReader::from_slice(b"a b 1=c 2={d=5}}"));
        let mut section = Section::new(&mut tape, "test".to_string());
        let parsed = section.parse();
        assert!(parsed.is_ok());
        let items = match parsed.unwrap() {
            SaveFileObject::Array(items) => items,
            _ => panic!("expected array"),
        };
        assert_eq!(items.len(), 4);
        let nested = match items.get(2).unwrap() {
            SaveFileValue::Object(nested) => nested,
            _ => panic!("expected object"),
        };
        assert_eq!(
            nested
                .as_map()
                .unwrap()
                .get("d")
                .unwrap()
                .as_integer()
                .unwrap(),
            5
        );
    }

    /// A numeric key that occurs twice ends up as an array at that index.
    #[test]
    fn test_mixed_duplicate_keys() {
        let mut tape = Tape::Text(TokenReader::from_slice(b"a b 1=c 2={d=5} 1={e=6}"));
        let mut section = Section::new(&mut tape, "test".to_string());
        let parsed = section.parse().unwrap();
        let items = parsed.as_array().unwrap();
        let second = items.get(1).unwrap();
        second.as_object().unwrap().as_array().unwrap();
    }

    /// The `rgb` marker is skipped and the components form a 3-element array.
    #[test]
    fn test_rgb() {
        let mut tape = Tape::Text(TokenReader::from_slice(b"color1=rgb { 220 220 220 }"));
        let mut section = Section::new(&mut tape, "test".to_string());
        let parsed = section.parse().unwrap();
        let color = parsed.as_map().unwrap().get("color1").unwrap();
        let components = color.as_object().unwrap().as_array().unwrap();
        assert_eq!(components.len(), 3);
    }

    /// Skipping consumes the tape up to and including the section's closing brace.
    #[test]
    fn test_skip() {
        let mut tape = Tape::Text(TokenReader::from_slice(b"color1=rgb { 220 220 220 }} "));
        let mut section = Section::new(&mut tape, "test".to_string());
        section.skip().unwrap();

        assert_eq!(tape.position(), 27)
    }

    /// Non-ASCII quoted values are decoded as UTF-8.
    #[test]
    fn test_utf8() {
        let input = "test=\"Malik al-Muazzam Styrkár\"}";
        let mut tape = Tape::Text(TokenReader::from_slice(input.as_bytes()));
        let mut section = Section::new(&mut tape, "test".to_string());
        let parsed = section.parse().unwrap();
        let utf8 = parsed
            .as_map()
            .unwrap()
            .get("test")
            .unwrap()
            .as_string()
            .unwrap();
        assert_eq!(utf8.as_ref(), "Malik al-Muazzam Styrkár");
    }
}