ck3_history_extractor/game_data/
localizer.rs

1use std::path::{Path, PathBuf};
2use std::{fmt, fs, mem};
3
4use serde::Serialize;
5
6use super::super::types::{GameString, HashMap};
7
/* This is an imperfect localization parser. Unfortunately, the localization
files are far too complex to be parsed without also implementing a whole
game around them. This is a simple parser that handles the most common
cases WE will encounter.
See https://ck3.paradoxwikis.com/Localization - a very important page.
We do want to handle the $$ syntax and [] function arguments, but the
formatting markup is probably not worth supporting.
*/
15
/// Turns a raw localization key into a human-readable approximation.
///
/// At most one known prefix (e.g. `dynn_`, `k_`) and at most one known suffix
/// (`_name`, `_perk`) are stripped, the remaining underscores become spaces and
/// a leading ASCII letter is upper-cased.
fn demangle_generic(input: &str) -> String {
    const PREFIXES: [&str; 16] = [
        "dynn_",
        "nick_",
        "death_",
        "tenet_",
        "doctrine_",
        "ethos_",
        "heritage_",
        "language_",
        "martial_custom_",
        "tradition_",
        "e_",
        "k_",
        "d_",
        "c_",
        "b_",
        "x_x_",
    ];
    const SUFFIXES: [&str; 2] = ["_name", "_perk"];

    // Only the first matching prefix / suffix (in table order) is removed.
    let without_prefix = PREFIXES
        .iter()
        .find_map(|prefix| input.strip_prefix(*prefix))
        .unwrap_or(input);
    let stem = SUFFIXES
        .iter()
        .find_map(|suffix| without_prefix.strip_suffix(*suffix))
        .unwrap_or(without_prefix);

    let mut readable = stem.replace('_', " ");
    // `get_mut(0..1)` yields a slice only when the text is non-empty and its
    // first character is a single byte (i.e. ASCII); upper-casing is a no-op
    // for anything that is not an ASCII letter.
    if let Some(head) = readable.get_mut(0..1) {
        head.make_ascii_uppercase();
    }
    readable
}
62
63/// An object that localizes strings.
64/// It reads localization data from a directory and provides localized strings.
65/// If the localization data is not found, it will demangle the key using an algorithm that tries to approximate the intended text
66pub struct Localizer {
67    /// Whether at least a single file has been loaded
68    initialized: bool,
69    data: HashMap<String, GameString>,
70}
71
72impl Default for Localizer {
73    fn default() -> Self {
74        Localizer {
75            initialized: false,
76            data: HashMap::default(),
77        }
78    }
79}
80
81impl Serialize for Localizer {
82    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
83    where
84        S: serde::Serializer,
85    {
86        self.data.serialize(serializer)
87    }
88}
89
90impl Localizer {
91    /// Adds localization data from a directory.
92    /// The path may be invalid, in which case the function will simply do nothing
93    pub fn add_from_path<P: AsRef<Path>>(&mut self, path: P) {
94        let path = path.as_ref();
95        if path.is_dir() {
96            // a stack to keep track of the directories
97            let mut stack: Vec<PathBuf> = vec![PathBuf::from(path)];
98            // a vector to keep track of all the files
99            let mut all_files: Vec<PathBuf> = Vec::new();
100            while let Some(entry) = stack.pop() {
101                if let Ok(entries) = fs::read_dir(entry) {
102                    for entry in entries {
103                        if let Ok(entry) = entry {
104                            if let Ok(file_type) = entry.file_type() {
105                                if file_type.is_dir() {
106                                    stack.push(entry.path());
107                                } else if entry.file_name().to_str().unwrap().ends_with(".yml") {
108                                    all_files.push(entry.path());
109                                }
110                            }
111                        }
112                    }
113                }
114            }
115            // having gone through all the directories, we can now read the files
116            for entry in all_files {
117                // read the file to string
118                let contents = fs::read_to_string(entry).unwrap();
119                // add the file to the localizer
120                self.add_localization_file(&contents);
121            }
122        }
123    }
124
125    pub fn add_localization_file(&mut self, contents: &str) {
126        self.initialized = true;
127        //The thing here is that these 'yaml' files are... peculiar. rust_yaml doesn't seem to be able to parse them correctly
128        //so we doing the thing ourselves :)
129
130        //parse the 'yaml' file
131        let mut key = String::new();
132        let mut value = String::new();
133        let mut past = false;
134        let mut quotes = false;
135        for char in contents.chars() {
136            match char {
137                ' ' | '\t' => {
138                    if quotes {
139                        value.push(char);
140                    }
141                }
142                '\n' => {
143                    if past && !quotes && !value.is_empty() {
144                        self.data
145                            .insert(mem::take(&mut key), GameString::from(mem::take(&mut value)));
146                    } else {
147                        key.clear()
148                    }
149                    past = false;
150                    quotes = false;
151                }
152                ':' => {
153                    past = true;
154                }
155                '"' => {
156                    quotes = !quotes;
157                }
158                _ => {
159                    if past {
160                        if quotes {
161                            value.push(char);
162                        }
163                    } else {
164                        key.push(char);
165                    }
166                }
167            }
168        }
169    }
170
171    /*
172    From what I can gather there are three types of special localisation invocations:
173    - $key$ - use that key instead of the key that was used to look up the string
174    - [function(arg).function(arg)...] handling this one is going to be a nightmare
175    - # #! - these are formatting instructions, can be nested
176    */
177
178    pub fn remove_formatting(&mut self) {
179        for (_, value) in self.data.iter_mut() {
180            let mut new = String::with_capacity(value.len());
181            let mut iter = value.chars();
182            let mut open = false;
183            let mut func_open = false;
184            while let Some(c) = iter.next() {
185                match c {
186                    '#' => {
187                        if open {
188                            open = false;
189                            if let Some(next) = iter.next() {
190                                // we skip the ! in #!
191                                if next != '!' {
192                                    new.push(next);
193                                }
194                            }
195                        } else {
196                            open = true;
197                            // skip to space
198                            while let Some(c) = iter.next() {
199                                if c == ' ' {
200                                    break;
201                                }
202                            }
203                        }
204                    }
205                    '$' => {
206                        func_open = !func_open;
207                        new.push(c);
208                    }
209                    '[' => {
210                        func_open = true;
211                        new.push(c);
212                    }
213                    ']' => {
214                        func_open = false;
215                        new.push(c);
216                    }
217                    '|' => {
218                        if func_open {
219                            while let Some(c) = iter.next() {
220                                if c == ']' {
221                                    new.push(c);
222                                    break;
223                                }
224                            }
225                        } else {
226                            new.push(c);
227                        }
228                    }
229                    _ => {
230                        new.push(c);
231                    }
232                }
233            }
234            *value = GameString::from(new);
235        }
236    }
237}
238
/// A localization query: a function name and the list of arguments it was called with.
/// A plain `$key$` reference is represented as the key with an empty argument list.
pub type LocalizationQuery = (String, Vec<String>);

/// A stack of localization queries: one entry per call of a `.`-chained
/// invocation such as `[GetTrait(x).GetName()]`, in the order they were written.
pub type LocalizationStack = Vec<LocalizationQuery>;
244
/// An error that occurs when localizing a string.
#[derive(Debug)]
pub enum LocalizationError {
    /// A `$key$` or `[...]` invocation could not be resolved.
    /// Carries the template text that was being resolved and the query stack that failed.
    InvalidQuery(GameString, LocalizationStack),
    /// The template text is malformed; carries a static description of the problem.
    LocalizationSyntaxError(&'static str),
}
251
252fn create_localization_stack(input: String) -> Result<LocalizationStack, LocalizationError> {
253    // MAYBE in future resolve recursively the arguments? as of right now theoretically the arguments may themselves be functions and we don't handle that
254    let mut stack: LocalizationStack = Vec::new();
255    let mut call = String::new();
256    let mut args: Vec<String> = Vec::new();
257    let mut arg = String::new();
258    let mut collect_args = false;
259    for char in input.chars() {
260        match char {
261            '(' => {
262                collect_args = true;
263            }
264            ')' => {
265                collect_args = false;
266                if !arg.is_empty() {
267                    args.push(mem::take(&mut arg));
268                }
269            }
270            ',' => {
271                if collect_args {
272                    args.push(mem::take(&mut arg));
273                }
274            }
275            '.' => {
276                if collect_args {
277                    arg.push(char);
278                } else {
279                    stack.push((mem::take(&mut call), mem::take(&mut args)));
280                }
281            }
282            ']' => {
283                Err(LocalizationError::LocalizationSyntaxError(
284                    "unexpected ']' character",
285                ))?;
286            }
287            '\'' => {} // ignore
288            _ => {
289                if collect_args {
290                    arg.push(char);
291                } else {
292                    call.push(char);
293                }
294            }
295        }
296    }
297    stack.push((call, args));
298    Ok(stack)
299}
300
301impl fmt::Display for LocalizationError {
302    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
303        match self {
304            LocalizationError::InvalidQuery(val, stack) => {
305                write!(f, "a query: {:?} in {} is in some way invalid.", stack, val)
306            }
307            LocalizationError::LocalizationSyntaxError(s) => {
308                write!(f, "localization syntax error: {}", s)
309            }
310        }
311    }
312}
313
314impl std::error::Error for LocalizationError {
315    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
316        None
317    }
318}
319
320pub trait Localize<T: AsRef<str> + From<String>> {
321    /// A simple function that looks up raw value associated with the given localization key
322    fn lookup<K: AsRef<str>>(&self, key: K) -> Option<T>;
323
324    fn is_empty(&self) -> bool;
325
326    /// A simple localization function that will return the localized string.
327    /// It assumes that the key is not complex and does not require any special handling.
328    fn localize<K: AsRef<str>>(&self, key: K) -> Result<T, LocalizationError> {
329        self.localize_query(key, |_| -> Option<&str> { None })
330    }
331
332    /// A localization function that will return the localized string.
333    /// It assumes a more complex key, resolving $provider$ into the value.
334    /// More complex keys will not be resolved.
335    fn localize_provider<K: AsRef<str>>(
336        &self,
337        key: K,
338        provider: &str,
339        value: &str,
340    ) -> Result<T, LocalizationError> {
341        let query = |q: &LocalizationStack| {
342            if q.len() == 1 && q.first().unwrap().0 == provider {
343                Some(value)
344            } else {
345                None
346            }
347        };
348        self.localize_query(key, query)
349    }
350
351    /// A localization function that will return the localized string.
352    /// It allows for complete control over the complex key resolution.
353    /// Every time a $key$ or [function(arg)] is encountered, the query function will be called.
354    /// The query function should return the value in accordance to the provided stack, or None if the value is not found.
355    /// Whether None causes an error or not is up to the implementation.
356    fn localize_query<K: AsRef<str>, S: AsRef<str>, F: Fn(&LocalizationStack) -> Option<S>>(
357        &self,
358        key: K,
359        query: F,
360    ) -> Result<T, LocalizationError> {
361        if let Some(d) = self.lookup(key.as_ref()) {
362            let value = d.as_ref();
363            // we have A template localization string, now we have to resolve it
364            let mut collect = false;
365            let mut collection = String::with_capacity(value.len());
366            let mut arg = String::new();
367            // this is technically a less efficient way of doing it, but it's easier to read
368            for c in value.chars() {
369                match c {
370                    '$' => {
371                        collect = !collect;
372                        if !collect {
373                            if let Some(val) = self.lookup(&arg) {
374                                collection.push_str(val.as_ref());
375                                arg.clear();
376                            } else {
377                                let stack = vec![(mem::take(&mut arg), Vec::new())];
378                                if let Some(val) = query(&stack) {
379                                    collection.push_str(val.as_ref());
380                                } else {
381                                    if cfg!(feature = "permissive") {
382                                        collection
383                                            .push_str(demangle_generic(arg.as_ref()).as_str());
384                                    } else {
385                                        return Err(LocalizationError::InvalidQuery(
386                                            value.into(),
387                                            stack,
388                                        ));
389                                    }
390                                }
391                            }
392                        }
393                    }
394                    '[' => {
395                        if collect {
396                            return Err(LocalizationError::LocalizationSyntaxError(
397                                "unexpected '[' character",
398                            ));
399                        } else {
400                            collect = true;
401                        }
402                    }
403                    ']' => {
404                        if collect {
405                            collect = false;
406                            let stack = create_localization_stack(mem::take(&mut arg))?;
407                            if let Some(val) = query(&stack) {
408                                collection.push_str(val.as_ref());
409                            } else {
410                                if !cfg!(feature = "permissive") {
411                                    return Err(LocalizationError::InvalidQuery(
412                                        value.into(),
413                                        stack,
414                                    ));
415                                }
416                            }
417                        } else {
418                            return Err(LocalizationError::LocalizationSyntaxError(
419                                "unexpected ']' character",
420                            ));
421                        }
422                    }
423                    _ => {
424                        if collect {
425                            arg.push(c);
426                        } else {
427                            collection.push(c);
428                        }
429                    }
430                }
431            }
432            return Ok(collection.into());
433        } else {
434            if !cfg!(feature = "permissive")
435                && !self.is_empty()
436                && !key.as_ref().is_empty()
437                && key.as_ref().contains('_')
438            {
439                eprintln!("Warning: key {} not found", key.as_ref());
440            }
441            return Ok(demangle_generic(key.as_ref()).into());
442        }
443    }
444}
445
446impl Localize<GameString> for Localizer {
447    fn lookup<K: AsRef<str>>(&self, key: K) -> Option<GameString> {
448        self.data.get(key.as_ref()).cloned()
449    }
450
451    fn is_empty(&self) -> bool {
452        self.data.is_empty()
453    }
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// The `dynn_` prefix and the `_name` / `_perk` suffixes are stripped and
    /// the first letter is capitalized.
    #[test]
    fn test_demangle_generic() {
        assert_eq!(demangle_generic("dynn_test_name"), "Test");
        assert_eq!(demangle_generic("dynn_test_perk"), "Test");
        assert_eq!(demangle_generic("dynn_test"), "Test");
    }

    /// `$key$` references are replaced by the referenced value, the text
    /// around them (including whitespace) is kept.
    #[test]
    fn test_links() {
        let mut localizer = Localizer::default();
        localizer
            .data
            .insert("key".to_string(), GameString::from("value"));
        localizer
            .data
            .insert("test".to_string(), GameString::from("$key$"));
        localizer
            .data
            .insert("test2".to_string(), GameString::from(" $key$ "));
        localizer
            .data
            .insert("test3".to_string(), GameString::from(" $key$ $key$ "));
        assert_eq!(localizer.localize("key").unwrap().as_ref(), "value");
        assert_eq!(localizer.localize("test").unwrap().as_ref(), "value");
        assert_eq!(localizer.localize("test2").unwrap().as_ref(), " value ");
        assert_eq!(
            localizer.localize("test3").unwrap().as_ref(),
            " value value "
        );
    }

    /// `#X ...#!` markup is removed and `|format` specifiers are cut out of
    /// `[...]` invocations.
    #[test]
    fn test_remove_formatting() {
        let mut localizer = Localizer::default();
        localizer
            .data
            .insert("test".to_string(), GameString::from("#P value#! # #!"));
        localizer
            .data
            .insert("test2".to_string(), GameString::from("[test|U] [test|idk]"));
        localizer.remove_formatting();
        assert_eq!(localizer.localize("test").unwrap().as_ref(), "value ");
        // test2 is read from the raw data, as resolving it would require a query function
        assert_eq!(
            localizer.data.get("test2").unwrap().as_ref(),
            "[test] [test]"
        );
    }

    /// `[...]` invocations are resolved through the query closure, while the
    /// text outside of the brackets (even stray punctuation) is left untouched.
    #[test]
    fn test_stack() {
        let mut localizer = Localizer::default();
        localizer
            .data
            .insert("trait_test".to_string(), GameString::from("Trait test"));
        localizer.data.insert(
            "test".to_string(),
            GameString::from("[GetTrait(trait_test).GetName()]"),
        );
        localizer.data.insert(
            "test2".to_string(),
            GameString::from("   [GetTrait(trait_test).GetName()]  "),
        );
        localizer.data.insert(
            "test3".to_string(),
            GameString::from(" hello( [GetTrait(trait_test).GetName()] ) "),
        );
        localizer.data.insert(
            "test4".to_string(),
            GameString::from(" hello,.(., [GetTrait(trait_test).GetName()] ) "),
        );
        // the query localizes the first argument of the first call in the stack
        let query = |stack: &LocalizationStack| Some(localizer.localize(&stack[0].1[0]).unwrap());
        assert_eq!(
            localizer.localize_query("test", query).unwrap().as_ref(),
            "Trait test"
        );
        assert_eq!(
            localizer.localize_query("test2", query).unwrap().as_ref(),
            "   Trait test  "
        );
        assert_eq!(
            localizer.localize_query("test3", query).unwrap().as_ref(),
            " hello( Trait test ) "
        );
        assert_eq!(
            localizer.localize_query("test4", query).unwrap().as_ref(),
            " hello,.(., Trait test ) "
        );
    }

    /// A `.`-chained invocation is split into one query per call.
    #[test]
    fn test_really_nasty() {
        let result =
            create_localization_stack("GetTrait(trait_test).GetName()".to_owned()).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].0, "GetTrait");
        assert_eq!(result[0].1.len(), 1);
        assert_eq!(result[0].1[0], "trait_test");
        assert_eq!(result[1].0, "GetName");
        assert_eq!(result[1].1.len(), 0);
    }

    /// Non-ASCII arguments survive, single quotes are dropped and a `.`
    /// inside of parentheses stays part of the argument.
    #[test]
    fn test_french() {
        let input = "Select_CString(CHARACTER.IsFemale,'brûlé','vif')";
        let result = create_localization_stack(input.to_owned()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].0, "Select_CString");
        assert_eq!(result[0].1.len(), 3);
        assert_eq!(result[0].1[0], "CHARACTER.IsFemale");
        assert_eq!(result[0].1[1], "brûlé");
        assert_eq!(result[0].1[2], "vif");
    }
}