Skip to main content

chatsounds/parsing/
mod.rs

1mod modifiers;
2
3use std::convert::Into;
4
5use nom::{
6    IResult, Parser,
7    bytes::complete::take_while1,
8    multi::{many0, many1},
9};
10use rand::{Rng, seq::IndexedRandom};
11
12pub use self::modifiers::ModifierTrait;
13use self::modifiers::{Modifier, SelectModifier, parse_modifier};
14use crate::{
15    Chatsound,
16    error::{Error, Result},
17};
18
19#[derive(Debug, PartialEq)]
20pub struct ParsedChatsound {
21    pub sentence: String,
22    pub modifiers: Vec<Modifier>,
23}
24
/// Canonicalizes a sentence into the form used for `map_store` lookup.
///
/// The input is scanned once, character by character:
/// - ASCII letters and digits are copied through unchanged (case is kept).
/// - `'` and `,` disappear without leaving a gap, so contractions
///   (`we've` → `weve`) and thousands separators (`1,000` → `1000`)
///   remain a single token.
/// - Every other character (whitespace, `_`, `-`, `.`, `!`, non-ASCII, ...)
///   acts as a word separator. Consecutive separators count as one, and
///   separators at the very start or end produce no output at all.
///
/// The result therefore consists of alphanumeric tokens joined by exactly
/// one space, with no leading or trailing space.
#[must_use]
pub fn normalize_sentence(s: &str) -> String {
    let mut normalized = String::with_capacity(s.len());
    // True when at least one separator was seen since the last kept character.
    // The space is only written once another kept character follows, which is
    // what suppresses leading, trailing and repeated separators.
    let mut gap_pending = false;

    for ch in s.chars() {
        match ch {
            // Dropped outright: neither kept nor treated as a separator.
            '\'' | ',' => {}
            kept if kept.is_ascii_alphanumeric() => {
                if gap_pending && !normalized.is_empty() {
                    normalized.push(' ');
                }
                gap_pending = false;
                normalized.push(kept);
            }
            _ => gap_pending = true,
        }
    }

    normalized
}
54
55impl ParsedChatsound {
56    pub fn choose<'a, R: Rng>(
57        &self,
58        chatsounds: &'a [Chatsound],
59        mut rng: R,
60    ) -> Option<&'a Chatsound> {
61        let modifier = self
62            .modifiers
63            .iter()
64            .find(|m| matches!(m, Modifier::Select(_)));
65        if let Some(Modifier::Select(SelectModifier { select })) = modifier {
66            chatsounds.get(*select as usize)
67        } else {
68            chatsounds.choose(&mut rng)
69        }
70    }
71}
72
73fn parse_chatsound(input: &str) -> IResult<&str, ParsedChatsound> {
74    // input = "hello:pitch(2)"
75
76    let input = input.trim();
77    let (input, sentence) = take_while1(|c: char| {
78        c.is_ascii_alphanumeric() || c.is_ascii_whitespace() || c == '\'' || c == ','
79    })(input)?;
80
81    // input = ":pitch(2)"
82    // sentence = "hello"
83
84    let (input, modifiers) = many0(parse_modifier).parse(input)?;
85
86    let chatsound = ParsedChatsound {
87        sentence: sentence.to_string(),
88        modifiers,
89    };
90
91    Ok((input, chatsound))
92}
93
94pub fn parse(input: &str) -> Result<Vec<ParsedChatsound>> {
95    match many1(parse_chatsound).parse(input) {
96        Ok((_input, chatsounds)) => Ok(chatsounds),
97        Err(err) => Err(Error::Nom {
98            err: err.map_input(Into::into),
99            text: input.into(),
100        }),
101    }
102}
103
/// Exercises `parse` on single and multiple chatsounds, modifier chains,
/// apostrophes/commas inside sentences, and inputs that must be rejected.
#[test]
#[expect(clippy::too_many_lines, reason = "table-driven test cases")]
fn test_parser() {
    use self::modifiers::*;

    // Single sentence with one modifier.
    assert_eq!(
        parse("hello:pitch(2)").unwrap(),
        vec![ParsedChatsound {
            sentence: "hello".to_string(),
            modifiers: vec![Modifier::Pitch(PitchModifier { pitch: 2.0 })],
        }]
    );
    // Modifiers are kept in the order they were written.
    assert_eq!(
        parse("hello:volume(4):pitch(2)").unwrap(),
        vec![ParsedChatsound {
            sentence: "hello".to_string(),
            modifiers: vec![
                Modifier::Volume(VolumeModifier { volume: 4.0 }),
                Modifier::Pitch(PitchModifier { pitch: 2.0 })
            ]
        }]
    );
    // Text after a modifier list starts a new chatsound.
    assert_eq!(
        parse("hello:pitch(2) more stuff").unwrap(),
        vec![
            ParsedChatsound {
                sentence: "hello".to_string(),
                modifiers: vec![Modifier::Pitch(PitchModifier { pitch: 2.0 })]
            },
            ParsedChatsound {
                sentence: "more stuff".to_string(),
                modifiers: vec![]
            }
        ]
    );
    assert_eq!(
        parse("hello").unwrap(),
        vec![ParsedChatsound {
            sentence: "hello".to_string(),
            modifiers: vec![]
        }]
    );
    // `select(2)` is stored as index 1.
    assert_eq!(
        parse("hello:select(2)").unwrap(),
        vec![ParsedChatsound {
            sentence: "hello".to_string(),
            modifiers: vec![Modifier::Select(SelectModifier { select: 1 })]
        }]
    );

    // Multi-word sentences, several chatsounds, trailing whitespace.
    assert_eq!(
        parse("hello world:pitch(123) another one:volume(22):pitch(33) ").unwrap(),
        vec![
            ParsedChatsound {
                sentence: "hello world".to_string(),
                modifiers: vec![Modifier::Pitch(PitchModifier { pitch: 123.0 })]
            },
            ParsedChatsound {
                sentence: "another one".to_string(),
                modifiers: vec![
                    Modifier::Volume(VolumeModifier { volume: 22.0 }),
                    Modifier::Pitch(PitchModifier { pitch: 33.0 })
                ]
            }
        ]
    );

    // Apostrophes are part of the sentence span.
    assert_eq!(
        parse("they're on").unwrap(),
        vec![ParsedChatsound {
            sentence: "they're on".to_string(),
            modifiers: vec![]
        }]
    );
    assert_eq!(
        parse("they're on:pitch(2)").unwrap(),
        vec![ParsedChatsound {
            sentence: "they're on".to_string(),
            modifiers: vec![Modifier::Pitch(PitchModifier { pitch: 2.0 })]
        }]
    );
    // `,` is allowed inside the sentence span so user input with thousands
    // separators / Oxford-comma phrasing round-trips through normalization.
    assert_eq!(
        parse("hello,world").unwrap(),
        vec![ParsedChatsound {
            sentence: "hello,world".to_string(),
            modifiers: vec![]
        }]
    );
    assert_eq!(
        parse("hello, world").unwrap(),
        vec![ParsedChatsound {
            sentence: "hello, world".to_string(),
            modifiers: vec![]
        }]
    );
    assert_eq!(
        parse("1,000:pitch(2)").unwrap(),
        vec![ParsedChatsound {
            sentence: "1,000".to_string(),
            modifiers: vec![Modifier::Pitch(PitchModifier { pitch: 2.0 })]
        }]
    );
    assert_eq!(
        parse("we've got 1,000:volume(4)").unwrap(),
        vec![ParsedChatsound {
            sentence: "we've got 1,000".to_string(),
            modifiers: vec![Modifier::Volume(VolumeModifier { volume: 4.0 })]
        }]
    );
    // Commas inside modifier args (e.g. `echo(duration, amplitude)`) still
    // parse as arg separators — the sentence span ends at `:`, so allowing
    // `,` in the sentence only affects what comes before the first `:`.
    assert_eq!(
        parse("1,000:pitch(1.5):echo(0.5,0.2)").unwrap(),
        vec![ParsedChatsound {
            sentence: "1,000".to_string(),
            modifiers: vec![
                Modifier::Pitch(PitchModifier { pitch: 1.5 }),
                Modifier::Echo(EchoModifier {
                    duration: 0.5,
                    amplitude_diff: -0.2,
                    amount: 1,
                }),
            ]
        }]
    );
    // `-` is not in the allowed set; the sentence ends at the dash.
    assert_eq!(
        parse("hello-world").unwrap(),
        vec![ParsedChatsound {
            sentence: "hello".to_string(),
            modifiers: vec![]
        }]
    );

    // Inputs with malformed modifier arguments: the outcome depends on how
    // `parse_modifier` reports the failure, so these are printed for manual
    // inspection rather than asserted.
    println!(
        "partial success: {:#?}",
        parse("helloh:pitch(2) bad:pitch(bad)")
    );
    println!("partial success: {:#?}", parse("bad pitch:pitch(asdf)"));

    // Inputs without a single valid sentence character can never yield a
    // chatsound, so `parse` must report an error instead of succeeding.
    let empty = parse("");
    println!("error: {empty:#?}");
    assert!(empty.is_err(), "empty input must be rejected");

    let emoji_only = parse("😂");
    println!("error: {emoji_only:#?}");
    assert!(
        emoji_only.is_err(),
        "input without any sentence characters must be rejected"
    );
}
250
/// Checks `normalize_sentence` against a table of `(input, expected)` pairs.
#[test]
fn test_normalize_sentence() {
    let cases: &[(&str, &str)] = &[
        // Apostrophes drop without inserting a space, so contractions stay one word.
        ("they're on", "theyre on"),
        ("don't", "dont"),
        ("we're're", "werere"),
        ("'leading", "leading"),
        ("trailing'", "trailing"),
        ("'''", ""),
        // Commas drop without inserting a space, so thousands separators stay one number.
        ("1,000", "1000"),
        ("a,b,c", "abc"),
        ("hello,world", "helloworld"),
        (",,,", ""),
        (",leading", "leading"),
        ("trailing,", "trailing"),
        ("5,4,3,2,1", "54321"),
        // Comma is dropped, then the space is the only boundary between words.
        ("hello, world", "hello world"),
        // `,` drops; `.` and `$` become spaces and collapse with the spaces around them.
        ("$1,000.00", "1000 00"),
        // Apostrophe + comma combined.
        ("we've got 1,000", "weve got 1000"),
        // Any other non-alnum char becomes a space; runs collapse; ends trim.
        ("dad-please", "dad please"),
        ("Hello,   World!", "Hello World"),
        ("0-a", "0 a"),
        (" file test  ", "file test"),
        ("!file_test!", "file test"),
        ("a_-_b", "a b"),
        ("a---b", "a b"),
        ("a.b.c", "a b c"),
        ("___", ""),
        ("@@@", ""),
        ("\thello\tworld\n", "hello world"),
        // Case preserved.
        ("yes no yes no YES NO", "yes no yes no YES NO"),
        (
            "hell yeah now we've got business",
            "hell yeah now weve got business",
        ),
        ("Mixed CASE 123", "Mixed CASE 123"),
        // Empty / whitespace-only.
        ("", ""),
        ("   ", ""),
        ("  spaced  ", "spaced"),
        // Non-ASCII is treated as non-alnum (becomes space, then trims).
        ("café", "caf"),
    ];

    for &(input, expected) in cases {
        assert_eq!(normalize_sentence(input), expected, "input: {input:?}");
    }
}