1use std::path::{Component, Path};
2
3use serde::Deserialize;
4
5use crate::{
6 Chatsounds, Error, cache::download, error::Result, parsing::normalize_sentence,
7 types::Chatsound,
8};
9
10#[derive(Deserialize)]
11pub struct GitHubApiFileEntry {
12 pub path: String,
13 pub r#type: String,
14 pub size: Option<usize>,
15}
16
17#[derive(Deserialize)]
18pub struct GitHubApiTrees {
19 pub tree: Vec<GitHubApiFileEntry>,
20}
21
22pub type GitHubMsgpackEntries = Vec<Vec<String>>;
23
24fn parse_api_entry(repo_path: &str, entry_path: &str) -> Option<(String, String)> {
34 let suffix = entry_path
35 .strip_prefix(repo_path)
36 .and_then(|s| s.strip_prefix('/'))?;
37 let path = Path::new(suffix);
38 let Component::Normal(filename) = path.components().nth(1)? else {
39 return None;
40 };
41 if filename.to_string_lossy().starts_with('.') {
42 return None;
43 }
44 let stem = Path::new(filename).file_stem()?;
45 let key = normalize_sentence(&stem.to_string_lossy());
46 if key.is_empty() {
47 return None;
48 }
49 Some((key, suffix.to_string()))
50}
51
52impl Chatsounds {
53 pub async fn fetch_github_api(&self, repo: &str, _repo_path: &str) -> Result<GitHubApiTrees> {
54 let api_url = format!("https://api.github.com/repos/{repo}/git/trees/HEAD?recursive=1");
55
56 #[cfg(feature = "fs")]
57 let cache = &self.cache_path;
58 #[cfg(feature = "memory")]
59 let cache = self.fs_memory.clone();
60
61 tracing::debug!(repo, "fetching GitHub tree API");
62 let bytes = download(&api_url, cache, false).await?;
63
64 let trees: GitHubApiTrees =
65 serde_json::from_slice(&bytes).map_err(|err| Error::Json { err, url: api_url })?;
66
67 tracing::debug!(repo, entries = trees.tree.len(), "fetched GitHub tree API");
68 Ok(trees)
69 }
70
71 pub fn load_github_api(
72 &mut self,
73 repo: &str,
74 repo_path: &str,
75 trees: GitHubApiTrees,
76 ) -> Result<()> {
77 let mut added = 0usize;
78 for entry in trees.tree {
79 if entry.r#type != "blob" {
80 continue;
81 }
82 let Some((sentence, sound_path)) = parse_api_entry(repo_path, &entry.path) else {
83 continue;
84 };
85
86 let vec = self.map_store.entry(sentence).or_default();
87 let chatsound = Chatsound {
88 repo: repo.to_string(),
89 repo_path: repo_path.to_string(),
90 sound_path,
91 };
92
93 let url = chatsound.get_url();
94 match vec.binary_search_by(|c| c.get_url().cmp(&url)) {
95 Ok(_pos) => {
96 }
98 Err(pos) => {
99 vec.insert(pos, chatsound);
100 added += 1;
101 }
102 }
103 }
104
105 tracing::debug!(
106 repo,
107 repo_path,
108 added,
109 total_keys = self.map_store.len(),
110 "loaded chatsounds from GitHub tree API"
111 );
112 Ok(())
113 }
114
115 pub async fn fetch_github_msgpack(
116 &self,
117 repo: &str,
118 repo_path: &str,
119 ) -> Result<GitHubMsgpackEntries> {
120 let msgpack_url =
121 format!("https://raw.githubusercontent.com/{repo}/HEAD/{repo_path}/list.msgpack");
122
123 #[cfg(feature = "fs")]
124 let cache = &self.cache_path;
125 #[cfg(feature = "memory")]
126 let cache = self.fs_memory.clone();
127
128 tracing::debug!(repo, repo_path, "fetching list.msgpack");
129 let bytes = download(&msgpack_url, cache, false).await?;
130 let entries: GitHubMsgpackEntries =
131 rmp_serde::decode::from_slice(&bytes).map_err(|err| Error::Msgpack {
132 err,
133 url: msgpack_url,
134 })?;
135
136 tracing::debug!(
137 repo,
138 repo_path,
139 entries = entries.len(),
140 "fetched list.msgpack"
141 );
142 Ok(entries)
143 }
144
145 pub fn load_github_msgpack(
146 &mut self,
147 repo: &str,
148 repo_path: &str,
149 entries: GitHubMsgpackEntries,
150 ) -> Result<()> {
151 let mut added = 0usize;
152 for entry in entries {
153 let sentence = normalize_sentence(&entry[1]);
155 if sentence.is_empty() {
156 continue;
157 }
158 let sound_path = entry[2].clone();
159 let vec = self.map_store.entry(sentence).or_default();
160
161 let chatsound = Chatsound {
162 repo: repo.to_string(),
163 repo_path: repo_path.to_string(),
164 sound_path,
165 };
166
167 let url = chatsound.get_url();
168 match vec.binary_search_by(|c| c.get_url().cmp(&url)) {
169 Ok(_pos) => {
170 }
172 Err(pos) => {
173 vec.insert(pos, chatsound);
174 added += 1;
175 }
176 }
177 }
178
179 tracing::debug!(
180 repo,
181 repo_path,
182 added,
183 total_keys = self.map_store.len(),
184 "loaded chatsounds from list.msgpack"
185 );
186 Ok(())
187 }
188}
189
#[cfg(test)]
mod tests {
    use super::parse_api_entry;

    /// Repository sub-path shared by every case in this module.
    const REPO_PATH: &str = "sounds";

    /// Asserts that `entry_path` parses to exactly `(key, sound_path)`.
    fn assert_parsed(entry_path: &str, key: &str, sound_path: &str) {
        assert_eq!(
            parse_api_entry(REPO_PATH, entry_path),
            Some((key.to_string(), sound_path.to_string())),
        );
    }

    /// Asserts that `entry_path` is rejected.
    fn assert_skipped(entry_path: &str) {
        assert_eq!(parse_api_entry(REPO_PATH, entry_path), None);
    }

    #[test]
    fn weird_filenames_from_user() {
        assert_parsed(
            "sounds/sammich/yes no yes no YES NO.ogg",
            "yes no yes no YES NO",
            "sammich/yes no yes no YES NO.ogg",
        );
        assert_parsed(
            "sounds/bill_wurtz/hell yeah now we've got business.ogg",
            "hell yeah now weve got business",
            "bill_wurtz/hell yeah now we've got business.ogg",
        );
        assert_parsed("sounds/piano ogg/0-a.ogg", "0 a", "piano ogg/0-a.ogg");
        assert_parsed(
            "sounds/test/ file test .ogg",
            "file test",
            "test/ file test .ogg",
        );
        assert_parsed(
            "sounds/test/!file_test!.ogg",
            "file test",
            "test/!file_test!.ogg",
        );
    }

    #[test]
    fn dotfiles_are_skipped() {
        for entry_path in [
            "sounds/sammich/.gitkeep",
            "sounds/cat/.DS_Store",
            "sounds/cat/.gitignore",
            "sounds/cat/.hidden.ogg",
        ] {
            assert_skipped(entry_path);
        }
    }

    #[test]
    fn top_level_files_are_skipped() {
        for entry_path in ["sounds/README.md", "sounds/file.ogg"] {
            assert_skipped(entry_path);
        }
    }

    #[test]
    fn outside_repo_path_is_skipped() {
        for entry_path in [
            "other/cat/file.ogg",
            "soundsthing/cat/file.ogg",
            "sounds",
            "",
        ] {
            assert_skipped(entry_path);
        }
    }

    #[test]
    fn empty_after_normalization_is_skipped() {
        for entry_path in [
            "sounds/cat/!!!.ogg",
            "sounds/cat/'''.ogg",
            "sounds/cat/ .ogg",
        ] {
            assert_skipped(entry_path);
        }
    }

    #[test]
    fn subdir_takes_subdir_name_as_key() {
        // The key comes from the second path component even when that component is a directory.
        assert_parsed(
            "sounds/e26/nestetrismusic/1.ogg",
            "nestetrismusic",
            "e26/nestetrismusic/1.ogg",
        );
    }

    #[test]
    fn category_with_punctuation_still_works() {
        assert_parsed(
            "sounds/!weird cat!/hello.ogg",
            "hello",
            "!weird cat!/hello.ogg",
        );
    }

    #[test]
    fn file_with_no_extension_keeps_whole_name() {
        assert_parsed("sounds/cat/foo", "foo", "cat/foo");
    }

    #[test]
    fn commas_in_filename_drop_without_space() {
        assert_parsed(
            "sounds/cat/1,000 meme.ogg",
            "1000 meme",
            "cat/1,000 meme.ogg",
        );
        assert_parsed("sounds/cat/a,b,c.ogg", "abc", "cat/a,b,c.ogg");
        assert_parsed(
            "sounds/cat/yes,no,maybe.ogg",
            "yesnomaybe",
            "cat/yes,no,maybe.ogg",
        );
        assert_parsed(
            "sounds/cat/we've got 1,000.ogg",
            "weve got 1000",
            "cat/we've got 1,000.ogg",
        );
        assert_skipped("sounds/cat/,,,.ogg");
    }
}