From 4165a398520bcedb28085cc604411d61059f497a Mon Sep 17 00:00:00 2001 From: kageru Date: Thu, 2 Jun 2022 11:41:45 +0200 Subject: [PATCH] Use lookbehind for spelling --- Cargo.lock | 38 ++++++++++++++++++++++++++++++++++++- Cargo.toml | 2 +- src/main.rs | 54 +++++++++++++++++++++++++++++++++-------------------- 3 files changed, 72 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c434d86..a1fbc3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + [[package]] name = "async-trait" version = "0.1.53" @@ -58,12 +67,27 @@ name = "basedbot" version = "0.1.0" dependencies = [ "async-trait", + "fancy-regex", "lazy_static", - "regex", "serenity", "tokio", ] +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -169,6 +193,16 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "fancy-regex" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0678ab2d46fa5195aaf59ad034c083d351377d4af57f3e073c074d0da3e3c766" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "flate2" version = "1.0.24" @@ -657,6 +691,8 @@ version = "1.5.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" dependencies = [ + "aho-corasick", + "memchr", "regex-syntax", ] diff --git a/Cargo.toml b/Cargo.toml index 2c7a8c2..7a8decd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,6 @@ edition = "2021" [dependencies] serenity = { version = "0.10", default_features = false, features = ["client", "rustls_backend", "model", "builder", "gateway", "utils", "cache"] } lazy_static = "1.4.0" -regex = { version = "1.5.6", default_features = false, features = ["std", "unicode"] } +fancy-regex = "0.10" async-trait = "0.1.53" tokio = { version = "1.18.2", features = ["rt-multi-thread"] } diff --git a/src/main.rs b/src/main.rs index 7f2d83c..e5ffe14 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ +#![feature(option_result_contains)] use async_trait::async_trait; +use fancy_regex::Regex; use lazy_static::lazy_static; -use regex::{Captures, Regex}; use serenity::{ model::{ channel::ReactionType, @@ -17,7 +18,8 @@ lazy_static! { static ref SERVER_ID: GuildId = GuildId(std::env::args().nth(1).unwrap().parse().unwrap()); static ref MEME_CHANNEL: ChannelId = ChannelId(std::env::args().nth(2).unwrap().parse().unwrap()); - static ref RETARD_REGEX: Regex = Regex::new("([^djDJh ])a( |$)").unwrap(); + static ref RETARD_REGEX: Regex = + Regex::new("(? 
String { + RETARD_REGEX.replace_all(&msg, "**er** ").trim().to_owned() +} + async fn handle_message(ctx: Context, message: Message) -> Result<(), serenity::Error> { if message.guild_id != Some(*SERVER_ID) { return Ok(()); } // That other idiot who ends words with “a” instead of “er” - if message.author.id == 261246789942902794 && RETARD_REGEX.is_match(&message.content) { + if message.author.id == 261246789942902794 + && RETARD_REGEX.is_match(&message.content).contains(&true) + && !message.content.starts_with("a ") + { message.delete(&ctx).await?; + let fixed = fix_spelling(&message.content); message .channel_id - .say(&ctx, &format!("{} wollte sagen:", message.author.mention())) - .await?; - message - .channel_id - .say( - &ctx, - RETARD_REGEX - .replace_all(&message.content_safe(&ctx).await, |caps: &Captures| { - format!("{}**er**{}", &caps[1], &caps[2]) - }) - // some common false positives - .replace("etw**er**", "etwa") - .replace("europ**er**", "europa") - .replace("amerik**er**", "amerika") - .replace("chin**er**", "china") - .replace("mang**er**", "manga"), - ) + .say(&ctx, &format!("{}: {}", message.author.mention(), fixed)) .await?; } // that one idiot who always posts 5 links per message @@ -114,3 +108,23 @@ async fn main() { .expect("Could not create client"); client.start().await.expect("could not start"); } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn fix_spelling_test() { + let msg = "aba ihr meint es ja bessa zu wissen ohne euch damit auseinanda gesetzt zu haben oda zu wollen"; + let expected = "ab**er** ihr meint es ja bess**er** zu wissen ohne euch damit auseinand**er** gesetzt zu haben od**er** zu wollen"; + assert_eq!(fix_spelling(msg), expected); + let msg = "your a bad person"; + assert_eq!(fix_spelling(msg), msg); + let msg = "china usa europa da ja manga asia etwa unsa"; + let expected = "china usa europa da ja manga asia etwa uns**er**"; + assert_eq!(fix_spelling(msg), expected); + let msg = "guta tip"; + let expected = 
"gut**er** tip"; + assert_eq!(fix_spelling(msg), expected); + } +}