Use lookbehind for spelling

This commit is contained in:
kageru 2022-06-02 11:41:45 +02:00
parent e0f85df40a
commit 4165a39852
3 changed files with 72 additions and 22 deletions

38
Cargo.lock generated
View File

@ -8,6 +8,15 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aho-corasick"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
dependencies = [
"memchr",
]
[[package]]
name = "async-trait"
version = "0.1.53"
@ -58,12 +67,27 @@ name = "basedbot"
version = "0.1.0"
dependencies = [
"async-trait",
"fancy-regex",
"lazy_static",
"regex",
"serenity",
"tokio",
]
[[package]]
name = "bit-set"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -169,6 +193,16 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "fancy-regex"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0678ab2d46fa5195aaf59ad034c083d351377d4af57f3e073c074d0da3e3c766"
dependencies = [
"bit-set",
"regex",
]
[[package]]
name = "flate2"
version = "1.0.24"
@ -657,6 +691,8 @@ version = "1.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]

View File

@ -7,6 +7,6 @@ edition = "2021"
[dependencies]
serenity = { version = "0.10", default_features = false, features = ["client", "rustls_backend", "model", "builder", "gateway", "utils", "cache"] }
lazy_static = "1.4.0"
regex = { version = "1.5.6", default_features = false, features = ["std", "unicode"] }
fancy-regex = "0.10"
async-trait = "0.1.53"
tokio = { version = "1.18.2", features = ["rt-multi-thread"] }

View File

@ -1,6 +1,7 @@
#![feature(option_result_contains)]
use async_trait::async_trait;
use fancy_regex::Regex;
use lazy_static::lazy_static;
use regex::{Captures, Regex};
use serenity::{
model::{
channel::ReactionType,
@ -17,7 +18,8 @@ lazy_static! {
static ref SERVER_ID: GuildId = GuildId(std::env::args().nth(1).unwrap().parse().unwrap());
static ref MEME_CHANNEL: ChannelId =
ChannelId(std::env::args().nth(2).unwrap().parse().unwrap());
static ref RETARD_REGEX: Regex = Regex::new("([^djDJh ])a( |$)").unwrap();
static ref RETARD_REGEX: Regex =
Regex::new("(?<!(. | j| d|op|in|us|ng|si|tw|dd))a( |$)").unwrap();
}
#[async_trait]
@ -29,32 +31,24 @@ impl EventHandler for Handler {
}
}
/// Returns `msg` with every match of `RETARD_REGEX` replaced by `"**er** "`.
///
/// The replacement always appends a space, so the result is trimmed
/// afterwards; this also strips any other leading or trailing whitespace.
fn fix_spelling(msg: &str) -> String {
    let corrected = RETARD_REGEX.replace_all(msg, "**er** ");
    String::from(corrected.trim())
}
async fn handle_message(ctx: Context, message: Message) -> Result<(), serenity::Error> {
if message.guild_id != Some(*SERVER_ID) {
return Ok(());
}
// That other idiot who ends words with “a” instead of “er”
if message.author.id == 261246789942902794 && RETARD_REGEX.is_match(&message.content) {
if message.author.id == 261246789942902794
&& RETARD_REGEX.is_match(&message.content).contains(&true)
&& !message.content.starts_with("a ")
{
message.delete(&ctx).await?;
let fixed = fix_spelling(&message.content);
message
.channel_id
.say(&ctx, &format!("{} wollte sagen:", message.author.mention()))
.await?;
message
.channel_id
.say(
&ctx,
RETARD_REGEX
.replace_all(&message.content_safe(&ctx).await, |caps: &Captures| {
format!("{}**er**{}", &caps[1], &caps[2])
})
// some common false positives
.replace("etw**er**", "etwa")
.replace("europ**er**", "europa")
.replace("amerik**er**", "amerika")
.replace("chin**er**", "china")
.replace("mang**er**", "manga"),
)
.say(&ctx, &format!("{}: {}", message.author.mention(), fixed))
.await?;
}
// that one idiot who always posts 5 links per message
@ -114,3 +108,23 @@ async fn main() {
.expect("Could not create client");
client.start().await.expect("could not start");
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `fix_spelling` over a table of inputs and compares each result
    /// with the exact string expected for it.
    #[test]
    fn fix_spelling_test() {
        // (input, expected output); an input that must come back unchanged
        // is paired with an identical expected string.
        let cases = [
            (
                "aba ihr meint es ja bessa zu wissen ohne euch damit auseinanda gesetzt zu haben oda zu wollen",
                "ab**er** ihr meint es ja bess**er** zu wissen ohne euch damit auseinand**er** gesetzt zu haben od**er** zu wollen",
            ),
            ("your a bad person", "your a bad person"),
            (
                "china usa europa da ja manga asia etwa unsa",
                "china usa europa da ja manga asia etwa uns**er**",
            ),
            ("guta tip", "gut**er** tip"),
        ];
        for (input, expected) in cases {
            assert_eq!(fix_spelling(input), expected);
        }
    }
}