diff --git a/src/parser.rs b/src/parser.rs index dd8e5b0..5110cbc 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -419,6 +419,8 @@ impl Type { Type::Or(types) => types.iter().any(Self::maybe_file_to_send), Type::Array(ty) => ty.maybe_file_to_send(), // Kinda bad, but the alternative is hardcoding every value + // Another alternative is to store every type's description and use it + // Because Bot API docs are inconsistent with its types Type::Object(object) => object.starts_with("Input") && object != "InputPollOption", } } @@ -489,7 +491,10 @@ pub enum MethodArgs { impl MethodArgs { fn new(args: Vec) -> Self { - if args.iter().any(|arg| arg.kind.maybe_file_to_send()) { + if args + .iter() + .any(|arg| arg.is_file_to_upload_according_to_desc() || arg.kind.maybe_file_to_send()) + { Self::WithMultipart(args) } else if args.is_empty() { Self::No @@ -507,6 +512,13 @@ pub struct Argument { pub description: String, } +impl Argument { + /// This is more reliable than checking the type + fn is_file_to_upload_according_to_desc(&self) -> bool { + self.description.contains("multipart/form-data") + } +} + fn make_url_from_fragment(fragment: String) -> String { assert!(fragment.starts_with('#')); format!("{}{}", BOT_API_DOCS_URL, fragment) } diff --git a/src/parser/sentence.rs b/src/parser/sentence.rs index 4c76ad2..201987d 100644 --- a/src/parser/sentence.rs +++ b/src/parser/sentence.rs @@ -19,22 +19,10 @@ impl Pattern { fn parts(self) -> Vec { match self { Pattern::ReturnType => vec![ - SearcherPattern::default() - .by_word("Returns") - .by_word("the") - .by_word("bot's") - .by_word("Telegram") - .exclude(), - SearcherPattern::default() - .by_word("Returns") - .by_word("the") - .by_word("list") - .by_word("of") - .exclude(), SearcherPattern::default().by_word("On").by_word("success"), + SearcherPattern::default().by_word("returns").by_word("a"), + SearcherPattern::default().by_word("Returns").by_word("an"), SearcherPattern::default().by_word("Returns"), - 
SearcherPattern::default().by_word("returns"), - SearcherPattern::default().by_word("An"), ], Pattern::Default => vec![ SearcherPattern::default().by_word("Defaults").by_word("to"), @@ -48,6 +36,11 @@ impl Pattern { .by_word("be") .by_kind(PartKind::Italic) .with_offset(-1), + SearcherPattern::default() + .by_word("must") + .by_word("be") + .by_quotes() + .with_offset(-1), SearcherPattern::default() .by_word("always") .by_quotes() @@ -63,6 +56,11 @@ impl Pattern { ], Pattern::OneOf => { vec![ + SearcherPattern::default() + .by_word("Can") + .by_word("be") + .by_word("available") + .exclude(), SearcherPattern::default().by_word("either"), SearcherPattern::default().by_word("One").by_word("of"), SearcherPattern::default().by_word("one").by_word("of"), @@ -129,6 +127,8 @@ impl PartialEq<&[Part]> for SearcherPattern { #[logos(skip r"[, ]")] #[logos(skip "\n")] enum SentenceLexer { + #[regex(r"\.[A-Z][A-Z\d]{2,}")] + FileExt, #[regex(r#"[^, "“”\(\)\.\n]+"#)] Word, #[token(".")] @@ -418,6 +418,10 @@ pub(crate) fn parse_node(elem: NodeRef) -> Result, ParseErro }; match token { + SentenceLexer::FileExt => { + let part = Part::new(lexeme.to_string()); + parts.push(part); + } SentenceLexer::Word if !paren => { let part = Part::new(lexeme.to_string()); parts.push(part); @@ -514,10 +518,15 @@ pub fn parse_type_custom( where E: Fn(&SentenceRef) -> Option, { - let sentences = text.sentences()?; + let mut sentences = text.sentences()?; let mut result = None; let patterns = pattern.parts(); + // Ignore the first sentence if it's not the only one and we are looking for a return type + if pattern == Pattern::ReturnType && sentences.len() > 1 { + sentences.remove(0); + } + 'sentences: for sentence in &sentences { for pattern in &patterns { for (word_idx, words) in sentence.parts.windows(pattern.parts.len()).enumerate() { diff --git a/src/util.rs b/src/util.rs index 9a011f4..36de6c0 100644 --- a/src/util.rs +++ b/src/util.rs @@ -6,7 +6,7 @@ pub trait StrExt { fn 
is_first_letter_lowercase(self) -> bool; } -impl<'a> StrExt for &'a str { +impl StrExt for &str { fn is_first_letter_lowercase(self) -> bool { self.chars().next().map(|c| c.is_lowercase()).unwrap() }