diff --git a/src/core/EntrySearcher.cpp b/src/core/EntrySearcher.cpp index cb9e135fe3..375de88623 100644 --- a/src/core/EntrySearcher.cpp +++ b/src/core/EntrySearcher.cpp @@ -157,48 +157,60 @@ bool EntrySearcher::searchEntryImpl(const Entry* entry) // However when skipping protected fields, we will reject everything instead bool found = !m_skipProtected; for (const auto& term : m_searchTerms) { + auto normalize = [&term](const QString& str) -> QString { + return term.accentSensitive ? str : Tools::stripDiacritics(str); + }; + + auto anyMatch = [&](const QStringList& list) -> bool { + for (const auto& item : list) { + if (term.regex.match(normalize(item)).hasMatch()) + return true; + } + return false; + }; + switch (term.field) { case Field::Title: - found = term.regex.match(entry->resolvePlaceholder(entry->title())).hasMatch(); + found = term.regex.match(normalize(entry->resolvePlaceholder(entry->title()))).hasMatch(); break; case Field::Username: - found = term.regex.match(entry->resolvePlaceholder(entry->username())).hasMatch(); + found = term.regex.match(normalize(entry->resolvePlaceholder(entry->username()))).hasMatch(); break; case Field::Password: if (m_skipProtected) { continue; } - found = term.regex.match(entry->resolvePlaceholder(entry->password())).hasMatch(); + found = term.regex.match(normalize(entry->resolvePlaceholder(entry->password()))).hasMatch(); break; case Field::Url: - found = term.regex.match(entry->resolvePlaceholder(entry->url())).hasMatch(); + found = term.regex.match(normalize(entry->resolvePlaceholder(entry->url()))).hasMatch(); break; case Field::Notes: - found = term.regex.match(entry->notes()).hasMatch(); + found = term.regex.match(normalize(entry->notes())).hasMatch(); break; case Field::AttributeKV: - found = !attributes.filter(term.regex).empty(); + found = anyMatch(attributes); break; case Field::Attachment: - found = !attachments.filter(term.regex).empty(); + found = anyMatch(attachments); break; case Field::AttributeValue: if 
(m_skipProtected && entry->attributes()->isProtected(term.word)) { continue; } found = entry->attributes()->contains(term.word) - && term.regex.match(entry->attributes()->value(term.word)).hasMatch(); + && term.regex.match(normalize(entry->attributes()->value(term.word))).hasMatch(); break; case Field::Group: // Match against the full hierarchy if the word contains a '/' otherwise just the group name if (term.word.contains('/')) { - found = term.regex.match(hierarchy).hasMatch(); + found = term.regex.match(normalize(hierarchy)).hasMatch(); } else if (entry->group()) { - found = term.regex.match(entry->group()->name()).hasMatch(); + found = term.regex.match(normalize(entry->group()->name())).hasMatch(); } break; case Field::Tag: - found = entry->tagList().indexOf(term.regex) != -1; + found = anyMatch(entry->tagList()); break; case Field::Is: if (term.word.startsWith("expired", Qt::CaseInsensitive)) { @@ -233,10 +245,10 @@ bool EntrySearcher::searchEntryImpl(const Entry* entry) break; default: // Terms without a specific field try to match title, username, url, and notes - found = term.regex.match(entry->resolvePlaceholder(entry->title())).hasMatch() - || term.regex.match(entry->resolvePlaceholder(entry->username())).hasMatch() - || term.regex.match(entry->resolvePlaceholder(entry->url())).hasMatch() - || entry->tagList().indexOf(term.regex) != -1 || term.regex.match(entry->notes()).hasMatch(); + found = term.regex.match(normalize(entry->resolvePlaceholder(entry->title()))).hasMatch() + || term.regex.match(normalize(entry->resolvePlaceholder(entry->username()))).hasMatch() + || term.regex.match(normalize(entry->resolvePlaceholder(entry->url()))).hasMatch() + || anyMatch(entry->tagList()) || term.regex.match(normalize(entry->notes())).hasMatch(); } // negate the result if exclude: @@ -296,15 +308,24 @@ void EntrySearcher::parseSearchTerms(const QString& searchString) auto mods = result.captured(1); + // Exact match modifier implies accent-sensitive matching + 
term.accentSensitive = mods.contains("+"); + + // Normalize term for accent-insensitive search (unless exact match) + auto wordForRegex = term.word; + if (!term.accentSensitive) { + wordForRegex = Tools::stripDiacritics(wordForRegex); + } + // Convert term to regex int opts = m_caseSensitive ? Tools::RegexConvertOpts::CASE_SENSITIVE : Tools::RegexConvertOpts::DEFAULT; if (!mods.contains("*")) { opts |= Tools::RegexConvertOpts::WILDCARD_ALL; } - if (mods.contains("+")) { + if (term.accentSensitive) { opts |= Tools::RegexConvertOpts::EXACT_MATCH; } - term.regex = Tools::convertToRegex(term.word, opts); + term.regex = Tools::convertToRegex(wordForRegex, opts); // Exclude modifier term.exclude = mods.contains("-") || mods.contains("!"); diff --git a/src/core/EntrySearcher.h b/src/core/EntrySearcher.h index a15916fb04..cf21d1e4ef 100644 --- a/src/core/EntrySearcher.h +++ b/src/core/EntrySearcher.h @@ -52,6 +52,7 @@ class EntrySearcher QString word; QRegularExpression regex; bool exclude; + bool accentSensitive; }; explicit EntrySearcher(bool caseSensitive = false, bool skipProtected = false); diff --git a/src/core/Tools.cpp b/src/core/Tools.cpp index 37a544c517..65e48ac7ab 100644 --- a/src/core/Tools.cpp +++ b/src/core/Tools.cpp @@ -454,6 +454,33 @@ namespace Tools return string.replace("&", "&&"); } + QString stripDiacritics(const QString& str) + { + // Fast path: pure ASCII has no diacritics to strip + bool ascii = true; + for (const auto& ch : str) { + if (ch.unicode() > 127) { + ascii = false; + break; + } + } + if (ascii) { + return str; + } + + // Strip combining marks after NFD decomposition + auto decomposed = str.normalized(QString::NormalizationForm_D); + QString result; + result.reserve(decomposed.size()); + for (const auto& ch : decomposed) { + if (ch.category() != QChar::Mark_NonSpacing && ch.category() != QChar::Mark_SpacingCombining + && ch.category() != QChar::Mark_Enclosing) { + result.append(ch); + } + } + return result; + } + QVariantMap qo2qvm(const 
QObject* object, const QStringList& ignoredProperties) { QVariantMap result; diff --git a/src/core/Tools.h b/src/core/Tools.h index a8fe5b2be1..5ee73e1e9d 100644 --- a/src/core/Tools.h +++ b/src/core/Tools.h @@ -50,6 +50,7 @@ namespace Tools QString cleanFilename(QString filename); QString cleanUsername(); QString escapeAccelerators(QString string); + QString stripDiacritics(const QString& str); template QSet asSet(const QList& a) { diff --git a/src/gui/DatabaseWidget.cpp b/src/gui/DatabaseWidget.cpp index 2afde49fee..3e27411acc 100644 --- a/src/gui/DatabaseWidget.cpp +++ b/src/gui/DatabaseWidget.cpp @@ -93,7 +93,7 @@ DatabaseWidget::DatabaseWidget(QSharedPointer db, QWidget* parent) , m_tagView(new TagView(this)) , m_saveAttempts(0) , m_remoteSettings(new RemoteSettings(m_db, this)) - , m_entrySearcher(new EntrySearcher(false)) + , m_entrySearcher(new EntrySearcher()) { Q_ASSERT(m_db); diff --git a/tests/TestEntrySearcher.cpp b/tests/TestEntrySearcher.cpp index d002a31664..a9a1ac2da4 100644 --- a/tests/TestEntrySearcher.cpp +++ b/tests/TestEntrySearcher.cpp @@ -434,3 +434,106 @@ void TestEntrySearcher::testTotpSearch() QVERIFY(!m_searchResult.contains(entry2)); QVERIFY(!m_searchResult.contains(entry3)); } + +void TestEntrySearcher::testAccentInsensitiveSearch() +{ + auto entry1 = new Entry(); + entry1->setGroup(m_rootGroup); + entry1->setTitle(QString::fromUtf8("používateľ")); + entry1->setUsername("user1"); + + auto entry2 = new Entry(); + entry2->setGroup(m_rootGroup); + entry2->setTitle("pouzivatel"); + entry2->setUsername("user2"); + + auto entry3 = new Entry(); + entry3->setGroup(m_rootGroup); + entry3->setTitle(QString::fromUtf8("café")); + entry3->setUsername("user3"); + + auto entry4 = new Entry(); + entry4->setGroup(m_rootGroup); + entry4->setTitle("unrelated"); + entry4->setUsername("user4"); + + // Default search is accent-insensitive: ASCII query matches accented entry + m_searchResult = m_entrySearcher.search("pouzivatel", m_rootGroup); + 
 QCOMPARE(m_searchResult.count(), 2); + QVERIFY(m_searchResult.contains(entry1)); + QVERIFY(m_searchResult.contains(entry2)); + + // Accented query matches ASCII entry + m_searchResult = m_entrySearcher.search(QString::fromUtf8("používateľ"), m_rootGroup); + QCOMPARE(m_searchResult.count(), 2); + QVERIFY(m_searchResult.contains(entry1)); + QVERIFY(m_searchResult.contains(entry2)); + + // ASCII query matches accented entry ("cafe" -> "café") + m_searchResult = m_entrySearcher.search("cafe", m_rootGroup); + QCOMPARE(m_searchResult.count(), 1); + QVERIFY(m_searchResult.contains(entry3)); + + // Field-specific search works with accent folding + m_searchResult = m_entrySearcher.search("title:pouzivatel", m_rootGroup); + QCOMPARE(m_searchResult.count(), 2); + QVERIFY(m_searchResult.contains(entry1)); + QVERIFY(m_searchResult.contains(entry2)); + + // Exact match (+) forces accent-sensitive: ASCII doesn't match accented + m_searchResult = m_entrySearcher.search("+pouzivatel", m_rootGroup); + QCOMPARE(m_searchResult.count(), 1); + QVERIFY(m_searchResult.contains(entry2)); + + // Exact match (+) with accented query only matches accented entry + m_searchResult = m_entrySearcher.search(QString::fromUtf8("+používateľ"), m_rootGroup); + QCOMPARE(m_searchResult.count(), 1); + QVERIFY(m_searchResult.contains(entry1)); + + // Exact match (+): ASCII "cafe" does not match "café"; only the accented query does + m_searchResult = m_entrySearcher.search("+cafe", m_rootGroup); + QCOMPARE(m_searchResult.count(), 0); + + m_searchResult = m_entrySearcher.search(QString::fromUtf8("+café"), m_rootGroup); + QCOMPARE(m_searchResult.count(), 1); + QVERIFY(m_searchResult.contains(entry3)); + + // Exclude modifier works with accent folding + m_searchResult = m_entrySearcher.search("!pouzivatel", m_rootGroup); + QCOMPARE(m_searchResult.count(), 2); + QVERIFY(m_searchResult.contains(entry3)); + QVERIFY(m_searchResult.contains(entry4)); + + // Attachment search with accent folding + 
entry1->attachments()->set(QString::fromUtf8("schéma.pdf"), QByteArray()); + m_searchResult = m_entrySearcher.search("attachment:schema", m_rootGroup); + QCOMPARE(m_searchResult.count(), 1); + QVERIFY(m_searchResult.contains(entry1)); + + // Tag search with accent folding + entry1->addTag(QString::fromUtf8("résumé")); + m_searchResult = m_entrySearcher.search("tag:resume", m_rootGroup); + QCOMPARE(m_searchResult.count(), 1); + QVERIFY(m_searchResult.contains(entry1)); + + // Group search with accent folding + auto accentGroup = new Group(); + accentGroup->setParent(m_rootGroup); + accentGroup->setName(QString::fromUtf8("Récréation")); + auto entry5 = new Entry(); + entry5->setGroup(accentGroup); + entry5->setTitle("in accented group"); + m_searchResult = m_entrySearcher.search("group:recreation", accentGroup); + QCOMPARE(m_searchResult.count(), 1); + QVERIFY(m_searchResult.contains(entry5)); + + // Case-sensitive combined with accent-insensitive (default) + m_entrySearcher.setCaseSensitive(true); + m_searchResult = m_entrySearcher.search("Pouzivatel", m_rootGroup); + QCOMPARE(m_searchResult.count(), 0); + m_searchResult = m_entrySearcher.search("pouzivatel", m_rootGroup); + QCOMPARE(m_searchResult.count(), 2); + QVERIFY(m_searchResult.contains(entry1)); + QVERIFY(m_searchResult.contains(entry2)); + m_entrySearcher.setCaseSensitive(false); +} diff --git a/tests/TestEntrySearcher.h b/tests/TestEntrySearcher.h index 54a45d5789..a9f730193a 100644 --- a/tests/TestEntrySearcher.h +++ b/tests/TestEntrySearcher.h @@ -40,6 +40,7 @@ private slots: void testSkipProtected(); void testUUIDSearch(); void testTotpSearch(); + void testAccentInsensitiveSearch(); private: Group* m_rootGroup;