diff --git a/breadability/scoring.py b/breadability/scoring.py index a042af2..84f0155 100644 --- a/breadability/scoring.py +++ b/breadability/scoring.py @@ -85,7 +85,7 @@ def get_link_density(node, node_text=None): """ if node_text is None: node_text = node.text_content() - node_text = normalize_whitespace(node_text.strip()) + node_text = normalize_whitespace(node_text) text_length = len(node_text) if text_length == 0: @@ -101,7 +101,7 @@ def get_link_density(node, node_text=None): def _get_normalized_text_length(node): - return len(normalize_whitespace(node.text_content().strip())) + return len(normalize_whitespace(node.text_content())) def get_class_weight(node): diff --git a/breadability/utils.py b/breadability/utils.py index 70a9778..af4821d 100644 --- a/breadability/utils.py +++ b/breadability/utils.py @@ -18,9 +18,6 @@ def ignored(*exceptions): pass -MULTIPLE_WHITESPACE_PATTERN = re.compile(r"\s+", re.UNICODE) - - def is_blank(text): """ Returns ``True`` if string contains only whitespace characters @@ -29,26 +26,14 @@ def is_blank(text): return not text or text.isspace() -def shrink_text(text): - return normalize_whitespace(text.strip()) - - def normalize_whitespace(text): """ Translates multiple whitespace into single space character. - If there is at least one new line character chunk is replaced - by single LF (Unix new line) character. """ - return MULTIPLE_WHITESPACE_PATTERN.sub(_replace_whitespace, text) - + return ' '.join(text.split()) -def _replace_whitespace(match): - text = match.group() - if "\n" in text or "\r" in text: - return "\n" - else: - return " " +shrink_text = normalize_whitespace def cached_property(getter):