diff --git a/app/src/main/java/app/grapheneos/pdfviewer/Utils.java b/app/src/main/java/app/grapheneos/pdfviewer/Utils.java index e2edf661f..631012b6d 100644 --- a/app/src/main/java/app/grapheneos/pdfviewer/Utils.java +++ b/app/src/main/java/app/grapheneos/pdfviewer/Utils.java @@ -1,15 +1,46 @@ package app.grapheneos.pdfviewer; -import android.text.TextUtils; - import java.math.RoundingMode; import java.text.DateFormat; import java.text.DecimalFormat; import java.text.ParseException; import java.util.Calendar; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class Utils { + // PDF date string format. Based on the format described in section 7.9.4 of + // the PDF 32000-2:2020 specification: + // + // D:YYYYMMDDHHmmSSOHH'mm + // + // The PDF 1.7 reference defined the same format with a terminating + // apostrophe, and PDF processors are recommended to accept date strings + // that follow that older convention. The apostrophe between HH and mm is + // also tolerated as missing for additional leniency, matching pdf.js. + private static final Pattern PDF_DATE_PATTERN = Pattern.compile( + "^D:" + + "(\\d{4})" + // Year (required) + "(\\d{2})?" + // Month (optional) + "(\\d{2})?" + // Day (optional) + "(\\d{2})?" + // Hours (optional) + "(\\d{2})?" + // Minutes (optional) + "(\\d{2})?" + // Seconds (optional) + "(?:" + + " ([Z+\\-])" + // Universal time relation + " (?:" + + " (\\d{2})" + // Offset hours + " '?" + // Splitting apostrophe (optional) + " (?:" + + " (\\d{2})" + // Offset minutes + " '?" + // Trailing apostrophe (optional, PDF <= 1.7) + " )?" + + " )?" + + ")?$", + Pattern.COMMENTS + ); + private static int parseIntSafely(String field) throws ParseException { try { return Integer.parseInt(field); @@ -18,163 +49,82 @@ private static int parseIntSafely(String field) throws ParseException { } } - // Parse date as per PDF spec (complies with PDF v1.4 to v1.7) - public static String parseDate(String date) throws ParseException { - int position = 0; + private static int parseGroup(Matcher matcher, int group, int defaultValue) throws ParseException { + final String field = matcher.group(group); + return field == null ? defaultValue : parseIntSafely(field); + } - // D: prefix is optional for PDF < v1.7; required for PDF v1.7 + // Parse date as per PDF spec (complies with PDF v1.4 to v2.0) + public static String parseDate(String date) throws ParseException { + // D: prefix is optional for PDF < v1.7; required for PDF v1.7+ if (!date.startsWith("D:")) { date = "D:" + date; } - if (date.length() < 6 || date.length() > 23) { - throw new ParseException("Invalid datetime length", position); + + final Matcher matcher = PDF_DATE_PATTERN.matcher(date); + if (!matcher.matches()) { + throw new ParseException("Invalid date format", 0); } final Calendar calendar = Calendar.getInstance(); final int currentYear = calendar.get(Calendar.YEAR); - // Year is required - String field = date.substring(position += 2, 6); - if (!TextUtils.isDigitsOnly(field)) { - throw new ParseException("Invalid year", position); - } - int year = parseIntSafely(field); + int year = parseIntSafely(matcher.group(1)); if (year > currentYear) { year = currentYear; } - position += 4; - - // Default value for month and day shall be 1 (calendar month starts at 0 in Java 7), - // all others default to 0 - int month = 0; - int day = 1; - int hours = 0; - int minutes = 0; - int seconds = 0; - - // All succeeding fields are optional, but each preceding field must be present - if (date.length() >= 8) { - field = date.substring(position, 8); - if (!TextUtils.isDigitsOnly(field)) { - throw new ParseException("Invalid month", position); - } - month = parseIntSafely(field) - 1; - if (month > 11) { - throw new ParseException("Invalid month", position); - } - position += 2; + // Calendar month starts at 0 in Java; defaults for month and day are 1 + // per the spec, all other fields default to 0. + final int month = parseGroup(matcher, 2, 1) - 1; + if (month < 0 || month > 11) { + throw new ParseException("Invalid month", 0); } - if (date.length() >= 10) { - field = date.substring(8, 10); - if (!TextUtils.isDigitsOnly(field)) { - throw new ParseException("Invalid day", position); - } - day = parseIntSafely(field); - if (day > 31) { - throw new ParseException("Invalid day", position); - } - position += 2; + final int day = parseGroup(matcher, 3, 1); + if (day < 1 || day > 31) { + throw new ParseException("Invalid day", 0); } - if (date.length() >= 12) { - field = date.substring(10, 12); - if (!TextUtils.isDigitsOnly(field)) { - throw new ParseException("Invalid hours", position); - } - hours = parseIntSafely(field); - if (hours > 23) { - throw new ParseException("Invalid hours", position); - } - position += 2; + int hours = parseGroup(matcher, 4, 0); + if (hours > 23) { + throw new ParseException("Invalid hours", 0); } - if (date.length() >= 14) { - field = date.substring(12, 14); - if (!TextUtils.isDigitsOnly(field)) { - throw new ParseException("Invalid minutes", position); - } - minutes = parseIntSafely(field); - if (minutes > 59) { - throw new ParseException("Invalid minutes", position); - } - position += 2; + int minutes = parseGroup(matcher, 5, 0); + if (minutes > 59) { + throw new ParseException("Invalid minutes", 0); } - if (date.length() >= 16) { - field = date.substring(14, 16); - if (!TextUtils.isDigitsOnly(field)) { - throw new ParseException("Invalid seconds", position); - } - seconds = parseIntSafely(field); - if (seconds > 59) { - throw new ParseException("Invalid seconds", position); - } - position += 2; + final int seconds = parseGroup(matcher, 6, 0); + if (seconds > 59) { + throw new ParseException("Invalid seconds", 0); } - - if (date.length() > position) { - int offsetHours = 0; - int offsetMinutes = 0; - - final char utRel = date.charAt(position); - if (utRel != '\u002D' && utRel != '\u002B' && utRel != '\u005A') { - throw new ParseException("Invalid UT relation", position); + final String utRel = matcher.group(7); + if (utRel != null) { + final int offsetHours = parseGroup(matcher, 8, 0); + final int offsetMinutes = parseGroup(matcher, 9, 0); + if (offsetMinutes > 59) { + throw new ParseException("Invalid UTC offset minutes", 0); } - - position++; - - if (date.length() > position + 2) { - field = date.substring(position, position + 2); - if (!TextUtils.isDigitsOnly(field)) { - throw new ParseException("Invalid UTC offset hours", position); - } - offsetHours = parseIntSafely(field); - final int offsetHoursMinutes = offsetHours * 100 + offsetMinutes; - - // Validate UTC offset (UTC-12:00 to UTC+14:00) - if ((utRel == '\u002D' && offsetHoursMinutes > 1200) || - (utRel == '\u002B' && offsetHoursMinutes > 1400)) { - throw new ParseException("Invalid UTC offset hours", position); - } - - position += 2; - - // Apostrophe shall succeed HH and precede mm - if (date.charAt(position) != '\'') { - throw new ParseException("Expected apostrophe", position); - } - - position++; - - if (date.length() > position + 2) { - field = date.substring(position, position + 2); - if (!TextUtils.isDigitsOnly(field)) { - throw new ParseException("Invalid UTC offset minutes", position); - } - offsetMinutes = parseIntSafely(field); - if (offsetMinutes > 59) { - throw new ParseException("Invalid UTC offset minutes", position); - } - position += 2; - - // Apostrophe shall succeed mm - if (date.charAt(position) != '\'') { - throw new ParseException("Expected apostrophe", position); - } - } - } - - + final int offsetHoursMinutes = offsetHours * 100 + offsetMinutes; + // Validate UTC offset (UTC-12:00 to UTC+14:00; "Z" means UTC) switch (utRel) { - case '\u002D': + case "-": + if (offsetHoursMinutes > 1200) { + throw new ParseException("Invalid UTC offset", 0); + } hours -= offsetHours; minutes -= offsetMinutes; break; - case '\u002B': + case "+": + if (offsetHoursMinutes > 1400) { + throw new ParseException("Invalid UTC offset", 0); + } hours += offsetHours; minutes += offsetMinutes; break; - default: - // "Z" means equal to UTC + case "Z": + if (offsetHoursMinutes != 0) { + throw new ParseException("UTC indicator 'Z' must not have a non-zero offset", 0); + } break; } }