using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Text.RegularExpressions; using NLog; using NzbDrone.Common.Extensions; using NzbDrone.Common.Instrumentation; using NzbDrone.Core.Books; using NzbDrone.Core.Parser.Model; namespace NzbDrone.Core.Parser { public static class Parser { private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser)); private static readonly Regex[] ReportMusicTitleRegex = new[] { // Track with author (01 - author - trackName) new Regex(@"(?\d*){0,1}([-| ]{0,1})(?[a-zA-Z0-9, ().&_]*)[-| ]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Track without author (01 - trackName) new Regex(@"(?\d*)[-| .]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Track without trackNumber or author(trackName) new Regex(@"(?\d*)[-| .]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Track without trackNumber and with author(author - trackName) new Regex(@"(?\d*)[-| .]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), // Track with author and starting title (01 - author - trackName) new Regex(@"(?\d*){0,1}[-| ]{0,1}(?[a-zA-Z0-9, ().&_]*)[-| ]{0,1}(?[a-zA-Z0-9, ().&_]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), }; private static readonly Regex[] ReportBookTitleRegex = new[] { //ruTracker - (Genre) [Source]? Author - Discography new Regex(@"^(?:\(.+?\))(?:\W*(?:\[(?.+?)\]))?\W*(?.+?)(?: - )(?Discography|Discografia).+?(?\d{4}).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author - Discography with two years new Regex(@"^(?.+?)(?: - )(?:.+?)?(?Discography|Discografia).+?(?\d{4}).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author - Discography with end year new Regex(@"^(?.+?)(?: - )(?:.+?)?(?Discography|Discografia).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author Discography with two years new Regex(@"^(?.+?)\W*(?Discography|Discografia).+?(?\d{4}).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author Discography with end year new Regex(@"^(?.+?)\W*(?Discography|Discografia).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author Discography new Regex(@"^(?.+?)\W*(?Discography|Discografia)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //MyAnonaMouse - Title by Author [lang / pdf] new Regex(@"^(?.+)\bby\b(?.+?)(?:\[|\()", RegexOptions.IgnoreCase | RegexOptions.Compiled), //ruTracker - (Genre) [Source]? Author - Book - Year new Regex(@"^(?:\(.+?\))(?:\W*(?:\[(?.+?)\]))?\W*(?.+?)(?: - )(?.+?)(?: - )(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author-Book-Version-Source-Year //ex. Imagine Dragons-Smoke And Mirrors-Deluxe Edition-2CD-FLAC-2015-JLM new Regex(@"^(?.+?)[-](?.+?)[-](?:[\(|\[]?)(?.+?(?:Edition)?)(?:[\)|\]]?)[-](?\d?CD|WEB).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author-Book-Source-Year //ex. Dani_Sbert-Togheter-WEB-2017-FURY new Regex(@"^(?.+?)[-](?.+?)[-](?\d?CD|WEB).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author - Book (Year) Strict new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(?:\(|\[).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author - Book (Year) new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(?:\(|\[)(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author - Book - Year [something] new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(?: - )(?\d{4})\W*(?:\(|\[)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author - Book [something] or Author - Book (something) new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(?:\(|\[)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author - Book Year new Regex(@"^(?:(?.+?)(?: - )+)(?.+?)\W*(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author-Book (Year) Strict //Hyphen no space between author and book new Regex(@"^(?:(?.+?)(?:-)+)(?.+?)\W*(?:\(|\[).+?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author-Book (Year) //Hyphen no space between author and book new Regex(@"^(?:(?.+?)(?:-)+)(?.+?)\W*(?:\(|\[)(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author-Book [something] or Author-Book (something) //Hyphen no space between author and book new Regex(@"^(?:(?.+?)(?:-)+)(?.+?)\W*(?:\(|\[)", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author-Book-something-Year new Regex(@"^(?:(?.+?)(?:-)+)(?.+?)(?:-.+?)(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author-Book Year //Hyphen no space between author and book new Regex(@"^(?:(?.+?)(?:-)+)(?:(?.+?)(?:-)+)(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled), //Author - Year - Book // Hypen with no or more spaces between author/book/year new Regex(@"^(?:(?.+?)(?:-))(?\d{4})(?:-)(?[^-]+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), }; private static readonly Regex[] RejectHashedReleasesRegex = new Regex[] { // Generic match for md5 and mixed-case hashes. new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled), // Generic match for shorter lower-case hashes. new Regex(@"^[a-z0-9]{24}$", RegexOptions.Compiled), // Format seen on some NZBGeek releases // Be very strict with these coz they are very close to the valid 101 ep numbering. new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled), new Regex(@"^[a-z]{12}\d{3}$", RegexOptions.Compiled), //Backup filename (Unknown origins) new Regex(@"^Backup_\d{5,}S\d{2}-\d{2}$", RegexOptions.Compiled), //123 - Started appearing December 2014 new Regex(@"^123$", RegexOptions.Compiled), //abc - Started appearing January 2015 new Regex(@"^abc$", RegexOptions.Compiled | RegexOptions.IgnoreCase), //b00bs - Started appearing January 2015 new Regex(@"^b00bs$", RegexOptions.Compiled | RegexOptions.IgnoreCase) }; private static readonly RegexReplace NormalizeRegex = new RegexReplace(@"((?:\b|_)(?\d{4})[_.-](?[0-1][0-9])[_.-](?[0-3][0-9])|(?[0-1][0-9])[_.-](?[0-3][0-9])[_.-](?\d{4}))(?!\d)", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex SixDigitAirDateRegex = new Regex(@"(?<=[_.-])(?(?[1-9]\d{1})(?[0-1][0-9])(?[0-3][0-9]))(?=[_.-])", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly RegexReplace CleanReleaseGroupRegex = new RegexReplace(@"^(.*?[-._ ])|(-(RP|1|NZBGeek|Obfuscated|Scrambled|sample|Pre|postbot|xpost|Rakuv[a-z0-9]*|WhiteRev|BUYMORE|AsRequested|AlternativeToRequested|GEROV|Z0iDS3N|Chamele0n|4P|4Planet))+$", string.Empty, RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly RegexReplace CleanTorrentSuffixRegex = new RegexReplace(@"\[(?:ettv|rartv|rarbg|cttv)\]$", string.Empty, RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex ReleaseGroupRegex = new Regex(@"-(?[a-z0-9]+)(?(?!\s).+?(?.+?)(?:\W|_)?(?\d{4})", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly HashSet WordDelimiters = new HashSet(" .,_-=()[]|\"`'’"); private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|\(|\)|\[|\]|\|)+", RegexOptions.Compiled); private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled); private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled); private static readonly Regex RequestInfoRegex = new Regex(@"\[.+?\]", RegexOptions.Compiled); private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" }; private static readonly Regex[] CommonTagRegex = new Regex[] { new Regex(@"(\[|\()*\b((featuring|feat.|feat|ft|ft.)\s{1}){1}\s*.*(\]|\))*", RegexOptions.IgnoreCase | RegexOptions.Compiled), new Regex(@"(?:\(|\[)(?:[^\(\[]*)(?:version|limited|deluxe|single|clean|book|special|bonus|promo|remastered)(?:[^\)\]]*)(?:\)|\])", RegexOptions.IgnoreCase | RegexOptions.Compiled) }; private static readonly Regex[] BracketRegex = new Regex[] { new Regex(@"\(.*\)", RegexOptions.Compiled), new Regex(@"\[.*\]", RegexOptions.Compiled) }; private static readonly Regex AfterDashRegex = new Regex(@"[-:].*", RegexOptions.Compiled); public static ParsedTrackInfo ParseMusicPath(string path) { var fileInfo = new FileInfo(path); ParsedTrackInfo result = null; Logger.Debug("Attempting to parse book info using directory and file names. {0}", fileInfo.Directory.Name); result = ParseTitle(fileInfo.Directory.Name + " " + fileInfo.Name); if (result == null) { Logger.Debug("Attempting to parse book info using directory name. {0}", fileInfo.Directory.Name); result = ParseTitle(fileInfo.Directory.Name + fileInfo.Extension); } return result; } public static ParsedTrackInfo ParseTitle(string title) { try { if (!ValidateBeforeParsing(title)) { return null; } Logger.Debug("Parsing string '{0}'", title); var releaseTitle = RemoveFileExtension(title); releaseTitle = releaseTitle.Replace("【", "[").Replace("】", "]"); var simpleTitle = SimpleTitleRegex.Replace(releaseTitle); // TODO: Quick fix stripping [url] - prefixes. simpleTitle = WebsitePrefixRegex.Replace(simpleTitle); simpleTitle = WebsitePostfixRegex.Replace(simpleTitle); simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle); var airDateMatch = AirDateRegex.Match(simpleTitle); if (airDateMatch.Success) { simpleTitle = airDateMatch.Groups[1].Value + airDateMatch.Groups["airyear"].Value + "." + airDateMatch.Groups["airmonth"].Value + "." + airDateMatch.Groups["airday"].Value; } var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle); if (sixDigitAirDateMatch.Success) { var airYear = sixDigitAirDateMatch.Groups["airyear"].Value; var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value; var airDay = sixDigitAirDateMatch.Groups["airday"].Value; if (airMonth != "00" || airDay != "00") { var fixedDate = string.Format("20{0}.{1}.{2}", airYear, airMonth, airDay); simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate); } } foreach (var regex in ReportMusicTitleRegex) { var match = regex.Matches(simpleTitle); if (match.Count != 0) { Logger.Trace(regex); try { var result = ParseMatchMusicCollection(match); if (result != null) { result.Quality = QualityParser.ParseQuality(title); Logger.Debug("Quality parsed: {0}", result.Quality); return result; } } catch (InvalidDateException ex) { Logger.Debug(ex, ex.Message); break; } } } } catch (Exception e) { if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc")) { Logger.Error(e, "An error has occurred while trying to parse {0}", title); } } Logger.Debug("Unable to parse {0}", title); return null; } public static ParsedBookInfo ParseBookTitleWithSearchCriteria(string title, Author author, List books) { try { if (!ValidateBeforeParsing(title)) { return null; } var authorName = author.Name == "Various Authors" ? "VA" : author.Name.RemoveAccent(); Logger.Debug("Parsing string '{0}' using search criteria author: '{1}' books: '{2}'", title, authorName.RemoveAccent(), string.Join(", ", books.Select(a => a.Title.RemoveAccent()))); var releaseTitle = RemoveFileExtension(title); var simpleTitle = SimpleTitleRegex.Replace(releaseTitle); simpleTitle = WebsitePrefixRegex.Replace(simpleTitle); simpleTitle = WebsitePostfixRegex.Replace(simpleTitle); simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle); var bestBook = books .OrderByDescending(x => simpleTitle.FuzzyMatch(x.Editions.Value.Single(x => x.Monitored).Title, wordDelimiters: WordDelimiters)) .First() .Editions.Value .Single(x => x.Monitored); var foundAuthor = GetTitleFuzzy(simpleTitle, authorName, out var remainder); if (foundAuthor == null) { foundAuthor = GetTitleFuzzy(simpleTitle, authorName.ToLastFirst(), out remainder); } var foundBook = GetTitleFuzzy(remainder, bestBook.Title, out _); if (foundBook == null) { foundBook = GetTitleFuzzy(remainder, bestBook.Title.SplitBookTitle(authorName).Item1, out _); } Logger.Trace($"Found {foundAuthor} - {foundBook} with fuzzy parser"); if (foundAuthor == null || foundBook == null) { return null; } var result = new ParsedBookInfo { AuthorName = foundAuthor, AuthorTitleInfo = GetAuthorTitleInfo(foundAuthor), BookTitle = foundBook }; try { result.Quality = QualityParser.ParseQuality(title); Logger.Debug("Quality parsed: {0}", result.Quality); result.ReleaseGroup = ParseReleaseGroup(releaseTitle); Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup); return result; } catch (InvalidDateException ex) { Logger.Debug(ex, ex.Message); } } catch (Exception e) { if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc")) { Logger.Error(e, "An error has occurred while trying to parse {0}", title); } } Logger.Debug("Unable to parse {0}", title); return null; } public static string GetTitleFuzzy(string report, string name, out string remainder) { remainder = report; Logger.Trace($"Finding '{name}' in '{report}'"); var (locStart, matchLength, score) = report.ToLowerInvariant().FuzzyMatch(name.ToLowerInvariant(), 0.6, WordDelimiters); if (locStart == -1) { return null; } var found = report.Substring(locStart, matchLength); if (score >= 0.8) { remainder = report.Remove(locStart, matchLength); return found.Replace('.', ' ').Replace('_', ' '); } return null; } public static ParsedBookInfo ParseBookTitle(string title) { try { if (!ValidateBeforeParsing(title)) { return null; } Logger.Debug("Parsing string '{0}'", title); var releaseTitle = RemoveFileExtension(title); var simpleTitle = SimpleTitleRegex.Replace(releaseTitle); // TODO: Quick fix stripping [url] - prefixes. simpleTitle = WebsitePrefixRegex.Replace(simpleTitle); simpleTitle = WebsitePostfixRegex.Replace(simpleTitle); simpleTitle = CleanTorrentSuffixRegex.Replace(simpleTitle); var airDateMatch = AirDateRegex.Match(simpleTitle); if (airDateMatch.Success) { simpleTitle = airDateMatch.Groups[1].Value + airDateMatch.Groups["airyear"].Value + "." + airDateMatch.Groups["airmonth"].Value + "." + airDateMatch.Groups["airday"].Value; } var sixDigitAirDateMatch = SixDigitAirDateRegex.Match(simpleTitle); if (sixDigitAirDateMatch.Success) { var airYear = sixDigitAirDateMatch.Groups["airyear"].Value; var airMonth = sixDigitAirDateMatch.Groups["airmonth"].Value; var airDay = sixDigitAirDateMatch.Groups["airday"].Value; if (airMonth != "00" || airDay != "00") { var fixedDate = string.Format("20{0}.{1}.{2}", airYear, airMonth, airDay); simpleTitle = simpleTitle.Replace(sixDigitAirDateMatch.Groups["airdate"].Value, fixedDate); } } foreach (var regex in ReportBookTitleRegex) { var match = regex.Matches(simpleTitle); if (match.Count != 0) { Logger.Trace(regex); try { var result = ParseBookMatchCollection(match, releaseTitle); if (result != null) { result.Quality = QualityParser.ParseQuality(title); Logger.Debug("Quality parsed: {0}", result.Quality); result.ReleaseGroup = ParseReleaseGroup(releaseTitle); var subGroup = GetSubGroup(match); if (!subGroup.IsNullOrWhiteSpace()) { result.ReleaseGroup = subGroup; } Logger.Debug("Release Group parsed: {0}", result.ReleaseGroup); result.ReleaseHash = GetReleaseHash(match); if (!result.ReleaseHash.IsNullOrWhiteSpace()) { Logger.Debug("Release Hash parsed: {0}", result.ReleaseHash); } return result; } } catch (InvalidDateException ex) { Logger.Debug(ex, ex.Message); break; } } } } catch (Exception e) { if (!title.ToLower().Contains("password") && !title.ToLower().Contains("yenc")) { Logger.Error(e, "An error has occurred while trying to parse {0}", title); } } Logger.Debug("Unable to parse {0}", title); return null; } public static (string, string) SplitBookTitle(this string book, string author) { // Strip author from title, eg Tom Clancy: Ghost Protocol if (book.StartsWith($"{author}:")) { book = book.Split(':', 2)[1].Trim(); } var parenthesis = book.IndexOf('('); var colon = book.IndexOf(':'); string[] parts = null; if (parenthesis > -1) { var endParenthesis = book.IndexOf(')', parenthesis); if (endParenthesis == -1 || !book.Substring(parenthesis + 1, endParenthesis - parenthesis).Contains(' ')) { parenthesis = -1; } } if (colon > -1 && parenthesis > -1) { if (colon < parenthesis) { parts = book.Split(':', 2); } else { parts = book.Split('(', 2); parts[1] = parts[1].TrimEnd(')'); } } else if (colon > -1) { parts = book.Split(':', 2); } else if (parenthesis > -1) { parts = book.Split('('); parts[1] = parts[1].TrimEnd(')'); } if (parts != null) { return (parts[0].Trim(), parts[1].TrimEnd(':').Trim()); } return (book, string.Empty); } public static string CleanAuthorName(this string name) { if (name.IsNullOrWhiteSpace()) { return string.Empty; } // If Title only contains numbers return it as is. if (long.TryParse(name, out _)) { return name; } name = PercentRegex.Replace(name, "percent"); return NormalizeRegex.Replace(name).ToLower().RemoveAccent(); } public static string NormalizeTrackTitle(this string title) { title = SpecialEpisodeWordRegex.Replace(title, string.Empty); title = PunctuationRegex.Replace(title, " "); title = DuplicateSpacesRegex.Replace(title, " "); return title.Trim().ToLower(); } public static string NormalizeTitle(string title) { title = WordDelimiterRegex.Replace(title, " "); title = PunctuationRegex.Replace(title, string.Empty); title = CommonWordRegex.Replace(title, string.Empty); title = DuplicateSpacesRegex.Replace(title, " "); return title.Trim().ToLower(); } public static string ParseReleaseGroup(string title) { title = title.Trim(); title = RemoveFileExtension(title); title = WebsitePrefixRegex.Replace(title); var animeMatch = AnimeReleaseGroupRegex.Match(title); if (animeMatch.Success) { return animeMatch.Groups["subgroup"].Value; } title = CleanReleaseGroupRegex.Replace(title); var matches = ReleaseGroupRegex.Matches(title); if (matches.Count != 0) { var group = matches.OfType().Last().Groups["releasegroup"].Value; if (int.TryParse(group, out _)) { return null; } return group; } return null; } public static string RemoveFileExtension(string title) { title = FileExtensionRegex.Replace(title, m => { var extension = m.Value.ToLower(); if (MediaFiles.MediaFileExtensions.AllExtensions.Contains(extension) || new[] { ".par2", ".nzb" }.Contains(extension)) { return string.Empty; } return m.Value; }); return title; } public static string CleanBookTitle(this string book) { return CommonTagRegex[1].Replace(book, string.Empty).Trim(); } public static string RemoveBracketsAndContents(this string book) { var intermediate = book; foreach (var regex in BracketRegex) { intermediate = regex.Replace(intermediate, string.Empty).Trim(); } return intermediate; } public static string RemoveAfterDash(this string text) { return AfterDashRegex.Replace(text, string.Empty).Trim(); } public static string CleanTrackTitle(this string title) { var intermediateTitle = title; foreach (var regex in CommonTagRegex) { intermediateTitle = regex.Replace(intermediateTitle, string.Empty).Trim(); } return intermediateTitle; } private static ParsedTrackInfo ParseMatchMusicCollection(MatchCollection matchCollection) { var authorName = matchCollection[0].Groups["author"].Value./*Removed for cases like Will.I.Am Replace('.', ' ').*/Replace('_', ' '); authorName = RequestInfoRegex.Replace(authorName, "").Trim(' '); // Coppied from Radarr (https://github.com/Radarr/Radarr/blob/develop/src/NzbDrone.Core/Parser/Parser.cs) // TODO: Split into separate method and write unit tests for. var parts = authorName.Split('.'); authorName = ""; var n = 0; var previousAcronym = false; var nextPart = ""; foreach (var part in parts) { if (parts.Length >= n + 2) { nextPart = parts[n + 1]; } if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out n)) { authorName += part + "."; previousAcronym = true; } else if (part.ToLower() == "a" && (previousAcronym == true || nextPart.Length == 1)) { authorName += part + "."; previousAcronym = true; } else { if (previousAcronym) { authorName += " "; previousAcronym = false; } authorName += part + " "; } n++; } authorName = authorName.Trim(' '); var result = new ParsedTrackInfo(); result.Authors = new List { authorName }; Logger.Debug("Track Parsed. {0}", result); return result; } private static AuthorTitleInfo GetAuthorTitleInfo(string title) { var authorTitleInfo = new AuthorTitleInfo(); authorTitleInfo.Title = title; return authorTitleInfo; } public static string ParseAuthorName(string title) { Logger.Debug("Parsing string '{0}'", title); var parseResult = ParseBookTitle(title); if (parseResult == null) { return CleanAuthorName(title); } return parseResult.AuthorName; } private static ParsedBookInfo ParseBookMatchCollection(MatchCollection matchCollection, string releaseTitle) { var authorName = matchCollection[0].Groups["author"].Value.Replace('.', ' ').Replace('_', ' '); var bookTitle = matchCollection[0].Groups["book"].Value.Replace('.', ' ').Replace('_', ' '); var releaseVersion = matchCollection[0].Groups["version"].Value.Replace('.', ' ').Replace('_', ' '); authorName = RequestInfoRegex.Replace(authorName, "").Trim(' '); bookTitle = RequestInfoRegex.Replace(bookTitle, "").Trim(' '); releaseVersion = RequestInfoRegex.Replace(releaseVersion, "").Trim(' '); int.TryParse(matchCollection[0].Groups["releaseyear"].Value, out var releaseYear); ParsedBookInfo result; result = new ParsedBookInfo { ReleaseTitle = releaseTitle }; result.AuthorName = authorName; result.BookTitle = bookTitle; result.AuthorTitleInfo = GetAuthorTitleInfo(result.AuthorName); result.ReleaseDate = releaseYear.ToString(); result.ReleaseVersion = releaseVersion; if (matchCollection[0].Groups["discography"].Success) { int.TryParse(matchCollection[0].Groups["startyear"].Value, out var discStart); int.TryParse(matchCollection[0].Groups["endyear"].Value, out var discEnd); result.Discography = true; if (discStart > 0 && discEnd > 0) { result.DiscographyStart = discStart; result.DiscographyEnd = discEnd; } else if (discEnd > 0) { result.DiscographyEnd = discEnd; } result.BookTitle = "Discography"; } Logger.Debug("Book Parsed. {0}", result); return result; } private static bool ValidateBeforeParsing(string title) { if (title.ToLower().Contains("password") && title.ToLower().Contains("yenc")) { Logger.Debug(""); return false; } if (!title.Any(char.IsLetterOrDigit)) { return false; } var titleWithoutExtension = RemoveFileExtension(title); if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension))) { Logger.Debug("Rejected Hashed Release Title: " + title); return false; } return true; } private static string GetSubGroup(MatchCollection matchCollection) { var subGroup = matchCollection[0].Groups["subgroup"]; if (subGroup.Success) { return subGroup.Value; } return string.Empty; } private static string GetReleaseHash(MatchCollection matchCollection) { var hash = matchCollection[0].Groups["hash"]; if (hash.Success) { var hashValue = hash.Value.Trim('[', ']'); if (hashValue.Equals("1280x720")) { return string.Empty; } return hashValue; } return string.Empty; } private static int ParseNumber(string value) { if (int.TryParse(value, out var number)) { return number; } number = Array.IndexOf(Numbers, value.ToLower()); if (number != -1) { return number; } throw new FormatException(string.Format("{0} isn't a number", value)); } } }