Improved detection of hashed releases.

- Added specific rejection code for 32 random characters to ignore all md5 and mixed-case hashes.
- Added NZBGeek hash format.
- Removed -RP from Release Group.
- Added test and fix for reversed title. Currently matching it based on a couple of patterns.
This commit is contained in:
Taloth Saldono
2014-04-18 01:16:40 +02:00
parent 9c858445a3
commit 5428d9d53f
5 changed files with 131 additions and 9 deletions
+49 -6
View File
@@ -43,7 +43,7 @@ namespace NzbDrone.Core.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes without a title, Single (S01E05, 1x05) AND Multi (S01E04E05, 1x04x05, etc)
new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))+(?![\da-z]))",
new Regex(@"^(?:S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{2,3}(?!\d+)))+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
@@ -92,7 +92,7 @@ namespace NzbDrone.Core.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Episodes with a title, Single episodes (S01E05, 1x05, etc) & Multi-episode (S01E05E06, S01E05-06, S01E05 E06, etc)
new Regex(@"^(?<title>.+?)(?:(\W|_)+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+(?![\da-z]))\W?(?!\\)",
new Regex(@"^(?<title>.+?)(?:(\W|_)+S?(?<season>(?<!\d+)\d{1,2}(?!\d+))(?:(?:\-|[ex]|\W[ex]|_){1,2}(?<episode>\d{4}(?!\d+|i|p)))+)\W?(?!\\)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
//Anime - Title Absolute Episode Number
@@ -100,6 +100,18 @@ namespace NzbDrone.Core.Parser
RegexOptions.IgnoreCase | RegexOptions.Compiled)
};
private static readonly Regex[] RejectHashedReleasesRegex = new Regex[]
{
// Generic match for md5 and mixed-case hashes.
new Regex(@"^[0-9a-zA-Z]{32}", RegexOptions.Compiled),
// Format seen on some NZBGeek releases
new Regex(@"^[A-Z]{11}\d{3}$", RegexOptions.Compiled)
};
//Regex to detect whether the title was reversed.
private static readonly Regex ReversedTitleRegex = new Regex(@"\.p027\.|\.p0801\.|\.\d{2}E\d{2}S\.", RegexOptions.Compiled);
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^)(a|an|the|and|or|of)(?:\b|_))|\W|_",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
@@ -155,6 +167,17 @@ namespace NzbDrone.Core.Parser
if (!ValidateBeforeParsing(title)) return null;
Logger.Debug("Parsing string '{0}'", title);
if (ReversedTitleRegex.IsMatch(title))
{
var titleWithoutExtension = RemoveFileExtension(title).ToCharArray();
Array.Reverse(titleWithoutExtension);
title = new string(titleWithoutExtension) + title.Substring(titleWithoutExtension.Length);
Logger.Debug("Reversed name detected. Converted to '{0}'", title);
}
var simpleTitle = SimpleTitleRegex.Replace(title, String.Empty);
foreach (var regex in ReportTitleRegex)
@@ -245,10 +268,9 @@ namespace NzbDrone.Core.Parser
title = title.Trim();
if (!title.ContainsInvalidPathChars() && MediaFiles.MediaFileExtensions.Extensions.Contains(Path.GetExtension(title).ToLower()))
{
title = Path.GetFileNameWithoutExtension(title).Trim();
}
title = RemoveFileExtension(title);
title = title.TrimEnd("-RP");
var index = title.LastIndexOf('-');
@@ -275,6 +297,19 @@ namespace NzbDrone.Core.Parser
return group;
}
public static string RemoveFileExtension(string title)
{
if (!title.ContainsInvalidPathChars())
{
if (MediaFiles.MediaFileExtensions.Extensions.Contains(Path.GetExtension(title).ToLower()))
{
title = Path.Combine(Path.GetDirectoryName(title), Path.GetFileNameWithoutExtension(title));
}
}
return title;
}
private static SeriesTitleInfo GetSeriesTitleInfo(string title)
{
var seriesTitleInfo = new SeriesTitleInfo();
@@ -511,6 +546,14 @@ namespace NzbDrone.Core.Parser
return false;
}
var titleWithoutExtension = RemoveFileExtension(title);
if (RejectHashedReleasesRegex.Any(v => v.IsMatch(titleWithoutExtension)))
{
Logger.Debug("Rejected Hashed Release Title: " + title);
return false;
}
return true;
}
}