1
0
mirror of https://github.com/Radarr/Radarr.git synced 2026-04-23 22:25:14 -04:00

New: Parse existing subtitles and extra files

Towards #459
This commit is contained in:
Mark McDowall
2015-12-25 01:22:00 -08:00
parent 816cf608fc
commit 2e96c4e798
78 changed files with 2013 additions and 678 deletions
+16
View File
@@ -0,0 +1,16 @@
namespace NzbDrone.Core.Parser
{
public class IsoLanguage
{
public string TwoLetterCode { get; set; }
public string ThreeLetterCode { get; set; }
public Language Language { get; set; }
public IsoLanguage(string twoLetterCode, string threeLetterCode, Language language)
{
TwoLetterCode = twoLetterCode;
ThreeLetterCode = threeLetterCode;
Language = language;
}
}
}
+55
View File
@@ -0,0 +1,55 @@
using System.Collections.Generic;
using System.Linq;
namespace NzbDrone.Core.Parser
{
public static class IsoLanguages
{
private static readonly HashSet<IsoLanguage> All = new HashSet<IsoLanguage>
{
new IsoLanguage("en", "eng", Language.English),
new IsoLanguage("fr", "fra", Language.French),
new IsoLanguage("es", "spa", Language.Spanish),
new IsoLanguage("de", "deu", Language.German),
new IsoLanguage("it", "ita", Language.Italian),
new IsoLanguage("da", "dan", Language.Danish),
new IsoLanguage("nl", "nld", Language.Dutch),
new IsoLanguage("ja", "jpn", Language.Japanese),
// new IsoLanguage("", "", Language.Cantonese),
// new IsoLanguage("", "", Language.Mandarin),
new IsoLanguage("ru", "rus", Language.Russian),
new IsoLanguage("pl", "pol", Language.Polish),
new IsoLanguage("vi", "vie", Language.Vietnamese),
new IsoLanguage("sv", "swe", Language.Swedish),
new IsoLanguage("no", "nor", Language.Norwegian),
new IsoLanguage("fi", "fin", Language.Finnish),
new IsoLanguage("tr", "tur", Language.Turkish),
new IsoLanguage("pt", "por", Language.Portuguese),
// new IsoLanguage("nl", "nld", Language.Flemish),
new IsoLanguage("el", "ell", Language.Greek),
new IsoLanguage("ko", "kor", Language.Korean),
new IsoLanguage("hu", "hun", Language.Hungarian)
};
public static IsoLanguage Find(string isoCode)
{
if (isoCode.Length == 2)
{
//Lookup ISO639-1 code
return All.SingleOrDefault(l => l.TwoLetterCode == isoCode);
}
else if (isoCode.Length == 3)
{
//Lookup ISO639-2T code
return All.SingleOrDefault(l => l.ThreeLetterCode == isoCode);
}
return null;
}
public static IsoLanguage Get(Language language)
{
return All.SingleOrDefault(l => l.Language == language);
}
}
}
+137
View File
@@ -0,0 +1,137 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using NLog;
using NzbDrone.Common.Instrumentation;
namespace NzbDrone.Core.Parser
{
public static class LanguageParser
{
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(LanguageParser));
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
public static Language ParseLanguage(string title)
{
var lowerTitle = title.ToLower();
if (lowerTitle.Contains("english"))
return Language.English;
if (lowerTitle.Contains("french"))
return Language.French;
if (lowerTitle.Contains("spanish"))
return Language.Spanish;
if (lowerTitle.Contains("danish"))
return Language.Danish;
if (lowerTitle.Contains("dutch"))
return Language.Dutch;
if (lowerTitle.Contains("japanese"))
return Language.Japanese;
if (lowerTitle.Contains("cantonese"))
return Language.Cantonese;
if (lowerTitle.Contains("mandarin"))
return Language.Mandarin;
if (lowerTitle.Contains("korean"))
return Language.Korean;
if (lowerTitle.Contains("russian"))
return Language.Russian;
if (lowerTitle.Contains("polish"))
return Language.Polish;
if (lowerTitle.Contains("vietnamese"))
return Language.Vietnamese;
if (lowerTitle.Contains("swedish"))
return Language.Swedish;
if (lowerTitle.Contains("norwegian"))
return Language.Norwegian;
if (lowerTitle.Contains("nordic"))
return Language.Norwegian;
if (lowerTitle.Contains("finnish"))
return Language.Finnish;
if (lowerTitle.Contains("turkish"))
return Language.Turkish;
if (lowerTitle.Contains("portuguese"))
return Language.Portuguese;
if (lowerTitle.Contains("hungarian"))
return Language.Hungarian;
var match = LanguageRegex.Match(title);
if (match.Groups["italian"].Captures.Cast<Capture>().Any())
return Language.Italian;
if (match.Groups["german"].Captures.Cast<Capture>().Any())
return Language.German;
if (match.Groups["flemish"].Captures.Cast<Capture>().Any())
return Language.Flemish;
if (match.Groups["greek"].Captures.Cast<Capture>().Any())
return Language.Greek;
if (match.Groups["french"].Success)
return Language.French;
if (match.Groups["russian"].Success)
return Language.Russian;
if (match.Groups["dutch"].Success)
return Language.Dutch;
if (match.Groups["hungarian"].Success)
return Language.Hungarian;
return Language.English;
}
public static Language ParseSubtitleLanguage(string fileName)
{
try
{
Logger.Debug("Parsing language from subtitlte file: {0}", fileName);
var simpleFilename = Path.GetFileNameWithoutExtension(fileName);
var languageMatch = SubtitleLanguageRegex.Match(simpleFilename);
if (languageMatch.Success)
{
var isoCode = languageMatch.Groups["iso_code"].Value;
var isoLanguage = IsoLanguages.Find(isoCode);
return isoLanguage?.Language ?? Language.Unknown;
}
Logger.Debug("Unable to parse langauge from subtitle file: {0}", fileName);
}
catch (Exception ex)
{
Logger.Debug("Failed parsing langauge from subtitle file: {0}", fileName);
}
return Language.Unknown;
}
}
}
+2 -95
View File
@@ -248,9 +248,6 @@ namespace NzbDrone.Core.Parser
private static readonly Regex AnimeReleaseGroupRegex = new Regex(@"^(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
@@ -358,7 +355,7 @@ namespace NzbDrone.Core.Parser
result.Special = true;
}
result.Language = ParseLanguage(title);
result.Language = LanguageParser.ParseLanguage(title);
Logger.Debug("Language parsed: {0}", result.Language);
result.Quality = QualityParser.ParseQuality(title);
@@ -493,97 +490,7 @@ namespace NzbDrone.Core.Parser
return title;
}
public static Language ParseLanguage(string title)
{
var lowerTitle = title.ToLower();
if (lowerTitle.Contains("english"))
return Language.English;
if (lowerTitle.Contains("french"))
return Language.French;
if (lowerTitle.Contains("spanish"))
return Language.Spanish;
if (lowerTitle.Contains("danish"))
return Language.Danish;
if (lowerTitle.Contains("dutch"))
return Language.Dutch;
if (lowerTitle.Contains("japanese"))
return Language.Japanese;
if (lowerTitle.Contains("cantonese"))
return Language.Cantonese;
if (lowerTitle.Contains("mandarin"))
return Language.Mandarin;
if (lowerTitle.Contains("korean"))
return Language.Korean;
if (lowerTitle.Contains("russian"))
return Language.Russian;
if (lowerTitle.Contains("polish"))
return Language.Polish;
if (lowerTitle.Contains("vietnamese"))
return Language.Vietnamese;
if (lowerTitle.Contains("swedish"))
return Language.Swedish;
if (lowerTitle.Contains("norwegian"))
return Language.Norwegian;
if (lowerTitle.Contains("nordic"))
return Language.Norwegian;
if (lowerTitle.Contains("finnish"))
return Language.Finnish;
if (lowerTitle.Contains("turkish"))
return Language.Turkish;
if (lowerTitle.Contains("portuguese"))
return Language.Portuguese;
if (lowerTitle.Contains("hungarian"))
return Language.Hungarian;
var match = LanguageRegex.Match(title);
if (match.Groups["italian"].Captures.Cast<Capture>().Any())
return Language.Italian;
if (match.Groups["german"].Captures.Cast<Capture>().Any())
return Language.German;
if (match.Groups["flemish"].Captures.Cast<Capture>().Any())
return Language.Flemish;
if (match.Groups["greek"].Captures.Cast<Capture>().Any())
return Language.Greek;
if (match.Groups["french"].Success)
return Language.French;
if (match.Groups["russian"].Success)
return Language.Russian;
if (match.Groups["dutch"].Success)
return Language.Dutch;
if (match.Groups["hungarian"].Success)
return Language.Hungarian;
return Language.English;
}
private static SeriesTitleInfo GetSeriesTitleInfo(string title)
{
var seriesTitleInfo = new SeriesTitleInfo();
+1 -1
View File
@@ -233,7 +233,7 @@ namespace NzbDrone.Core.Parser
info.FullSeason = false;
info.Quality = QualityParser.ParseQuality(title);
info.ReleaseGroup = Parser.ParseReleaseGroup(title);
info.Language = Parser.ParseLanguage(title);
info.Language = LanguageParser.ParseLanguage(title);
info.Special = true;
_logger.Debug("Found special episode {0} for title '{1}'", info, title);