mpv2oboeru

mpv helpers to create flashcards from movies and TV shows
git clone anongit@rnpnr.xyz:mpv2oboeru.git
Log | Files | Refs | Feed | README | LICENSE

Commit: f665a5e22a8aa2db252263d59ff67b262af514d1
Parent: 4209367a1035e3529e89c5f95bc9bc7951a6dba1
Author: Ren Tatsumoto
Date:   Tue, 29 Mar 2022 21:59:42 +0000

Merge pull request #67 from Luukuton/episode-num-parsing

Fix episode number parsing and add tests
Diffstat:
M config.lua | 9 +++------
A helpers.lua | 32 ++++++++++++++++++++++++++++++++
M subs2srs.lua | 60 +++++++++++++++++-------------------------------------------
A test.lua | 32 ++++++++++++++++++++++++++++++++
4 files changed, 84 insertions(+), 49 deletions(-)

diff --git a/config.lua b/config.lua @@ -1,14 +1,11 @@ local mpopt = require('mp.options') +local helpers = require('helpers') local initial_config = {} local default_profile_filename = 'subs2srs' local profiles_filename = 'subs2srs_profiles' local config, profiles -local function is_empty(var) - return var == nil or var == '' or (type(var) == 'table' and next(var) == nil) -end - local function set_audio_format() if config.audio_format == 'opus' then config.audio_codec = 'libopus' @@ -57,9 +54,9 @@ local function validate_config() end local function load_profile(profile_name) - if is_empty(profile_name) then + if helpers.is_empty(profile_name) then profile_name = profiles.active - if is_empty(profile_name) then + if helpers.is_empty(profile_name) then profile_name = default_profile_filename end end diff --git a/helpers.lua b/helpers.lua @@ -0,0 +1,32 @@ +local function is_empty(var) + return var == nil or var == '' or (type(var) == 'table' and next(var) == nil) +end + +local function get_episode_number(filename) + -- Reverses the filename to start the search from the end as the media title might contain similar numbers. + local filename_reversed = filename:reverse() + + local ep_num_patterns = { + "[%s_](%d?%d?%d)[pP]?[eE]", -- Starting with E or EP (case-insensitive). "Example Series S01E01 [94Z295D1]" + "^(%d?%d?%d)[pP]?[eE]", -- Starting with E or EP (case-insensitive) at the end of filename. "Example Series S01E01" + "%)(%d?%d?%d)%(", -- Surrounded by parentheses. "Example Series (12)" + "%](%d?%d?%d)%[", -- Surrounded by brackets. "Example Series [01]" + "%s(%d?%d?%d)%s", -- Surrounded by whitespace. "Example Series 124 [1080p 10-bit]" + "_(%d?%d?%d)_", -- Surrounded by underscores. "Example_Series_04_1080p" + "^(%d?%d?%d)[%s_]", -- Ending to the episode number. "Example Series 124" + "(%d?%d?%d)%-edosipE", -- Prepended by "Episode-". 
"Example Episode-165" + } + + local s, e, episode_num + for _, pattern in pairs(ep_num_patterns) do + s, e, episode_num = string.find(filename_reversed, pattern) + if not is_empty(episode_num) then + return #filename - e, #filename - s, episode_num:reverse() + end + end +end + +return { + is_empty = is_empty, + get_episode_number = get_episode_number, +} diff --git a/subs2srs.lua b/subs2srs.lua @@ -106,6 +106,7 @@ local msg = require('mp.msg') local OSD = require('osd_styler') local config_manager = require('config') local encoder = require('encoder') +local helpers = require('helpers') -- namespaces local subs @@ -162,10 +163,6 @@ function table.get(table, key, default) end end -local function is_empty(var) - return var == nil or var == '' or (type(var) == 'table' and next(var) == nil) -end - local function is_running_windows() return mp.get_property('options/vo-mmcss-profile') ~= nil end @@ -268,7 +265,7 @@ local function trim(str) end local function copy_to_clipboard(_, text) - if not is_empty(text) then + if not helpers.is_empty(text) then text = config.clipboard_trim_enabled and trim(text) or remove_newlines(text) platform.copy_to_clipboard(text) end @@ -363,36 +360,13 @@ local function load_next_profile() notify("Loaded profile " .. profiles.active) end -local function get_episode_number(filename) - -- Reverses the filename to start the search from the end as the media title might contain similar numbers. - local filename_reversed = filename:reverse() - - local ep_num_patterns = { - "%s?(%d?%d?%d)[pP]?[eE]", -- Starting with E or EP (case-insensitive). "Example Series S01E01" - "%)(%d?%d?%d)%(", -- Surrounded by parentheses. "Example Series (12)" - "%](%d?%d?%d)%[", -- Surrounded by brackets. "Example Series [01]" - "%s(%d?%d?%d)%s", -- Surrounded by whitespace. "Example Series 124 [1080p 10-bit]" - "_(%d?%d?%d)_", -- Surrounded by underscores. "Example_Series_04_1080p" - "^(%d?%d?%d)[%s_]", -- Ending to the episode number. 
"Example Series 124" - "(%d?%d?%d)%-edosipE", -- Prepended by "Episode-". "Example Episode-165" - } - - local s, e, episode_num - for _, pattern in pairs(ep_num_patterns) do - s, e, episode_num = string.find(filename_reversed, pattern) - if not is_empty(episode_num) then - return #filename - e, #filename - s, episode_num:reverse() - end - end -end - local function tag_format(filename) filename = remove_extension(filename) filename = remove_common_resolutions(filename) - local s, e, episode_num = get_episode_number(filename) + local s, e, episode_num = helpers.get_episode_number(filename) - if config.tag_del_episode_num == true and not is_empty(s) then + if config.tag_del_episode_num == true and not helpers.is_empty(s) then if config.tag_del_after_episode_num == true then -- Removing everything (e.g. episode name) after the episode number including itself. filename = filename:sub(1, s) @@ -478,10 +452,10 @@ local function update_sentence(new_data, stored_data) -- https://tatsumoto-ren.github.io/blog/discussing-various-card-templates.html#targeted-sentence-cards-or-mpvacious-cards -- if the target word was marked by yomichan, this function makes sure that the highlighting doesn't get erased. - if is_empty(stored_data[config.sentence_field]) then + if helpers.is_empty(stored_data[config.sentence_field]) then -- sentence field is empty. can't continue. return new_data - elseif is_empty(new_data[config.sentence_field]) then + elseif helpers.is_empty(new_data[config.sentence_field]) then -- *new* sentence field is empty, but old one contains data. don't delete the existing sentence. 
new_data[config.sentence_field] = stored_data[config.sentence_field] return new_data @@ -672,7 +646,7 @@ local function export_to_anki(gui) return end - if not gui and is_empty(sub['text']) then + if not gui and helpers.is_empty(sub['text']) then sub['text'] = string.format("mpvacious wasn't able to grab subtitles (%s)", os.time()) end @@ -695,7 +669,7 @@ local function update_last_note(overwrite) if sub == nil then notify("Nothing to export. Have you set the timings?", "warn", 2) return - elseif is_empty(sub['text']) then + elseif helpers.is_empty(sub['text']) then -- In this case, don't modify whatever existing text there is and just -- modify the other fields we can. The user might be trying to add -- audio to a card which they've manually transcribed (either the video @@ -733,7 +707,7 @@ local function update_last_note(overwrite) -- If the text is still empty, put some dummy text to let the user know why -- there's no text in the sentence field. - if is_empty(new_data[config.sentence_field]) then + if helpers.is_empty(new_data[config.sentence_field]) then new_data[config.sentence_field] = string.format("mpvacious wasn't able to grab subtitles (%s)", os.time()) end @@ -981,7 +955,7 @@ do local function get_forvo_pronunciation(word) local audio_url = get_pronunciation_url(word) - if is_empty(audio_url) then + if helpers.is_empty(audio_url) then msg.warn(string.format("Seems like Forvo doesn't have audio for word %s.", word)) return end @@ -1011,14 +985,14 @@ do return new_data end - if is_empty(stored_data[config.vocab_field]) then + if helpers.is_empty(stored_data[config.vocab_field]) then -- target word field is empty. can't continue. 
return new_data end - if config.use_forvo == 'always' or is_empty(stored_data[config.vocab_audio_field]) then + if config.use_forvo == 'always' or helpers.is_empty(stored_data[config.vocab_audio_field]) then local forvo_pronunciation = get_forvo_pronunciation(stored_data[config.vocab_field]) - if not is_empty(forvo_pronunciation) then + if not helpers.is_empty(forvo_pronunciation) then if config.vocab_audio_field == config.audio_field then -- improperly configured fields. don't lose sentence audio new_data[config.audio_field] = forvo_pronunciation .. new_data[config.audio_field] @@ -1117,7 +1091,7 @@ end ankiconnect.add_note = function(note_fields, gui) local action = gui and 'guiAddCards' or 'addNote' - local tags = is_empty(config.note_tag) and {} or { substitute_fmt(config.note_tag) } + local tags = helpers.is_empty(config.note_tag) and {} or { substitute_fmt(config.note_tag) } local args = { action = action, version = 6, @@ -1156,7 +1130,7 @@ ankiconnect.get_last_note_id = function() local note_ids, _ = ankiconnect.parse_result(ret) - if not is_empty(note_ids) then + if not helpers.is_empty(note_ids) then return table.max_num(note_ids) else return -1 @@ -1199,7 +1173,7 @@ ankiconnect.gui_browse = function(query) end ankiconnect.add_tag = function(note_id, tag) - if not is_empty(tag) then + if not helpers.is_empty(tag) then tag = substitute_fmt(tag) ankiconnect.execute { action = 'addTags', @@ -1284,7 +1258,7 @@ subs.get = function() if sub['start'] > sub['end'] then sub['start'], sub['end'] = sub['end'], sub['start'] end - if not is_empty(sub['text']) then + if not helpers.is_empty(sub['text']) then sub['text'] = trim(sub['text']) sub['text'] = escape_special_characters(sub['text']) end diff --git a/test.lua b/test.lua @@ -0,0 +1,32 @@ +local helpers = require('helpers') + +local function assert_equals(expected, actual) + if expected ~= actual then + error(string.format("TEST FAILED: Expected '%s', got '%s'", expected, actual)) + end +end + +local function 
test_get_episode_number() + local test_cases = { + { nil, "A Whisker Away.mkv" }, + { nil, "[Placeholder] Gekijouban SHIROBAKO [Ma10p_1080p][x265_flac]" }, + { "06", "[Placeholder] Sono Bisque Doll wa Koi wo Suru - 06 [54E495D0]" }, + { "02", "(Hi10)_Kobayashi-san_Chi_no_Maid_Dragon_-_02_(BD_1080p)_(Placeholder)_(12C5D2B4)" }, + { "01", "[Placeholder] Koi to Yobu ni wa Kimochi Warui - 01 (1080p) [D517C9F0]" }, + { "01", "[Placeholder] Tsukimonogatari 01 [BD 1080p x264 10-bit FLAC] [5CD88145]" }, + { "01", "[Placeholder] 86 - Eighty Six - 01 (1080p) [1B13598F]" }, + { "00", "[Placeholder] Fate Stay Night - Unlimited Blade Works - 00 (BD 1080p Hi10 FLAC) [95590B7F]" }, + { "01", "House, M.D. S01E01 Pilot - Everybody Lies (1080p x265 Placeholder)" }, + { "165", "A Generic Episode-165" } + } + + for _, case in pairs(test_cases) do + local _, _, episode_num = helpers.get_episode_number(case[2]) + assert_equals(case[1], episode_num) + end +end + +-- Runs tests +test_get_episode_number() + +os.exit(print("Tests passed"))