rewrite forvo support - mpv2oboeru - mpv helpers to create flashcards from movies and TV shows

Commit: 7b7412c4f75944b1b95e621241f474eeafd3ba07
Parent: b24689a62a180fc1274460c7fda60452b3cdd4fb
Author: Ren Tatsumoto
Date:   Sun, 13 Dec 2020 22:44:35 +0300

rewrite forvo support

Diffstat:
M subs2srs.lua  | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------

1 file changed, 99 insertions(+), 40 deletions(-)
diff --git a/subs2srs.lua b/subs2srs.lua
@@ -412,46 +412,6 @@ local function join_media_fields(note1, note2)
     return note1
 end
 
-local function get_forvo_pronunciation(word)
-    word = platform.windows and url_encode(word) or word
-    local forvo_format = config.audio_extension:sub(2)
-    local forvo_page = subprocess { 'curl', '-s', string.format('https://forvo.com/search/%s/ja', word) }.stdout
-    local play_params = string.match(forvo_page, "Play%((.-)%);")
-    if not play_params then
-        return nil
-    end
-    local iter = string.gmatch(play_params, "'(.-)'")
-    local formats = { mp3 = iter(), ogg = iter() }
-    local audio_url = string.format('https://audio00.forvo.com/%s/%s', forvo_format, base64d(formats[forvo_format]))
-    local pronunciation_filename = string.format('forvo_%s.%s', platform.windows and os.time() or word, forvo_format)
-    local pronunciation_path = utils.join_path(config.collection_path, pronunciation_filename)
-    mp.commandv('run', 'curl', audio_url, '-s', '-L', '-o', pronunciation_path)
-    return string.format('[sound:%s]', pronunciation_filename)
-end
-
-local function append_forvo_pronunciation(note1, note2)
-    if config.use_forvo == 'no' then
-        return note1
-    end
-    if type(note2[config.vocab_audio_field]) ~= 'string' then
-        return note1
-    end
-    if is_empty(note2[config.vocab_field]) then
-        return note1
-    end
-    if config.use_forvo == 'always' or is_empty(note2[config.vocab_audio_field]) then
-        local forvo_pronunciation = get_forvo_pronunciation(note2[config.vocab_field])
-        if not is_empty(forvo_pronunciation) then
-            if config.vocab_audio_field == config.audio_field then
-                note1[config.vocab_audio_field] = forvo_pronunciation .. note1[config.vocab_audio_field]
-            else
-                note1[config.vocab_audio_field] = forvo_pronunciation
-            end
-        end
-    end
-    return note1
-end
-
 local validate_config
 do
     local function is_webp_supported()
@@ -598,6 +558,103 @@ end
 platform = is_running_windows() and init_platform_windows() or init_platform_nix()
 
 ------------------------------------------------------------
+-- utils for downloading pronunciations from Forvo
+
+local append_forvo_pronunciation
+do
+    -- Re-encodes the audio of `source_path` by running mpv in encoding mode
+    -- and writes the output to `dest_path`.
+    -- Video and subtitles are disabled and the audio is downmixed to mono;
+    -- codec and bitrate are taken from config.audio_codec / config.audio_bitrate.
+    -- Returns whatever `subprocess` returns for the mpv invocation.
+    local function audio_reencode(source_path, dest_path)
+        -- Options that depend on the configuration or on the arguments.
+        local codec_opt = table.concat { '--oac=', config.audio_codec }
+        local bitrate_opt = table.concat { '--oacopts-add=b=', config.audio_bitrate }
+        local output_opt = table.concat { '-o=', dest_path }
+
+        return subprocess {
+            'mpv',
+            source_path,
+            '--loop-file=no',
+            '--video=no',
+            '--no-ocopy-metadata',
+            '--no-sub',
+            '--audio-channels=mono',
+            -- NOTE(review): vbr/application/compression_level look like Opus
+            -- encoder options; confirm they are accepted for other codecs.
+            '--oacopts-add=vbr=on',
+            '--oacopts-add=application=voip',
+            '--oacopts-add=compression_level=10',
+            -- silenceremove audio filter; presumably strips leading silence
+            -- quieter than -50dB (verify against the ffmpeg filter docs).
+            '--af-append=silenceremove=1:0:-50dB',
+            codec_opt,
+            bitrate_opt,
+            output_opt,
+        }
+    end
+
+    -- Re-encodes `source_path` into a temporary file and hands that file to
+    -- ankiconnect.store_file under the name `filename`.
+    -- The temporary re-encoded file is always removed afterwards.
+    -- Returns the result of ankiconnect.store_file, or false when mpv did not
+    -- exit with status 0 (in which case nothing is sent to AnkiConnect).
+    local function reencode_and_store(source_path, filename)
+        local reencoded_path = utils.join_path(platform.tmp_dir(), 'reencoded_' .. filename)
+        local stored = false
+
+        -- Only store the file when the re-encode actually succeeded; otherwise
+        -- the output file may be missing or incomplete.
+        if audio_reencode(source_path, reencoded_path).status == 0 then
+            stored = ankiconnect.store_file(filename, reencoded_path)
+        else
+            msg.warn(string.format("Couldn't re-encode audio file '%s'.", source_path))
+        end
+
+        -- os.remove returns nil plus a message (no error) if the file is absent.
+        os.remove(reencoded_path)
+        return stored
+    end
+
+    -- Downloads `source_url` to the file `save_location` using curl.
+    -- Flags: -s silences progress output, -L follows redirects, -o sets the
+    -- output file, and -f makes curl exit with a non-zero status on HTTP
+    -- error responses instead of saving the server's error page and
+    -- reporting success.
+    -- Returns true only when curl exits with status 0.
+    local function curl_save(source_url, save_location)
+        local curl_args = { 'curl', source_url, '-f', '-s', '-L', '-o', save_location }
+        return subprocess(curl_args).status == 0
+    end
+
+    -- Looks up `word` on the Forvo search page and builds the URL of the
+    -- first pronunciation found there.
+    -- The audio format is config.audio_extension without its leading dot;
+    -- only 'mp3' and 'ogg' are mapped.
+    -- Returns the audio URL string, or nil when the page could not be read,
+    -- contains no Play(...) call, or offers nothing for the wanted format.
+    local function get_pronunciation_url(word)
+        local file_format = config.audio_extension:sub(2)
+        local search_url = string.format('https://forvo.com/search/%s/ja', url_encode(word))
+        local forvo_page = subprocess { 'curl', '-s', search_url }.stdout
+
+        -- curl may have produced no output at all (e.g. it failed to start).
+        if type(forvo_page) ~= 'string' then
+            return nil
+        end
+
+        local play_params = string.match(forvo_page, "Play%((.-)%);")
+        if not play_params then
+            return nil
+        end
+
+        -- The first two quoted arguments of Play(...) are taken as the mp3 and
+        -- ogg paths, in that order. Read them with separate statements so the
+        -- order does not depend on how a table constructor is evaluated.
+        local next_quoted = string.gmatch(play_params, "'(.-)'")
+        local mp3_path = next_quoted()
+        local ogg_path = next_quoted()
+        local formats = { mp3 = mp3_path, ogg = ogg_path }
+
+        -- Unsupported extension or fewer quoted arguments than expected.
+        local encoded_path = formats[file_format]
+        if not encoded_path or encoded_path == '' then
+            return nil
+        end
+
+        return string.format('https://audio00.forvo.com/%s/%s', file_format, base64d(encoded_path))
+    end
+
+    -- Fetches the Forvo pronunciation of `word`, re-encodes it and stores it
+    -- through reencode_and_store.
+    -- Returns the string '[sound:<filename>]' on success and nil on failure;
+    -- a warning is logged when no audio URL is found or the download/store
+    -- step fails. The downloaded temporary file is removed in both cases.
+    local function get_forvo_pronunciation(word)
+        local audio_url = get_pronunciation_url(word)
+        if is_empty(audio_url) then
+            msg.warn(string.format("Seems like Forvo doesn't have audio for word %s.", word))
+            return
+        end
+
+        -- On Windows the file is named after the current time instead of the
+        -- word (presumably to avoid non-ASCII file names; confirm).
+        local name_stem = platform.windows and os.time() or word
+        local filename = string.format('forvo_%s%s', name_stem, config.audio_extension)
+        local tmp_filepath = utils.join_path(platform.tmp_dir(), filename)
+
+        -- The store step runs only if the download succeeded.
+        local stored = curl_save(audio_url, tmp_filepath) and reencode_and_store(tmp_filepath, filename)
+
+        local sound_tag
+        if not stored then
+            msg.warn(string.format("Couldn't download audio for word %s from Forvo.", word))
+        else
+            sound_tag = string.format('[sound:%s]', filename)
+        end
+
+        os.remove(tmp_filepath)
+        return sound_tag
+    end
+
+    -- Adds a Forvo pronunciation to `appended_data` when appropriate.
+    -- `stored_data` is consulted for the vocabulary word and for the current
+    -- content of the vocab audio field; `appended_data` is the table that is
+    -- modified and returned.
+    -- Nothing is changed when config.use_forvo is 'no', when the stored vocab
+    -- audio field is not a string, when the stored vocab field is empty, or
+    -- when the stored vocab audio field is already filled and use_forvo is
+    -- not 'always'.
+    append_forvo_pronunciation = function(appended_data, stored_data)
+        local vocab_audio_field = config.vocab_audio_field
+
+        if config.use_forvo == 'no'
+                or type(stored_data[vocab_audio_field]) ~= 'string'
+                or is_empty(stored_data[config.vocab_field]) then
+            return appended_data
+        end
+
+        -- Keep existing vocab audio unless the user asked to always fetch.
+        if config.use_forvo ~= 'always' and not is_empty(stored_data[vocab_audio_field]) then
+            return appended_data
+        end
+
+        local forvo_pronunciation = get_forvo_pronunciation(stored_data[config.vocab_field])
+        if is_empty(forvo_pronunciation) then
+            return appended_data
+        end
+
+        if vocab_audio_field == config.audio_field then
+            -- improperly configured fields. don't lose sentence audio
+            -- `appended_data` may hold no value for this field; fall back to
+            -- an empty string so the concatenation cannot raise on nil.
+            appended_data[vocab_audio_field] = forvo_pronunciation .. (appended_data[vocab_audio_field] or '')
+        else
+            appended_data[vocab_audio_field] = forvo_pronunciation
+        end
+        return appended_data
+    end
+end
+
+------------------------------------------------------------
 -- provides interface for creating audio clips and snapshots
 
 encoder = {}
@@ -749,8 +806,10 @@ ankiconnect.store_file = function(filename, file_path)
     local _, error = ankiconnect.parse_result(ret)
     if not error then
         msg.info(string.format("File stored: '%s'.", filename))
+        return true
     else
         msg.error(string.format("Couldn't store file '%s': %s", filename, error))
+        return false
     end
 end