ocr (697B)
#!/bin/sh

# copy text from an image to the clipboard
#
# usage: ocr [-l lang] [-d dpi]
#   -l lang  value passed to tesseract's -l option (default: jpn)
#   -d dpi   value passed to tesseract's --dpi option (default: 96)
#
# The output of selx is given to `sxot -g`, sxot's output is piped into
# tesseract, and tesseract's output is piped into `xclip -sel clip`.
#
# deps: tesseract sxot selx xclip
# you probably want the jpn.traineddata or jpn_vert.traineddata from here:
# https://github.com/tesseract-ocr/tessdata
# or the capture2text one here:
# https://sourceforge.net/projects/capture2text/files/Dictionaries/Japanese.zip/download

# directory handed to tesseract via --tessdata-dir
TESS_DATA="$HOME/.local/share/tesseract"

# Print the usage line on stderr and exit with status 1.
usage()
{
    echo "usage: ocr [-l lang][-d dpi]" >&2
    exit 1
}

while getopts "l:d:" arg; do
    case "${arg}" in
        d) dpi="${OPTARG}" ;;
        l) lang="${OPTARG}" ;;
        *) usage ;;
    esac
done

# Fall back to the defaults when an option was not given (or was empty).
lang=${lang:-jpn}
dpi=${dpi:-96}

# Run selx on its own so that a non-zero exit stops the script here,
# instead of starting the capture/OCR pipeline with an unusable -g value.
# NOTE(review): assumes selx exits non-zero when no selection was made;
# confirm against the selx documentation.
region=$(selx) || exit

# Option values are quoted so they reach tesseract as single arguments,
# without word splitting or glob expansion.
sxot -g "$region" |
    tesseract stdin stdout \
        --dpi "$dpi" -l "$lang" \
        --tessdata-dir "$TESS_DATA" |
    xclip -sel clip