ocr - dotfiles - personal dotfiles

ocr (624B)
      1 #!/bin/sh
      2 
      3 # copy text from an image to the clipboard
      4 # deps: tesseract sxot selx xclip
      5 # you probably want the jpn.traineddata or jpn_vert.traineddata from here:
      6 # https://github.com/tesseract-ocr/tessdata
      7 # or the capture2text one here:
      8 # https://sourceforge.net/projects/capture2text/files/Dictionaries/Japanese.zip/download
      9 
     10 usage()
     11 {
     12 	echo "usage: ocr [-l lang][-d dpi]"
     13 	exit 1
     14 }
     15 
     16 while getopts "l:d:" arg; do
     17 	case "${arg}" in
     18 	d) dpi="${OPTARG}" ;;
     19 	l) lang="${OPTARG}" ;;
     20 	*) usage ;;
     21 	esac
     22 done
     23 
     24 lang=${lang:-jpn}
     25 dpi=${dpi:-96}
     26 
     27 sxot -g "$(selx)" |
     28 tesseract stdin stdout \
     29 	--dpi $dpi -l $lang |
     30 xclip -sel clip