dotfiles

personal dotfiles
git clone anongit@rnpnr.xyz:dotfiles.git
Log | Files | Refs | Feed | Submodules

ocr (697B)


      1 #!/bin/sh
      2 
      3 # copy text from an image to the clipboard
      4 # deps: tesseract sxot selx xclip
      5 # you probably want the jpn.traineddata or jpn_vert.traineddata from here:
      6 # https://github.com/tesseract-ocr/tessdata
      7 # or the capture2text one here:
      8 # https://sourceforge.net/projects/capture2text/files/Dictionaries/Japanese.zip/download
      9 
     10 TESS_DATA="$HOME/.local/share/tesseract"
     11 
     12 usage()
     13 {
     14 	echo "usage: ocr [-l lang][-d dpi]"
     15 	exit 1
     16 }
     17 
     18 while getopts "l:d:" arg; do
     19 	case "${arg}" in
     20 	d) dpi="${OPTARG}" ;;
     21 	l) lang="${OPTARG}" ;;
     22 	*) usage ;;
     23 	esac
     24 done
     25 
     26 lang=${lang:-jpn}
     27 dpi=${dpi:-96}
     28 
     29 sxot -g "$(selx)" |
     30 tesseract stdin stdout \
     31 	--dpi $dpi -l $lang \
     32 	--tessdata-dir "$TESS_DATA" |
     33 xclip -sel clip