ocr_plus.go 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. package ocr
  2. import (
  3. "fmt"
  4. "github.com/sirupsen/logrus"
  5. "io/ioutil"
  6. "os"
  7. "os/exec"
  8. "path/filepath"
  9. "strings"
  10. )
  11. func OCRPlus(rawPath string, lang string, psm string, stepList []string) (output string, err error) {
  12. exePath, err := exec.LookPath("tesseract.exe")
  13. if err != nil {
  14. return output, err
  15. }
  16. ocr, err := filepath.Abs(exePath)
  17. if err != nil {
  18. return output, err
  19. }
  20. processedPath, err := ProcessImage(rawPath, stepList)
  21. rawDir, rawFileName := filepath.Split(rawPath)
  22. rawExt := filepath.Ext(rawFileName)
  23. outputPath := filepath.Join(rawDir, fmt.Sprintf("%s", rawFileName[:len(rawFileName)-len(rawExt)]))
  24. outputPath2 := filepath.Join(rawDir, fmt.Sprintf("%s.txt", rawFileName[:len(rawFileName)-len(rawExt)]))
  25. processedPath = filepath.ToSlash(processedPath)
  26. outputPath = filepath.ToSlash(outputPath)
  27. args := []string{processedPath, outputPath, "-l", lang, "--psm", psm}
  28. cmd := exec.Command(ocr, args...)
  29. cmd.Stdout = os.Stdout
  30. cmd.Stderr = os.Stderr
  31. err = cmd.Run()
  32. if err != nil {
  33. logrus.Errorf("cmd.Run (%s %s) error : %v\n", processedPath, strings.Join(args, " "), err)
  34. return output, err
  35. }
  36. outputByte, err := ioutil.ReadFile(outputPath2)
  37. if err != nil {
  38. logrus.Errorf("ioutil.ReadFile(%s) error : %v\n", outputPath, err)
  39. return output, err
  40. }
  41. output = strings.TrimSpace(string(outputByte))
  42. return output, nil
  43. }