preprocess.go 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. package ocr
  2. import (
  3. "fmt"
  4. "github.com/sirupsen/logrus"
  5. "image"
  6. "image/jpeg"
  7. "os"
  8. "path/filepath"
  9. "strings"
  10. )
  11. func Preprocess(rawPath string, stepList []string) (processedPath string, err error) {
  12. steps := strings.Join(stepList, ",")
  13. steps = strings.ToLower(steps)
  14. handle, err := os.Open(rawPath)
  15. if err != nil {
  16. logrus.Errorf("os.Open rawPath(%s) error : %v\n", rawPath, err)
  17. return processedPath, err
  18. }
  19. defer handle.Close()
  20. src, err := jpeg.Decode(handle)
  21. if err != nil {
  22. logrus.Errorf("jpeg.Encode error : %v\n", err)
  23. return processedPath, err
  24. }
  25. var grayData *image.Gray
  26. if strings.Contains(steps, "gray") == true {
  27. grayData = Gray(src, ToGrayLuminance)
  28. // Apply binarization using Otsu's method
  29. grayImg := image.NewGray(grayData.Bounds())
  30. for y := 0; y < grayData.Bounds().Dy(); y++ {
  31. for x := 0; x < grayData.Bounds().Dx(); x++ {
  32. grayImg.Set(x, y, grayData.At(x, y))
  33. }
  34. }
  35. } else if strings.Contains(steps, "threshold") == true {
  36. threshold := Otsu(grayData)
  37. Threshold(grayData, threshold, 255, 0)
  38. }
  39. rawDir, rawFileName := filepath.Split(rawPath)
  40. rawExt := filepath.Ext(rawFileName)
  41. processedPath = filepath.Join(rawDir, fmt.Sprintf("%s_processed%v", rawFileName[:len(rawFileName)-len(rawExt)], rawExt))
  42. processedHandle, err := os.Create(processedPath)
  43. if err != nil {
  44. logrus.Errorf("os.Open processedPath(%s) error : %v\n", processedPath, err)
  45. return processedPath, err
  46. }
  47. defer processedHandle.Close()
  48. err = jpeg.Encode(processedHandle, grayData, &jpeg.Options{Quality: 80})
  49. if err != nil {
  50. logrus.Errorf("jpeg.Encode error : %v\n", err)
  51. return processedPath, err
  52. }
  53. return processedPath, nil
  54. }