diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..17e4986
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>xyz.wbsite</groupId>
+    <artifactId>wbocr</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <dependencies>
+        <!-- Tess4J: Java wrapper around the Tesseract OCR engine -->
+        <dependency>
+            <groupId>net.sourceforge.tess4j</groupId>
+            <artifactId>tess4j</artifactId>
+            <version>4.3.0</version>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/src/main/java/xyz/wbsite/ocr/Ocr.java b/src/main/java/xyz/wbsite/ocr/Ocr.java
new file mode 100644
index 0000000..a6ebf35
--- /dev/null
+++ b/src/main/java/xyz/wbsite/ocr/Ocr.java
@@ -0,0 +1,63 @@
+package xyz.wbsite.ocr;
+
+import net.sourceforge.tess4j.ITesseract;
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.TesseractException;
+
+import java.io.File;
+
+/**
+ * Command-line demo: runs Tesseract OCR (through Tess4J) on one image and prints the text.
+ */
+public class Ocr {
+
+    /** Tessdata directory used when no second argument is supplied. */
+    private static final String DEFAULT_DATA_PATH = "E:\\workspace\\wbocr\\src\\main\\resources\\tessdata";
+
+    /** Image file used when no first argument is supplied. */
+    private static final String DEFAULT_IMAGE_PATH = "D:\\2018041914044684.png";
+
+    /** Language pack used when no third argument is supplied. */
+    private static final String DEFAULT_LANGUAGE = "chi_sim";
+
+    /**
+     * Recognizes the text in an image and prints it together with the elapsed time.
+     *
+     * @param args optional overrides, in order: image path, tessdata directory, language;
+     *             any argument left out falls back to the matching default constant
+     */
+    public static void main(String[] args) {
+        String imagePath = argOrDefault(args, 0, DEFAULT_IMAGE_PATH);
+        String dataPath = argOrDefault(args, 1, DEFAULT_DATA_PATH);
+        String language = argOrDefault(args, 2, DEFAULT_LANGUAGE);
+
+        ITesseract instance = new Tesseract();
+        // An absolute path is needed unless tessdata sits in the project root directory.
+        instance.setDatapath(dataPath);
+        // Languages other than English must be named here and their language pack added to the project.
+        instance.setLanguage(language);
+
+        File image = new File(imagePath);
+        long startTime = System.currentTimeMillis();
+        String ocrResult;
+        try {
+            ocrResult = instance.doOCR(image);
+        } catch (TesseractException e) {
+            // Report the failure and stop instead of printing "null" as if it were a result.
+            e.printStackTrace();
+            System.exit(1);
+            return; // not reached at runtime; lets the compiler see ocrResult is assigned below
+        }
+
+        // Print the recognized text and how long recognition took.
+        System.out.println("OCR Result: \n" + ocrResult + "\n 耗时:" + (System.currentTimeMillis() - startTime) + "ms");
+    }
+
+    /** Returns {@code args[index]} when it is present and non-empty, otherwise {@code fallback}. */
+    private static String argOrDefault(String[] args, int index, String fallback) {
+        if (args != null && index < args.length && args[index] != null && args[index].length() > 0) {
+            return args[index];
+        }
+        return fallback;
+    }
+}
diff --git a/src/main/resources/tessdata/chi_sim.traineddata b/src/main/resources/tessdata/chi_sim.traineddata
new file mode 100644
index 0000000..eeb66cf
Binary files /dev/null and b/src/main/resources/tessdata/chi_sim.traineddata differ
diff --git a/src/main/resources/tessdata/eng.traineddata b/src/main/resources/tessdata/eng.traineddata
new file mode 100644
index 0000000..f4744c2
Binary files /dev/null and b/src/main/resources/tessdata/eng.traineddata differ