Sometimes it is not possible to get the orientation from metadata, for example when the user took a photo with a mobile device's camera held in the wrong orientation.
My solution is based on Jack Fan's answer and targets google-api-services-vision (available via Maven).
My TextUnit class:
/**
 * A piece of recognised text together with two corners of its bounding box:
 * the lowest-left point and the upper-right point.
 */
public class TextUnit {

    private final String text;
    // X of lowest left point
    private final float llx;
    // Y of lowest left point
    private final float lly;
    // X of upper right point
    private final float urx;
    // Y of upper right point
    private final float ury;

    /**
     * Creates a text unit.
     *
     * @param text the recognised text
     * @param llx  X of the lowest left point
     * @param lly  Y of the lowest left point
     * @param urx  X of the upper right point
     * @param ury  Y of the upper right point
     */
    public TextUnit(String text, float llx, float lly, float urx, float ury) {
        this.text = text;
        this.llx = llx;
        this.lly = lly;
        this.urx = urx;
        this.ury = ury;
    }

    /** @return the recognised text */
    public String getText() {
        return text;
    }

    /** @return X of the lowest left point */
    public float getLlx() {
        return llx;
    }

    /** @return Y of the lowest left point */
    public float getLly() {
        return lly;
    }

    /** @return X of the upper right point */
    public float getUrx() {
        return urx;
    }

    /** @return Y of the upper right point */
    public float getUry() {
        return ury;
    }
}
base method:
/**
 * Converts the text annotations of every response in the batch into {@link TextUnit}s.
 *
 * <p>For each response the orientation is taken from the first word (annotation index 1,
 * falling back to index 2, then to {@code EXIF_ORIENTATION_NORMAL}). Each word polygon is
 * then transformed around the centre of the first word according to that orientation and
 * reduced to a lower-left / upper-right pair. Annotation index 0 is skipped because it
 * contains all text of the page.
 *
 * @param response the batch response to read
 * @return the extracted text units; empty when no response contains word annotations
 * @throws AnnotateImageResponseException if any response carries an error
 */
List<TextUnit> extractData(BatchAnnotateImagesResponse response) throws AnnotateImageResponseException {
    List<TextUnit> data = new ArrayList<>();
    for (AnnotateImageResponse res : response.getResponses()) {
        if (null != res.getError()) {
            String errorMessage = res.getError().getMessage();
            logger.log(Level.WARNING, "AnnotateImageResponse ERROR: " + errorMessage);
            throw new AnnotateImageResponseException("AnnotateImageResponse ERROR: " + errorMessage);
        }

        // Read the annotations of the response being iterated, not always the first one.
        List<EntityAnnotation> texts = res.getTextAnnotations();
        // Index 0 is the whole page text, so at least two entries are needed to have a word.
        if (texts == null || texts.size() < 2) {
            continue;
        }

        //get orientation
        EntityAnnotation firstWord = texts.get(1);
        int orientation;
        try {
            orientation = getExifOrientation(firstWord);
        } catch (NullPointerException e) {
            try {
                // Only fall back to the second word when it actually exists.
                orientation = texts.size() > 2
                        ? getExifOrientation(texts.get(2))
                        : EXIF_ORIENTATION_NORMAL;
            } catch (NullPointerException e1) {
                orientation = EXIF_ORIENTATION_NORMAL;
            }
        }
        logger.log(Level.INFO, "orientation: " + orientation);

        // Calculate the center (of the first word); a missing coordinate contributes 0.
        float centerX = 0, centerY = 0;
        for (Vertex vertex : firstWord.getBoundingPoly().getVertices()) {
            if (vertex.getX() != null) {
                centerX += vertex.getX();
            }
            if (vertex.getY() != null) {
                centerY += vertex.getY();
            }
        }
        centerX /= 4;
        centerY /= 4;

        for (int i = 1; i < texts.size(); i++) {//exclude first text - it contains all text of the page
            EntityAnnotation annotation = texts.get(i);
            String blockText = annotation.getDescription();
            BoundingPoly poly = annotation.getBoundingPoly();
            try {
                // An unrecognised orientation leaves the polygon untouched and yields all-zero corners.
                boolean knownOrientation = true;
                if (orientation == EXIF_ORIENTATION_NORMAL) {
                    poly = invertSymmetricallyBy0X(centerY, poly);
                } else if (orientation == EXIF_ORIENTATION_90_DEGREE) {
                    poly = rotate(centerX, centerY, poly, Math.toRadians(-90));
                    poly = invertSymmetricallyBy0Y(centerX, poly);
                } else if (orientation == EXIF_ORIENTATION_180_DEGREE) {
                    poly = rotate(centerX, centerY, poly, Math.toRadians(-180));
                    poly = invertSymmetricallyBy0Y(centerX, poly);
                } else if (orientation == EXIF_ORIENTATION_270_DEGREE) {
                    poly = rotate(centerX, centerY, poly, Math.toRadians(-270));
                    poly = invertSymmetricallyBy0Y(centerX, poly);
                } else {
                    knownOrientation = false;
                }

                float llx = 0;
                float lly = 0;
                float urx = 0;
                float ury = 0;
                if (knownOrientation) {
                    llx = getLlx(poly);
                    lly = getLly(poly);
                    urx = getUrx(poly);
                    ury = getUry(poly);
                }
                data.add(new TextUnit(blockText, llx, lly, urx, ury));
            } catch (NullPointerException ignored) {
                //ignore - some polys have no X or Y coordinate if the text is located close to the bounds.
            }
        }
    }
    return data;
}
helper methods:
/**
 * Computes the left X of the polygon as the mean of its two smallest vertex X values.
 * A vertex without an X value counts as 0.
 *
 * @param poly the polygon to measure
 * @return the mean of the two smallest X values, or 0 if anything goes wrong while reading them
 */
private float getLlx(BoundingPoly poly) {
    try {
        ArrayList<Float> sortedXs = new ArrayList<>();
        for (Vertex vertex : poly.getVertices()) {
            float value = (vertex.getX() == null) ? 0 : vertex.getX();
            sortedXs.add(value);
        }
        Collections.sort(sortedXs);
        // After an ascending sort the two smallest values sit at the front.
        return (sortedXs.get(0) + sortedXs.get(1)) / 2;
    } catch (Exception e) {
        return 0;
    }
}
/**
 * Computes the lower Y of the polygon as the mean of its two smallest vertex Y values.
 * A vertex without a Y value counts as 0.
 *
 * @param poly the polygon to measure
 * @return the mean of the two smallest Y values, or 0 if anything goes wrong while reading them
 */
private float getLly(BoundingPoly poly) {
    try {
        ArrayList<Float> sortedYs = new ArrayList<>();
        for (Vertex vertex : poly.getVertices()) {
            float value = (vertex.getY() == null) ? 0 : vertex.getY();
            sortedYs.add(value);
        }
        Collections.sort(sortedYs);
        // After an ascending sort the two smallest values sit at the front.
        return (sortedYs.get(0) + sortedYs.get(1)) / 2;
    } catch (Exception e) {
        return 0;
    }
}
/**
 * Computes the right X of the polygon as the mean of its two largest vertex X values.
 * A vertex without an X value counts as 0.
 *
 * @param poly the polygon to measure
 * @return the mean of the two largest X values, or 0 if anything goes wrong while reading them
 */
private float getUrx(BoundingPoly poly) {
    try {
        ArrayList<Float> sortedXs = new ArrayList<>();
        for (Vertex vertex : poly.getVertices()) {
            float value = (vertex.getX() == null) ? 0 : vertex.getX();
            sortedXs.add(value);
        }
        Collections.sort(sortedXs);
        // After an ascending sort the two largest values sit at the back.
        int last = sortedXs.size() - 1;
        return (sortedXs.get(last) + sortedXs.get(last - 1)) / 2;
    } catch (Exception e) {
        return 0;
    }
}
/**
 * Computes the upper Y of the polygon as the mean of its two largest vertex Y values.
 * A vertex without a Y value counts as 0.
 *
 * @param poly the polygon to measure
 * @return the mean of the two largest Y values, or 0 if anything goes wrong while reading them
 */
private float getUry(BoundingPoly poly) {
    try {
        ArrayList<Float> sortedYs = new ArrayList<>();
        for (Vertex vertex : poly.getVertices()) {
            float value = (vertex.getY() == null) ? 0 : vertex.getY();
            sortedYs.add(value);
        }
        Collections.sort(sortedYs);
        // After an ascending sort the two largest values sit at the back.
        int last = sortedYs.size() - 1;
        return (sortedYs.get(last) + sortedYs.get(last - 1)) / 2;
    } catch (Exception e) {
        return 0;
    }
}
/**
 * Rotates every vertex of the polygon, in place, around the point (centerX, centerY).
 * The resulting coordinates are truncated to integers before being stored.
 *
 * <p>NOTE(review): the rotated offset is subtracted from the centre rather than added,
 * which equals the textbook rotation by {@code theta} followed by a point reflection
 * through the centre. That convention is kept unchanged here - confirm it is intended.
 *
 * @param centerX X of the rotation centre
 * @param centerY Y of the rotation centre
 * @param poly    the polygon whose vertices are modified
 * @param theta   the angle of rotation in radians
 * @return the same {@code poly} instance, with updated vertices
 * @throws NullPointerException if a vertex has no X or no Y value
 */
public BoundingPoly rotate(float centerX, float centerY, BoundingPoly poly, double theta) {
    // The angle is the same for every vertex, so evaluate the trigonometry once.
    double cosTheta = cos(theta);
    double sinTheta = sin(theta);

    //rotate all vertices in poly
    for (Vertex vertex : poly.getVertices()) {
        // Offset of the vertex relative to the rotation centre.
        float tempX = vertex.getX() - centerX;
        float tempY = vertex.getY() - centerY;

        // now apply rotation
        float rotatedX = (float) (centerX - tempX * cosTheta + tempY * sinTheta);
        // The Y coordinate must be translated back by centerY (it previously used centerX).
        float rotatedY = (float) (centerY - tempX * sinTheta - tempY * cosTheta);

        vertex.setX((int) rotatedX);
        vertex.setY((int) rotatedY);
    }
    return poly;
}
/**
 * Mirrors the polygon, in place, across the horizontal line {@code y = centerY}.
 *
 * <p>The Google Vision API reports bounding polygons with the origin in the top left
 * corner, whereas iText uses an origin in the bottom left corner, so the Y axis has to
 * be flipped before the result is used with iText.
 *
 * @param centerY Y of the mirror line
 * @param poly    the polygon whose vertices are modified; vertices without a Y value are left as they are
 * @return the same {@code poly} instance, inverted symmetrically by the 0X axis
 */
private BoundingPoly invertSymmetricallyBy0X(float centerY, BoundingPoly poly) {
    for (Vertex vertex : poly.getVertices()) {
        if (vertex.getY() == null) {
            continue;
        }
        float distanceToAxis = centerY - vertex.getY();
        vertex.setY((int) (centerY + distanceToAxis));
    }
    return poly;
}
/**
 * Mirrors the polygon, in place, across the vertical line {@code x = centerX}.
 *
 * @param centerX X of the mirror line
 * @param poly    the polygon whose vertices are modified; vertices without an X value are left as they are
 * @return the same {@code poly} instance, inverted symmetrically by the 0Y axis
 */
private BoundingPoly invertSymmetricallyBy0Y(float centerX, BoundingPoly poly) {
    for (Vertex vertex : poly.getVertices()) {
        if (vertex.getX() == null) {
            continue;
        }
        float distanceToAxis = centerX - vertex.getX();
        vertex.setX((int) (centerX + distanceToAxis));
    }
    return poly;
}