I realize this is not a complete solution but I think it's enough to get you started.
The Computer Vision API returns a JSON result with a lines
property that is just an array of objects with a boundingBox
property.
These boundingBox
es are the X,Y coordinates of the top-left and bottom-right coordinates of the "square" of each phrase.
You basically need to process this array and "sort" the items based on this property.
In this JSFiddle you'll see that I'm sorting the lines by Y coordinate and then grouping them.
What's left to do is be "smarter" about the grouping - if the Y coordinates are 201 and 202 you can assume that they are on the same line and just add them to one same line, sorted by ascending X coordinate.
Code:
if (jsonResponse.status == 'Succeeded') {
var result = '';
// Sort lines by Y coordinate
jsonResponse.recognitionResult.lines.sort(function(a, b) {
var topLeftYCoordA = a.boundingBox[1];
var topLeftYCoordB = b.boundingBox[1];
if (topLeftYCoordA > topLeftYCoordB) {
return 1;
}
if (topLeftYCoordA < topLeftYCoordB) {
return -1;
}
return 0;
})
// group lines by Y coordinate
var grouped = {};
jsonResponse.recognitionResult.lines.map(function(line) {
var topLeftYcoordinate = line.boundingBox[1];
if (!grouped[topLeftYcoordinate]) {
grouped[topLeftYcoordinate] = line;
} else {
grouped[topLeftYcoordinate] += line;
}
});
Object.keys(grouped).forEach(function(yCoordinate) {
result += yCoordinate + ' - ' + grouped[yCoordinate].text + '</br>';
})
$(".right").html(result);
}
Result: