Extract IPTC information from JPEG using Javascript
Asked Answered
S

3

11

I'm trying to extract IPTC photo caption information from a JPEG file using Javascript. (I know I can do this server-side, but I'm looking specifically for a Javascript solution.)

I found this script, which extracts EXIF information ... but I'm not sure how to adapt it to grab IPTC data.

Are there any existing scripts that offer such functionality? If not, how would you modify the EXIF script to also parse IPTC data?

UPDATE

I've modified the EXIF script I linked above. It sorta does what I want, but it's not grabbing the right data 100 percent of the time.

After line 401, I added:

// Branch for the APP13 segment: 237 is 0xED, the second byte of the 0xFFED marker.
else if (iMarker == 237) {
        // 0xED = Application-specific 13 (Photoshop IPTC)
        if (bDebug) log("Found 0xFFED marker");
        // The payload handed to readIPTCData starts 4 bytes past the marker offset
        // (2 marker bytes + 2 length bytes). The "-2" presumably removes the length
        // field's own two bytes from the segment length -- TODO confirm against the JPEG spec.
        return readIPTCData(oFile, iOffset + 4, getShortAt(oFile, iOffset+2, true)-2);
}

And then elsewhere in the script, I added this function:

/**
 * Extracts the IPTC caption and by-line from a Photoshop APP13 segment.
 *
 * An IPTC-IIM dataset is laid out as: 0x1C tag marker, record number,
 * dataset number, two-byte big-endian length, then the value bytes. The
 * previous version matched any "02 78" / "02 50" byte pair and then read a
 * fixed 800/300 bytes, so length bytes or text that happened to contain those
 * pairs were mistaken for dataset headers and values ran past their real end.
 * This version requires the full header and honours the declared length.
 *
 * Values are read with getStringAt, using the same (file, offset, length)
 * call shape as the signature check below.
 *
 * @param {*} oFile File handle understood by getByteAt/getStringAt.
 * @param {number} iStart Offset of the first payload byte of the segment.
 * @param {number} iLength Number of payload bytes in the segment.
 * @returns {Array|boolean} An array carrying 'ImageDescription' (dataset 2:120)
 *     and 'Artist' (dataset 2:80) properties, each undefined when absent, or
 *     false when the payload does not start with "Photoshop".
 */
function readIPTCData(oFile, iStart, iLength) {
    // Kept local: this used to be assigned without `var`, leaking a global.
    var exif = [];

    if (getStringAt(oFile, iStart, 9) != "Photoshop") {
        if (bDebug) log("Not valid Photoshop data! " + getStringAt(oFile, iStart, 9));
        return false;
    }

    var TAG_MARKER = 0x1C;
    var APPLICATION_RECORD = 2;
    var CAPTION_DATASET = 120;
    var BYLINE_DATASET = 80;
    var HEADER_SIZE = 5; // marker + record + dataset + 2 length bytes

    var caption;
    var byline;
    var end = iStart + iLength;
    var pos = iStart;

    while (pos + HEADER_SIZE <= end) {
        if (getByteAt(oFile, pos) !== TAG_MARKER ||
                getByteAt(oFile, pos + 1) !== APPLICATION_RECORD) {
            pos++;
            continue;
        }

        var dataset = getByteAt(oFile, pos + 2);
        var size = (getByteAt(oFile, pos + 3) << 8) | getByteAt(oFile, pos + 4);
        var valueStart = pos + HEADER_SIZE;

        // A set high bit means an extended-length dataset, which is not handled
        // here; a value overrunning the segment means this was not a real header.
        if ((size & 0x8000) !== 0 || valueStart + size > end) {
            pos++;
            continue;
        }

        if (dataset === CAPTION_DATASET) {
            caption = getStringAt(oFile, valueStart, size);
        } else if (dataset === BYLINE_DATASET) {
            byline = getStringAt(oFile, valueStart, size);
        }

        // Jump over the value so its bytes are never re-interpreted as headers.
        pos = valueStart + size;
    }

    exif['ImageDescription'] = caption;
    exif['Artist'] = byline;

    return exif;
}

So let me now modify my question slightly. How can the function above be improved?

Steffin answered 29/4, 2011 at 13:59 Comment(4)
This will only be possible in browsers that support the new-ish HTML5 file APIs. That script you linked appears to get image data by fetching it from the server, which is probably not what you're wanting to do.Corvette
Yes, that's perfectly fine -- I'm developing this for internal use only, and we've all got the latest browsers, so it's not a problem.Steffin
This isn't working for me. Do you have a jsfiddle example?Camey
For anyone else who DOES need cross-browser support, jDataView is a nice way to work with binary data in JavaScript.Lamaism
E
6

For what it's worth, I extrapolated on this a bit... I haven't done a whole lot of testing, but the few test images I have seem to work.

    // When true, the parsing functions log diagnostics as they scan.
    var bDebug = false;

    // Maps IPTC-IIM application-record (record 2) dataset numbers to the
    // property names used in the parsed result. By-line is dataset 80;
    // dataset 85 is the By-line Title, so it gets its own key instead of
    // being reported as the by-line.
    var fieldMap = {
        15 : 'category',
        25 : 'keywords',
        80 : 'byline',
        85 : 'bylineTitle',
        105 : 'headline',
        110 : 'credit',
        116 : 'copyright',
        120 : 'caption',
        122 : 'captionWriter'
    };

    /**
     * Parses the IPTC-IIM datasets found in a Photoshop APP13 segment.
     *
     * A dataset is laid out as: 0x1C tag marker, record number, dataset number,
     * two-byte big-endian length, then the value bytes. The previous version
     * guessed where a value ended by scanning for the next marker-looking
     * bytes, which truncated or dropped values (a 2-character value was lost
     * entirely because its length bytes are "00 02") and left the length bytes
     * inside the returned string. This version reads the declared length.
     *
     * Only datasets in record 2 whose number is a key of `fieldMap` are kept;
     * when a dataset repeats, the last occurrence wins. Empty values are
     * skipped. Extended-length datasets (length high bit set) are not handled.
     *
     * @param {Object} oFile Object exposing getByteAt(offset),
     *     getStringAt(offset, length) and getLength().
     * @param {number} iStart Offset of the first payload byte of the segment.
     * @param {number} iLength Number of payload bytes in the segment.
     * @returns {Object|boolean} Map of field name to trimmed string value, or
     *     false when the payload does not start with "Photoshop".
     */
    function readIPTCData(oFile, iStart, iLength) {
        var data = {};

        if (oFile.getStringAt(iStart, 9) != "Photoshop") {
            if (bDebug) console.log("Not valid Photoshop data! " + oFile.getStringAt(iStart, 9));
            return false;
        }

        var TAG_MARKER = 0x1C;
        var APPLICATION_RECORD = 2;
        var HEADER_SIZE = 5; // marker + record + dataset + 2 length bytes

        // Never read past the segment, nor past the end of the file.
        var segmentEnd = Math.min(iStart + iLength, oFile.getLength());
        var pos = iStart;

        while (pos + HEADER_SIZE <= segmentEnd) {
            if (oFile.getByteAt(pos) !== TAG_MARKER ||
                    oFile.getByteAt(pos + 1) !== APPLICATION_RECORD) {
                pos++;
                continue;
            }

            var dataset = oFile.getByteAt(pos + 2);
            var size = (oFile.getByteAt(pos + 3) << 8) | oFile.getByteAt(pos + 4);
            var valueStart = pos + HEADER_SIZE;

            // An extended length or a value overrunning the segment means this
            // is not a header we can parse; resume scanning at the next byte.
            if ((size & 0x8000) !== 0 || valueStart + size > segmentEnd) {
                pos++;
                continue;
            }

            if (size > 0 && Object.prototype.hasOwnProperty.call(fieldMap, dataset)) {
                var value = oFile.getStringAt(valueStart, size) || '';
                data[fieldMap[dataset]] = value.replace(/\0/g, '').trim();
            }

            // Jump over the value so its bytes are never re-read as headers.
            pos = valueStart + size;
        }

        return data;
    }

    /**
     * Walks the JPEG segment list looking for an APP13 (0xFFED) segment and
     * hands its payload to readIPTCData.
     *
     * Changes from the previous version: it always returns false (rather than
     * sometimes undefined) when nothing is found, keeps scanning when an APP13
     * segment is rejected by readIPTCData, stops at start-of-scan/end-of-image
     * instead of reading a length that is not there, and never reads a marker
     * or length beyond the end of the file.
     *
     * @param {Object} oFile Object exposing getByteAt(offset),
     *     getShortAt(offset, flag) and getLength(). The flag is passed as
     *     true -- presumably selecting big-endian byte order; confirm against
     *     the binary-file helper in use.
     * @returns {Object|boolean} Whatever readIPTCData returns for the first
     *     APP13 segment it accepts, or false when the file is not a JPEG, is
     *     malformed, or holds no such segment.
     */
    function findIPTCinJPEG(oFile) {
        var APP13_MARKER = 0xED;
        var SOS_MARKER = 0xDA;
        var EOI_MARKER = 0xD9;

        if (oFile.getByteAt(0) != 0xFF || oFile.getByteAt(1) != 0xD8) {
            return false; // not a valid jpeg
        }

        var iOffset = 2;
        var iLength = oFile.getLength();

        // A segment header needs 2 marker bytes plus 2 length bytes.
        while (iOffset + 4 <= iLength) {
            if (oFile.getByteAt(iOffset) != 0xFF) {
                if (bDebug) console.log("Not a valid marker at offset " + iOffset + ", found: " + oFile.getByteAt(iOffset));
                return false; // not a valid marker, something is wrong
            }

            var iMarker = oFile.getByteAt(iOffset + 1);

            // Compressed image data follows start-of-scan, and end-of-image has
            // no length field, so there is nothing further to walk.
            if (iMarker === SOS_MARKER || iMarker === EOI_MARKER) {
                return false;
            }

            var iSegmentLength = oFile.getShortAt(iOffset + 2, true);
            if (iSegmentLength < 2) {
                return false; // the length counts its own two bytes
            }

            if (iMarker === APP13_MARKER) {
                if (bDebug) console.log("Found 0xFFED marker");
                var result = readIPTCData(oFile, iOffset + 4, iSegmentLength - 2);
                if (result !== false) {
                    return result;
                }
                // Not a Photoshop block; a later APP13 segment may still be one.
            }

            iOffset += 2 + iSegmentLength;
        }

        return false;
    }

    /**
     * Entry point on the IPTC object: looks up IPTC data in a binary JPEG file.
     *
     * @param {Object} oFile File object passed straight to findIPTCinJPEG.
     * @returns {*} The result of findIPTCinJPEG(oFile), unchanged.
     */
    IPTC.readFromBinaryFile = function(oFile) {
        var iptcData = findIPTCinJPEG(oFile);
        return iptcData;
    };
Evvie answered 6/9, 2012 at 18:28 Comment(1)
What is 'oFile' expected to be? Where is the definition of IPTC? Will it work for PNG as well?Rexferd
N
2

I'd like to suggest the exifr library, which works in both Node.js and the browser. It also supports the newer HEIC image format.

// IPTC parsing is opt-in, so it is requested explicitly through the options object.
exifr
  .parse(input, { iptc: true })
  .then((output) => {
    console.log('IPTC', output);
  });

It parses multiple data formats (TIFF/EXIF, ICC, IPTC, XMP, JFIF), but IPTC isn't enabled by default, so you need to enable it in the options, as shown in the example.

Naught answered 12/3, 2020 at 13:33 Comment(0)
A
0

Well, this should get you going on creating your own javascript parser if you can't find a library that already does this.

http://www.iptc.org/std/photometadata/specification/IPTC-PhotoMetadata%28200907%29_1.pdf

Amal answered 29/4, 2011 at 14:4 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.