A JS - xml metadata reader

Photoshop Script Snippets - Note: Full Scripts go in the Photoshop Scripts Forum

Moderators: Tom, Kukurykus

Andrew

A JS - xml metadata reader

Post by Andrew »

This is a fairly versatile xml metadata reading script.

It can read unique and non-unique metadata fields (an example of a non-unique field is keywords), it handles nested field identifiers, and it handles fields that carry additional information inside the opening tag (e.g. <exif:Flash rdf:parseType='Resource'>contents</exif:Flash>).

Code: Select all
// returns all texts within single or multiple '<tString>' field identifiers
// as string xmlTextStr separated by '<', ignores empty and whitespace values

// Collects the text content of every '<tString ...>text</tString>' element
// found in xmlString, in the order the elements appear.
//
//   xmlString  - the XML text to scan
//   tString    - the element name to look for, e.g. 'rdf:li'
//   xmlTextStr - values gathered so far; new values are appended to it
//                (a missing value is treated as '')
//
// Returns xmlTextStr with every value found appended, each one followed by a
// '<' separator. Empty and whitespace-only values are ignored. The opening tag
// may carry attributes; they are not part of the returned text.
function getXmlText (xmlString, tString, xmlTextStr)
{
   // Escape regular expression metacharacters ('.' is legal in XML names) so
   // the element name is matched literally.
   var escapedName = tString.replace(/[.*+?^${}()|\[\]\\]/g, '\\$&');

   // The name must be followed directly by '>' or by whitespace plus
   // attributes, so that 'exif:Flash' does not also match '<exif:FlashMode>'.
   var openRe = new RegExp('<' + escapedName + '(\\s[^><]*)?>');
   var closeTag = '</' + tString + '>';

   var remaining = xmlString;
   var result = (xmlTextStr === undefined || xmlTextStr === null) ? '' : xmlTextStr;
   var found, openTag, textStart, cPos, textString;

   // No iteration cap is needed: every pass cuts at least one tag off the
   // front of 'remaining', so the loop always terminates.
   while ((found = openRe.exec(remaining)) !== null)
   {
      openTag = found[0];
      textStart = found.index + openTag.length;

      // A self-closing element ('<tag attr="x"/>') has no text and no
      // closing tag; step over it.
      if (openTag.charAt(openTag.length - 2) === '/')
      {
         remaining = remaining.substr(textStart);
         continue;
      }

      cPos = remaining.indexOf(closeTag, textStart);
      if (cPos === -1) break;

      // Measure from the end of the real opening tag, so attributes never
      // leak into the extracted text.
      textString = remaining.slice(textStart, cPos);
      remaining = remaining.substr(cPos);

      if (textString.search(/\S/) === -1) continue;

      result += textString + '<';
   }
   return result;
}

// searches for target nested xml field identifiers to submit to getXmlText()    

// Walks a path of nested element names and hands the innermost level to
// getXmlText().
//
//   xmlString  - the XML text to scan
//   tAr        - array of element names from outermost to innermost,
//                e.g. ['dc:subject', 'rdf:Bag', 'rdf:li']; it is not modified
//   xmlTextStr - values gathered so far; new values are appended to it
//
// Returns xmlTextStr extended with whatever getXmlText() appends for the last
// name in tAr, looked up inside every occurrence of the enclosing elements.
// If an enclosing element is missing, xmlTextStr is returned unchanged.
function findXmlFields (xmlString, tAr, xmlTextStr)
{
   if (tAr.length === 0) return xmlTextStr;
   if (tAr.length === 1) return getXmlText (xmlString, tAr[0], xmlTextStr);

   var parentName = tAr[0];
   // Work on a copy of the remaining path so the caller's array is untouched.
   var childPath = tAr.slice(1);

   // Escape regular expression metacharacters so the name is matched
   // literally, and require '>' or whitespace after it so that a longer name
   // sharing the same prefix is not mistaken for this element.
   var escapedName = parentName.replace(/[.*+?^${}()|\[\]\\]/g, '\\$&');
   var openRe = new RegExp('<' + escapedName + '(\\s[^><]*)?>');
   var closeTag = '</' + parentName + '>';

   var remaining = xmlString;
   var found, openTag, bodyStart, cPos;

   // Visit each occurrence of the enclosing element in turn. Each pass cuts
   // at least one tag off the front of 'remaining', so the loop terminates.
   while ((found = openRe.exec(remaining)) !== null)
   {
      openTag = found[0];
      bodyStart = found.index + openTag.length;

      // A self-closing element has no children; step over it.
      if (openTag.charAt(openTag.length - 2) === '/')
      {
         remaining = remaining.substr(bodyStart);
         continue;
      }

      cPos = remaining.indexOf(closeTag, bodyStart);
      if (cPos === -1) break;

      // Only the text between this element's opening and closing tags is
      // searched for the next level, so matches outside it are not picked up.
      xmlTextStr = findXmlFields (remaining.slice(bodyStart, cPos), childPath, xmlTextStr);
      remaining = remaining.substr(cPos);
   }

   return xmlTextStr;
}

// Demonstration entry point: reads the raw XMP metadata of the active
// document, looks up three example field paths and shows the values found
// for each path in an alert.
function main ()
{
   // Each entry is one target field, given as a path of element names from
   // outermost to innermost.
   var tAr = [];
   var textAr, text, i;

   tAr[0] = ['dc:subject','rdf:Bag','rdf:li'];
   tAr[1] = ['exif:Flash','exif:Fired'];
   tAr[2] = ['exif:ExposureTime'];

   var xmlString = app.activeDocument.xmpMetadata.rawData;

   for (i = 0; i < tAr.length; i++)
   {
      // Look up one field path per iteration (tAr[i], not the whole list).
      text = findXmlFields(xmlString, tAr[i], '');
      textAr = text.split('<');
      textAr.pop(); // drops the empty string left after the final '<' separator
      alert(tAr[i] + '\n\n' + textAr);
      // For each metadata field target tAr[i] there is the array textAr
      // containing the corresponding text values (maybe empty, one or multiple)
   }
}
// Run main() and show any thrown error in an alert dialog.
try {
   main();
} catch (err) {
   alert('Script Error\n\n' + err);
}

It is an improved version of the core of the script I created for writing metadata to text layers and it can also be useful as a stage towards editing xmp files (something I do a lot of as part of my raw processing scripts).

For a zipped and commented version click here.

Andrew