A JS - xml metadata reader

Andrew · Post by **Andrew** » Thu Jul 28, 2005 6:59 am

This is a fairly versatile xml metadata reading script.

It can read unique and non-unique metadata fields (an example of a non-unique field would be keywords); it handles nested field identifiers; and it handles fields which have added information in the opening bracket (e.g. <exif:Flash rdf:parseType='Resource'>contents</exif:Flash>).

Code: Select all
// returns all texts within single or multiple '<tString>' field identifiers
// as string xmlTextStr separated by '<', ignores empty and whitespace values

// Collects the text content of every <tString ...>...</tString> element found
// in xmlString and appends each non-blank value, followed by a '<' separator,
// to xmlTextStr.
//
// xmlString  - XML text to scan.
// tString    - element name to look for (e.g. 'rdf:li'); it is inserted into a
//              regular expression as-is.
// xmlTextStr - accumulator string; results are appended to it.
//
// Returns the accumulator with the found values appended. Values that are
// empty or whitespace-only are skipped. Nested elements of the same name are
// not supported: an element ends at the first matching closing tag.
function getXmlText (xmlString, tString, xmlTextStr)
{
   // The name must be followed directly by '>' or by whitespace + attributes,
   // so that searching for 'rdf:li' does not match '<rdf:list>'.
   var openRe = new RegExp('<' + tString + '(?:\\s[^><]*)?>');
   var closeTag = '</' + tString + '>';
   var remaining = xmlString;

   // Every pass removes at least one opening tag from 'remaining', so the
   // loop always terminates and no artificial iteration cap is needed.
   while (true)
   {
      var openMatch = openRe.exec(remaining);
      if (openMatch === null) break;

      var openTag = openMatch[0];
      // Start after the whole opening tag, attributes included.
      var textStart = openMatch.index + openTag.length;

      // A self-closing element (<tag ... />) has no text and no closing tag.
      if (openTag.charAt(openTag.length - 2) === '/')
      {
         remaining = remaining.slice(textStart);
         continue;
      }

      var closePos = remaining.indexOf(closeTag, textStart);
      if (closePos === -1) break;

      var textString = remaining.slice(textStart, closePos);
      remaining = remaining.slice(closePos + closeTag.length);

      if (/\S/.test(textString)) xmlTextStr += textString + '<';
   }

   return xmlTextStr;
}

// searches for target nested xml field identifiers to submit to getXmlText()

// Walks a path of nested element names and collects the text of the innermost
// elements via getXmlText().
//
// xmlString  - XML text to scan.
// tAr        - array of element names from outermost to innermost, e.g.
//              ['dc:subject', 'rdf:Bag', 'rdf:li']. The array is not modified.
// xmlTextStr - accumulator string; results are appended to it.
//
// For every occurrence of the first element in tAr, the search for the rest
// of the path is restricted to that element's content, so inner elements
// lying outside the requested parent are not picked up. Returns the
// accumulator with the values appended ('<'-separated, as produced by
// getXmlText()). Nested elements of the same name are not supported: an
// element ends at the first matching closing tag.
function findXmlFields (xmlString, tAr, xmlTextStr)
{
   if (tAr.length === 0) return xmlTextStr;
   if (tAr.length === 1) return getXmlText(xmlString, tAr[0], xmlTextStr);

   var parentTag = tAr[0];
   var childPath = tAr.slice(1); // copy, so the caller's array stays untouched

   // The name must be followed directly by '>' or by whitespace + attributes.
   var openRe = new RegExp('<' + parentTag + '(?:\\s[^><]*)?>');
   var closeTag = '</' + parentTag + '>';
   var remaining = xmlString;

   // Every pass removes at least one opening tag from 'remaining', so the
   // loop always terminates.
   while (true)
   {
      var openMatch = openRe.exec(remaining);
      if (openMatch === null) break;

      var openTag = openMatch[0];
      var bodyStart = openMatch.index + openTag.length;

      // A self-closing parent (<tag ... />) has no content to descend into.
      if (openTag.charAt(openTag.length - 2) === '/')
      {
         remaining = remaining.slice(bodyStart);
         continue;
      }

      var closePos = remaining.indexOf(closeTag, bodyStart);
      if (closePos === -1) break;

      // Descend one level, looking only inside this parent element.
      xmlTextStr = findXmlFields(remaining.slice(bodyStart, closePos), childPath, xmlTextStr);
      remaining = remaining.slice(closePos + closeTag.length);
   }

   return xmlTextStr;
}

// Demo driver: reads the raw XMP packet of the active document and, for each
// target field path listed below, shows the values found in an alert box.
function main ()
{
   // Each entry is one target field, given as the path of nested element
   // names from outermost to innermost.
   var tAr = [
      ['dc:subject', 'rdf:Bag', 'rdf:li'],
      ['exif:Flash', 'exif:Fired'],
      ['exif:ExposureTime']
   ];

   var xmlString = app.activeDocument.xmpMetadata.rawData;

   for (var i = 0; i < tAr.length; i++)
   {
      // Look up one field path at a time (tAr[i]), not the whole list.
      var text = findXmlFields(xmlString, tAr[i], '');
      var textAr = text.split('<');
      textAr.pop(); // drops the empty string left after the trailing '<' separator
      alert(tAr[i] + '\n\n' + textAr);
      // For each metadata field target tAr[i] there is the array textAr
      // containing the corresponding text values (maybe empty, one or multiple)
   }
}
// Script entry point: run main() and report any exception it throws
// to the user through an alert box.
try { main(); }
catch (err) { alert('Script Error\n\n' + err); }

It is an improved version of the core of the script I created for writing metadata to text layers and it can also be useful as a stage towards editing xmp files (something I do a lot of as part of my raw processing scripts).

For a zipped and commented version click here.

Andrew