Reading jpeg data
Posted: Sun Jan 22, 2006 11:41 am
This script is really not even beta, but I have run into some problems and I didn't know where else to post it.
The good news is I'm sure that this can be done in JS. And I think once I have this working it shouldn't be too hard to read tiffs as well.
But I have hit several hurdles and need help.
1 The jpeg part of the data in a jpeg file uses the Motorola byte order, but the metadata part can be either Motorola or Intel. How can I deal with the byte order with out a bunch of if statemants?
2 Some of the fields in the IFD lookup values. I don't know how to code the lookup in JS. For example the second part of all subIFD is a lookup for the data that subIFD contains. Here are the values with thier meaning
1 = BYTE An 8-bit unsigned integer.,
2 = ASCII An 8-bit byte containing one 7-bit ASCII code. The final byte is terminated with NULL.,
3 = SHORT A 16-bit (2 -byte) unsigned integer,
4 = LONG A 32-bit (4 -byte) unsigned integer,
5 = RATIONAL Two LONGs. The first LONG is the numerator and the second LONG expresses the
denominator.,
7 = UNDEFINED An 8-bit byte that can take any value depending on the field definition,
9 = SLONG A 32-bit (4 -byte) signed integer (2's complement notation),
10 = SRATIONAL Two SLONGs. The first SLONG is the numerator and the second SLONG is the
denominator.
3 The last and biggest problem is some of the fields are double words. Xbytor gave me function to read bytes and words in Motorola byte order and I changed one to read Intel words. I not sure how to change those to read double words. The first subIFD in the test file I'm using is for the description. It's in Intel byte order so the tag is 0E01H, the data is 0200H which means it's ASCII, the length is 08000000H which means the string is 7 bytes long with a 00H termination, and the offset is 9E000000H which means the data is 158 bytes from the start of the header tag.
Here is what I have so far
Code: Select allFile.prototype.readByte = function() {// from Xbytor as well as the basic outline
return this.read(1).charCodeAt(0);
};
File.prototype.readInt16 = function() {
var self = this;
var hi = this.readByte();
var lo = this.readByte();
return (hi << 8) + lo;
};
File.prototype.readIntelInt16 = function() {
var self = this;
var lo = this.readByte();
var hi = this.readByte();
return (hi << 8) + lo;
};
jpegInfo = function(file) {
getUnits = function(seek){
file.seek(seek,0);
var units = file.readByte();
if (units == 0) {
file.Resolution = undefined;
} else if (units == 1) {
units = " dpi";
} else if (units == 2) {
units = " dpcm";
}
if(units != 0) {
file.Resolution = file.readInt16() + units;
}
}
getNextTag = function(){
tagPos = nextTag;
file.seek(nextTag, 0);
tag = file.readInt16();
tagLength = file.readInt16();
nextTag = nextTag+2+tagLength;
return tag;
}
processEXIF = function() {
file.seek(tagPos + 10, 0);//byte order for this section
var EXIFOffset = tagPos + 10;//also start of directory, tags offset data from here
if(file.readInt16() == 18761){;//Intel?
var endian = "Intel";
}else{
var endian = "Motorola";
}
file.seek(EXIFOffset+8,0);
numberOfSubIFD = file.readIntelInt16();
var subIFDTags = new Array();
var subIFDnumber = new Array();
subIFDTags[0] = file.readIntelInt16();
subIFDTags.push(file.readIntelInt16());
subIFDTags.push(file.readIntelInt16());//this is a lookup value. 2 means ASCII how to handle
subIFDTags.push(file.readIntelInt16());//how to read double word? 00080000H in test jpeg
alert(endian+","+numberOfSubIFD+","+subIFDTags[0]+","+subIFDTags[1]);//Intel,12,270,2 with my test file
}
var tagPos = 0;
file.open("r");
file.encoding = 'BINARY';
file.seek(tagPos, 0);// first tag, per the spec
var tag = file.readInt16();
if(tag!=65496){//if not FFD8 not valid jpeg
return;
}
tagPos = tagPos + 2;
file.seek(tagPos, 0);// sceond tag, per the spec
var tag = file.readInt16()
if(tag!=65504){//if not FFEO not valid jpeg
return
}
var tagLength = file.readInt16();
var nextTag = tagPos+2+tagLength;
var unitsPos = tagPos+2+9;//unit offset from tag
var units = getUnits(unitsPos);
getNextTag();//next tag should be EXIF APP1 tag if present
if(tag == 65505){//it's there so process
processEXIF();
}
while(tag !=65472){
getNextTag();
}
var pixelsPos = tagPos+2+3;//unit offset from tag
file.bitDepth = file.readByte();
file.HPixels = file.readInt16();
file.WPixels = file.readInt16();
file.components = file.readByte();
file.seek(2,2);
if(file.readInt16() == 65497){
file.isValid = true;
}
return file;
};
function test () {
var fileRef = new File("/c/angie.jpg");
var newRef = jpegInfo(fileRef)
alert(newRef.Resolution);//240 dpi with my test file
alert(newRef.bitDepth); // 8
alert(newRef.HPixels); // 926
alert(newRef.WPixels); // 1179
alert(newRef.components);// 3
alert(newRef.isValid); // true
};
test();
The good news is I'm sure that this can be done in JS. And I think once I have this working it shouldn't be too hard to read tiffs as well.
But I have hit several hurdles and need help.
1 The jpeg part of the data in a jpeg file uses the Motorola byte order, but the metadata part can be either Motorola or Intel. How can I deal with the byte order with out a bunch of if statemants?
2 Some of the fields in the IFD lookup values. I don't know how to code the lookup in JS. For example the second part of all subIFD is a lookup for the data that subIFD contains. Here are the values with thier meaning
1 = BYTE An 8-bit unsigned integer.,
2 = ASCII An 8-bit byte containing one 7-bit ASCII code. The final byte is terminated with NULL.,
3 = SHORT A 16-bit (2 -byte) unsigned integer,
4 = LONG A 32-bit (4 -byte) unsigned integer,
5 = RATIONAL Two LONGs. The first LONG is the numerator and the second LONG expresses the
denominator.,
7 = UNDEFINED An 8-bit byte that can take any value depending on the field definition,
9 = SLONG A 32-bit (4 -byte) signed integer (2's complement notation),
10 = SRATIONAL Two SLONGs. The first SLONG is the numerator and the second SLONG is the
denominator.
3 The last and biggest problem is some of the fields are double words. Xbytor gave me function to read bytes and words in Motorola byte order and I changed one to read Intel words. I not sure how to change those to read double words. The first subIFD in the test file I'm using is for the description. It's in Intel byte order so the tag is 0E01H, the data is 0200H which means it's ASCII, the length is 08000000H which means the string is 7 bytes long with a 00H termination, and the offset is 9E000000H which means the data is 158 bytes from the start of the header tag.
Here is what I have so far
Code: Select allFile.prototype.readByte = function() {// from Xbytor as well as the basic outline
return this.read(1).charCodeAt(0);
};
File.prototype.readInt16 = function() {
var self = this;
var hi = this.readByte();
var lo = this.readByte();
return (hi << 8) + lo;
};
File.prototype.readIntelInt16 = function() {
var self = this;
var lo = this.readByte();
var hi = this.readByte();
return (hi << 8) + lo;
};
jpegInfo = function(file) {
getUnits = function(seek){
file.seek(seek,0);
var units = file.readByte();
if (units == 0) {
file.Resolution = undefined;
} else if (units == 1) {
units = " dpi";
} else if (units == 2) {
units = " dpcm";
}
if(units != 0) {
file.Resolution = file.readInt16() + units;
}
}
getNextTag = function(){
tagPos = nextTag;
file.seek(nextTag, 0);
tag = file.readInt16();
tagLength = file.readInt16();
nextTag = nextTag+2+tagLength;
return tag;
}
processEXIF = function() {
file.seek(tagPos + 10, 0);//byte order for this section
var EXIFOffset = tagPos + 10;//also start of directory, tags offset data from here
if(file.readInt16() == 18761){;//Intel?
var endian = "Intel";
}else{
var endian = "Motorola";
}
file.seek(EXIFOffset+8,0);
numberOfSubIFD = file.readIntelInt16();
var subIFDTags = new Array();
var subIFDnumber = new Array();
subIFDTags[0] = file.readIntelInt16();
subIFDTags.push(file.readIntelInt16());
subIFDTags.push(file.readIntelInt16());//this is a lookup value. 2 means ASCII how to handle
subIFDTags.push(file.readIntelInt16());//how to read double word? 00080000H in test jpeg
alert(endian+","+numberOfSubIFD+","+subIFDTags[0]+","+subIFDTags[1]);//Intel,12,270,2 with my test file
}
var tagPos = 0;
file.open("r");
file.encoding = 'BINARY';
file.seek(tagPos, 0);// first tag, per the spec
var tag = file.readInt16();
if(tag!=65496){//if not FFD8 not valid jpeg
return;
}
tagPos = tagPos + 2;
file.seek(tagPos, 0);// sceond tag, per the spec
var tag = file.readInt16()
if(tag!=65504){//if not FFEO not valid jpeg
return
}
var tagLength = file.readInt16();
var nextTag = tagPos+2+tagLength;
var unitsPos = tagPos+2+9;//unit offset from tag
var units = getUnits(unitsPos);
getNextTag();//next tag should be EXIF APP1 tag if present
if(tag == 65505){//it's there so process
processEXIF();
}
while(tag !=65472){
getNextTag();
}
var pixelsPos = tagPos+2+3;//unit offset from tag
file.bitDepth = file.readByte();
file.HPixels = file.readInt16();
file.WPixels = file.readInt16();
file.components = file.readByte();
file.seek(2,2);
if(file.readInt16() == 65497){
file.isValid = true;
}
return file;
};
function test () {
var fileRef = new File("/c/angie.jpg");
var newRef = jpegInfo(fileRef)
alert(newRef.Resolution);//240 dpi with my test file
alert(newRef.bitDepth); // 8
alert(newRef.HPixels); // 926
alert(newRef.WPixels); // 1179
alert(newRef.components);// 3
alert(newRef.isValid); // true
};
test();