blob: 8340972e702a7f21b155a2e7077ab7f1b247d4cf [file] [log] [blame]
/*
* Copyright 2023 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "src/codec/SkJpegXmp.h"
#include "src/codec/SkCodecPriv.h"
#include "src/codec/SkJpegPriv.h"
#include "src/core/SkMD5.h"
// Defaulted constructor. SkJpegXmp::Make() in this file creates instances and builds their DOMs.
SkJpegXmp::SkJpegXmp() = default;
// Number of ASCII characters used to store the extended XMP GUID: a 128-bit value written as a
// 32-character hex string (two hex digits per byte).
constexpr size_t kGuidAsciiSize = 32;
/*
 * Locate the standard XMP packet among the APP1 segment payloads.
 *
 * Returns the bytes that follow the standard XMP signature in the first segment that both starts
 * with that signature and carries at least one byte after it. The returned SkData is created with
 * MakeWithoutCopy, so it points into the segment's storage rather than holding a copy. Returns
 * nullptr when no segment qualifies.
 *
 * See XMP Specification Part 3: Storage in files, Section 1.1.3: JPEG.
 */
static sk_sp<SkData> read_xmp_standard(const std::vector<sk_sp<SkData>>& decoderApp1Params) {
    constexpr size_t kSigSize = sizeof(kXMPStandardSig);

    for (const auto& segment : decoderApp1Params) {
        const size_t segmentSize = segment->size();

        // A usable segment is strictly larger than the signature and begins with it. The size is
        // tested first so that the comparison never reads past the end of a short segment.
        const bool isStandardXmp = segmentSize > kSigSize &&
                                   memcmp(segment->bytes(), kXMPStandardSig, kSigSize) == 0;
        if (isStandardXmp) {
            const uint8_t* payload = segment->bytes() + kSigSize;
            return SkData::MakeWithoutCopy(payload, segmentSize - kSigSize);
        }
    }

    // No segment carried the standard XMP signature.
    return nullptr;
}
/*
 * Extract and validate extended XMP metadata.
 *
 * See XMP Specification Part 3: Storage in files, Section 1.1.3.1: Extended XMP in JPEG:
 * Each chunk is written into the JPEG file within a separate APP1 marker segment. Each ExtendedXMP
 * marker segment contains:
 *   - A null-terminated signature string
 *   - A 128-bit GUID stored as a 32-byte ASCII hex string, capital A-F, no null termination. The
 *     GUID is a 128-bit MD5 digest of the full ExtendedXMP serialization.
 *   - The full length of the ExtendedXMP serialization as a 32-bit unsigned integer.
 *   - The offset of this portion as a 32-bit unsigned integer.
 *   - The portion of the ExtendedXMP
 *
 * Parameters:
 *   decoderApp1Params - the APP1 segment payloads to search.
 *   guidAscii         - null-terminated GUID to look for; must be exactly kGuidAsciiSize
 *                       characters drawn from 0-9 and A-F.
 *
 * Returns a newly allocated buffer holding the reassembled extended XMP, or nullptr if the GUID is
 * malformed, no matching part is found, the parts disagree on the full length, the parts do not
 * tile the range [0, full length) exactly, or the MD5 of the reassembled data differs from the
 * GUID.
 */
static sk_sp<SkData> read_xmp_extended(const std::vector<sk_sp<SkData>>& decoderApp1Params,
                                       const char* guidAscii) {
    constexpr size_t kSigSize = sizeof(kXMPExtendedSig);
    constexpr size_t kFullLengthSize = 4;
    constexpr size_t kOffsetSize = 4;
    constexpr size_t kHeaderSize = kSigSize + kGuidAsciiSize + kFullLengthSize + kOffsetSize;

    // Validate the provided ASCII guid and convert it to a binary digest (two hex digits per
    // byte, high nibble first) for the final MD5 comparison.
    SkMD5::Digest guidAsDigest;
    if (strlen(guidAscii) != kGuidAsciiSize) {
        SkCodecPrintf("Invalid ASCII GUID size.\n");
        return nullptr;
    }
    for (size_t i = 0; i < kGuidAsciiSize; ++i) {
        uint8_t digit = 0;
        if (guidAscii[i] >= '0' && guidAscii[i] <= '9') {
            digit = guidAscii[i] - '0';
        } else if (guidAscii[i] >= 'A' && guidAscii[i] <= 'F') {
            digit = guidAscii[i] - 'A' + 10;
        } else {
            SkCodecPrintf("GUID is not upper-case hex.\n");
            return nullptr;
        }
        if (i % 2 == 0) {
            guidAsDigest.data[i / 2] = 16 * digit;
        } else {
            guidAsDigest.data[i / 2] += digit;
        }
    }

    // Decode a 32-bit unsigned integer stored most-significant byte first.
    const auto readUint32BigEndian = [](const uint8_t* bytes) {
        uint32_t value = 0;
        for (size_t i = 0; i < 4; ++i) {
            value = value * 256 + bytes[i];
        }
        return value;
    };

    // Iterate through the image's segments, collecting every part that belongs to this GUID.
    uint32_t fullLength = 0;
    // Sum of the payload sizes of all collected parts. Kept in 64 bits so that it cannot wrap
    // before it is compared against the 32-bit full length.
    uint64_t totalPartSize = 0;
    using Part = std::tuple<uint32_t, sk_sp<SkData>>;
    std::vector<Part> parts;
    for (const auto& params : decoderApp1Params) {
        // Skip segments that don't have the right signature, or are too small to hold the header
        // plus at least one byte of payload.
        if (params->size() <= kHeaderSize) {
            continue;
        }
        if (memcmp(params->bytes(), kXMPExtendedSig, kSigSize) != 0) {
            continue;
        }

        // Ignore parts that do not match the expected GUID.
        const uint8_t* partGuidAscii = params->bytes() + kSigSize;
        if (memcmp(guidAscii, partGuidAscii, kGuidAsciiSize) != 0) {
            SkCodecPrintf("Ignoring unexpected GUID.\n");
            continue;
        }

        // Read the full length and the offset for this part. They follow the GUID back to back.
        const uint8_t* partFullLengthBytes = params->bytes() + kSigSize + kGuidAsciiSize;
        const uint8_t* partOffsetBytes = partFullLengthBytes + kFullLengthSize;
        const uint32_t partFullLength = readUint32BigEndian(partFullLengthBytes);
        const uint32_t partOffset = readUint32BigEndian(partOffsetBytes);

        // If this is the first part, set our global full length size.
        if (parts.empty()) {
            fullLength = partFullLength;
        }

        // Ensure all parts agree on the full length.
        if (partFullLength != fullLength) {
            SkCodecPrintf("Multiple parts had different total lengths.\n");
            return nullptr;
        }

        // Add it to the list. The part refers to the segment's bytes without copying them.
        auto partData = SkData::MakeWithoutCopy(params->bytes() + kHeaderSize,
                                                params->size() - kHeaderSize);
        totalPartSize += partData->size();
        parts.push_back({partOffset, partData});
    }
    if (parts.empty() || fullLength == 0) {
        return nullptr;
    }

    // The full length is read from the file and may be as large as 2^32 - 1. Reassembly can only
    // succeed when the parts tile [0, fullLength) exactly, which requires their sizes to sum to
    // fullLength, so reject a mismatch here instead of first allocating a buffer of that size.
    if (totalPartSize != fullLength) {
        SkCodecPrintf("XMP extension did not match full length.\n");
        return nullptr;
    }

    // Sort the list of parts by offset.
    std::sort(parts.begin(), parts.end(), [](const Part& a, const Part& b) {
        return std::get<0>(a) < std::get<0>(b);
    });

    // Stitch the parts together. Fail if we find that they are not contiguous.
    auto xmpExtendedData = SkData::MakeUninitialized(fullLength);
    uint8_t* xmpExtendedBase = reinterpret_cast<uint8_t*>(xmpExtendedData->writable_data());
    uint8_t* xmpExtendedCurrent = xmpExtendedBase;
    for (const auto& part : parts) {
        uint32_t currentOffset = static_cast<uint32_t>(xmpExtendedCurrent - xmpExtendedBase);
        uint32_t partOffset = std::get<0>(part);
        const sk_sp<SkData>& partData = std::get<1>(part);

        // Make sure the data is contiguous and doesn't overflow the buffer. Each part must start
        // exactly where the previous one ended, so gaps, overlaps and duplicates all fail here.
        if (partOffset != currentOffset) {
            SkCodecPrintf("XMP extension parts not contiguous\n");
            return nullptr;
        }
        if (partData->size() > fullLength - currentOffset) {
            SkCodecPrintf("XMP extension parts overflow\n");
            return nullptr;
        }
        memcpy(xmpExtendedCurrent, partData->data(), partData->size());
        xmpExtendedCurrent += partData->size();
    }

    // Make sure we wrote the full buffer.
    if (static_cast<uint32_t>(xmpExtendedCurrent - xmpExtendedBase) != fullLength) {
        SkCodecPrintf("XMP extension did not match full length.\n");
        return nullptr;
    }

    // Make sure the MD5 hash of the extended data matched the GUID.
    SkMD5 md5;
    md5.write(xmpExtendedData->data(), xmpExtendedData->size());
    if (md5.finish() != guidAsDigest) {
        SkCodecPrintf("XMP extension did not hash to GUID.\n");
        return nullptr;
    }
    return xmpExtendedData;
}
/*
 * Search an XMP document for the first rdf:Description node that declares every requested
 * namespace with its requested URI. The expected XML shape is (NAMESPACEi and URIi being the
 * i-th entries of the two parameter arrays):
 *
 *   <x:xmpmeta ...>
 *     <rdf:RDF ...>
 *       <rdf:Description NAMESPACE0="URI0" NAMESPACE1="URI1" .../>
 *     </rdf:RDF>
 *   </x:xmpmeta>
 *
 * Parameters:
 *   dom        - the parsed document to search.
 *   namespaces - attribute names to look for; `count` entries.
 *   uris       - attribute values expected for the corresponding names; `count` entries.
 *   count      - number of name/value pairs.
 *
 * Returns the first matching node in document order, or nullptr if the document has no root, the
 * root is not named x:xmpmeta, or no rdf:Description node carries all of the pairs.
 */
const SkDOM::Node* find_namespace_uri_match(const SkDOM& dom,
                                            const char* namespaces[],
                                            const char* uris[],
                                            size_t count) {
    static constexpr char kRdfName[] = "rdf:RDF";
    static constexpr char kDescriptionName[] = "rdf:Description";

    // The document must have a root, and that root must identify itself as XMP metadata.
    const SkDOM::Node* const xmpMeta = dom.getRootNode();
    if (xmpMeta == nullptr) {
        return nullptr;
    }
    const char* const xmpMetaName = dom.getName(xmpMeta);
    if (xmpMetaName == nullptr || strcmp(xmpMetaName, "x:xmpmeta") != 0) {
        return nullptr;
    }

    // True when the node has every requested namespace attribute set to the requested URI.
    // Stops at the first pair that is missing.
    const auto declaresAllPairs = [&](const SkDOM::Node* node) {
        for (size_t pair = 0; pair < count; ++pair) {
            if (!dom.hasAttr(node, namespaces[pair], uris[pair])) {
                return false;
            }
        }
        return true;
    };

    // Walk every rdf:RDF child of the root, and every rdf:Description child of each of those.
    const SkDOM::Node* rdfNode = dom.getFirstChild(xmpMeta, kRdfName);
    while (rdfNode) {
        const SkDOM::Node* descriptionNode = dom.getFirstChild(rdfNode, kDescriptionName);
        while (descriptionNode) {
            if (declaresAllPairs(descriptionNode)) {
                return descriptionNode;
            }
            descriptionNode = dom.getNextSibling(descriptionNode, kDescriptionName);
        }
        rdfNode = dom.getNextSibling(rdfNode, kRdfName);
    }
    return nullptr;
}
/*
 * Build an SkJpegXmp from a JPEG's APP1 segment payloads.
 *
 * The standard XMP packet is required: nullptr is returned if none of the segments carries it or
 * if it cannot be parsed. Extended XMP is optional and best-effort: if the standard XMP points at
 * extended XMP that is missing, fails validation, or fails to parse, the object is still returned
 * with only the standard XMP available.
 */
std::unique_ptr<SkJpegXmp> SkJpegXmp::Make(const std::vector<sk_sp<SkData>>& decoderApp1Params) {
    auto xmpStandard = read_xmp_standard(decoderApp1Params);
    if (!xmpStandard) {
        return nullptr;
    }

    std::unique_ptr<SkJpegXmp> xmp(new SkJpegXmp);
    auto xmpStandardStream = SkMemoryStream::Make(xmpStandard);
    if (!xmp->fStandardDOM.build(*xmpStandardStream)) {
        SkCodecPrintf("Failed to parse XMP standard metadata.\n");
        return nullptr;
    }

    // See if there is a note indicating extended XMP. If we encounter any errors in retrieving
    // the extended XMP, return just the standard XMP.
    const char* namespaces[1] = {"xmlns:xmpNote"};
    const char* uris[1] = {"http://ns.adobe.com/xmp/note/"};
    const auto* extendedNode = find_namespace_uri_match(xmp->fStandardDOM, namespaces, uris, 1);
    if (!extendedNode) {
        return xmp;
    }

    // Extract the GUID (the MD5 hash) of the extended metadata.
    const char* extendedGuid = xmp->fStandardDOM.findAttr(extendedNode, "xmpNote:HasExtendedXMP");
    if (!extendedGuid) {
        return xmp;
    }

    // Extract and validate the extended metadata from the JPEG structure.
    auto xmpExtended = read_xmp_extended(decoderApp1Params, extendedGuid);
    if (!xmpExtended) {
        SkCodecPrintf("Extended XMP was indicated but failed to read or validate.\n");
        return xmp;
    }

    // Parse the extended metadata. As with the standard DOM above, a false result from build()
    // is treated as a parse failure; it is only reported, since the standard XMP is still usable.
    auto xmpExtendedStream = SkMemoryStream::Make(xmpExtended);
    if (!xmp->fExtendedDOM.build(*xmpExtendedStream)) {
        SkCodecPrintf("Failed to parse extended XMP metadata.\n");
        return xmp;
    }
    return xmp;
}
/*
 * Find the first rdf:Description node, in the standard XMP and then in the extended XMP, that
 * declares all of the given namespace/URI pairs.
 *
 * On success returns true, with *outNode set to the matching node and *outDom set to the DOM that
 * contains it. On failure returns false, with both *outNode and *outDom set to nullptr.
 */
bool SkJpegXmp::findNamespaceUriMatch(const char* namespaces[],
                                      const char* uris[],
                                      size_t count,
                                      const SkDOM** outDom,
                                      const SkDOM::Node** outNode) const {
    // See XMP Specification Part 3: Storage in files, Section 1.1.3.1: Extended XMP in JPEG:
    // A JPEG reader must recompose the StandardXMP and ExtendedXMP into a single data model tree
    // containing all of the XMP for the JPEG file, and remove the xmpNote:HasExtendedXMP property.
    // This code does not do that. Instead, it keeps the two trees separate and searches them one
    // after the other, standard first.
    const SkDOM* const searchOrder[] = {&fStandardDOM, &fExtendedDOM};
    for (const SkDOM* candidate : searchOrder) {
        const SkDOM::Node* match = find_namespace_uri_match(*candidate, namespaces, uris, count);
        *outNode = match;
        if (match != nullptr) {
            *outDom = candidate;
            return true;
        }
    }

    // Neither tree matched; *outNode already holds nullptr from the last search.
    *outDom = nullptr;
    return false;
}