Itext sign pdf by string signature in base 64 from client - java

I am trying to sign a PDF document with a signature that comes from the client as a Base64-encoded string.
the service makes a request to calculate the hash from the document
I take the content from the pdf of the document, calculate the hash from it according to the algorithm.
service takes the received hash and signs it, sends the received signature along with the bytes of the document to be signed
I get a string in base 64 and pdf bytes to be signed
Is this possible? Here is a code example:
/**
 * Inserts an externally created CAdES signature container into a PDF document.
 *
 * <p>The method works in two passes: it first reserves a signature placeholder in a new
 * signature field ({@code signExternalContainer} with an empty container), then fills that
 * placeholder with the decoded {@code signature} bytes ({@code signDeferred}).
 *
 * <p>NOTE(review): the injected signature can only validate if it was computed over the
 * byte ranges of exactly the prepared document produced here - verify against the caller
 * that the hash sent to the client comes from the same preparation step.
 *
 * @param document  the bytes of the PDF to sign
 * @param signature the Base64-encoded signature container received from the client
 * @return the bytes of the PDF with the signature embedded
 * @throws InternalException wrapping any I/O, security or CAdES parsing failure
 */
public byte[] insertSignature(byte[] document, String signature) {
    try (InputStream inputStream = new ByteArrayInputStream(document);
         ByteArrayOutputStream os = new ByteArrayOutputStream();
         ByteArrayOutputStream result = new ByteArrayOutputStream()) {
        byte[] decodeSignature = Base64.decodeBase64(signature);
        CAdESSignature cades = new CAdESSignature(decodeSignature, null, null);
        var certificate = cades.getCAdESSignerInfo(0).getSignerCertificate();
        var subject = new Subject(certificate.getSubjectX500Principal().getEncoded());

        List<String> names = getSignaturesFields(document);
        boolean alreadySigned = !names.isEmpty();
        String sigFieldName = String.format("Signature %s", names.size() + 1);
        PdfName filter = PdfName.Adobe_PPKLite;
        PdfName subFilter = PdfName.ETSI_CAdES_DETACHED;
        // Size of the placeholder reserved for the signature container.
        int estimatedSize = 8192;

        PdfReader reader = new PdfReader(inputStream);
        StampingProperties stampingProperties = new StampingProperties();
        // Any existing signature (not only two or more) must be preserved by writing an
        // incremental update; rewriting the file would break its byte range.
        if (alreadySigned) {
            stampingProperties.useAppendMode();
        }
        PdfSigner signer = new PdfSigner(reader, os, stampingProperties);
        // A certification signature has to be the first signature of a document, so the
        // certification level is only requested when no signature exists yet.
        if (!alreadySigned) {
            signer.setCertificationLevel(PdfSigner.CERTIFIED_NO_CHANGES_ALLOWED);
        }
        PdfSignatureAppearance appearance = signer.getSignatureAppearance();
        appearance
                .setContact(subject.email().orElse(""))
                .setSignatureCreator(subject.organizationName().orElse(""))
                .setLocation(subject.country())
                .setReuseAppearance(false)
                .setPageNumber(1);
        signer.setFieldName(sigFieldName);

        // Pass 1: write the document with an empty placeholder for the signature.
        ContainerForPrepareSignedDocument external = new ContainerForPrepareSignedDocument(filter, subFilter);
        signer.signExternalContainer(external, estimatedSize);
        byte[] preSignedBytes = os.toByteArray();

        // Pass 2: put the received container into the placeholder.
        ContainerReadyToSignedDocument extSigContainer = new ContainerReadyToSignedDocument(decodeSignature);
        try (PdfDocument docToSign = new PdfDocument(new PdfReader(new ByteArrayInputStream(preSignedBytes)))) {
            PdfSigner.signDeferred(docToSign, sigFieldName, result, extSigContainer);
        }
        return result.toByteArray();
    }
    catch (IOException e) {
        throw new InternalException("IO exception by insert signature to document:", e);
    }
    catch (GeneralSecurityException e) {
        throw new InternalException("General security by insert signature to document:", e);
    }
    catch (CAdESException e) {
        throw new InternalException("CAdESException by insert signature to document:", e);
    }
}
/**
 * Opens the given PDF bytes read-only and returns the signature names reported by
 * {@code SignatureUtil.getSignatureNames()}.
 *
 * @param document the bytes of the PDF to inspect
 * @return the signature names found in the document
 * @throws IOException if the PDF cannot be read
 */
private List<String> getSignaturesFields(byte[] document)
        throws IOException {
    try (InputStream pdfBytes = new ByteArrayInputStream(document);
         PdfReader pdfReader = new PdfReader(pdfBytes);
         PdfDocument pdf = new PdfDocument(pdfReader)) {
        return new SignatureUtil(pdf).getSignatureNames();
    }
}
/**
 * Signature container used while preparing a document: it contributes no signature bytes
 * and only records the filter and sub-filter in the signature dictionary.
 */
static class ContainerForPrepareSignedDocument implements IExternalSignatureContainer {
    private final PdfName filterName;
    private final PdfName subFilterName;

    public ContainerForPrepareSignedDocument(PdfName filter,
                                             PdfName subFilter) {
        this.filterName = filter;
        this.subFilterName = subFilter;
    }

    /** Returns an empty array; the real signature is supplied in a later step. */
    public byte[] sign(InputStream docBytes) {
        final byte[] noSignatureYet = new byte[0];
        return noSignatureYet;
    }

    /** Stores the configured filter and sub-filter in the signature dictionary. */
    public void modifySigningDictionary(PdfDictionary signDic) {
        signDic.put(PdfName.Filter, filterName);
        signDic.put(PdfName.SubFilter, subFilterName);
    }
}
/**
 * Signature container that hands back a signature which was produced elsewhere,
 * without looking at the document bytes.
 */
static class ContainerReadyToSignedDocument implements IExternalSignatureContainer {
    private final byte[] readyMadeContainer;

    public ContainerReadyToSignedDocument(byte[] cmsSignatureContents) {
        this.readyMadeContainer = cmsSignatureContents;
    }

    /** Returns the bytes given to the constructor; {@code docBytes} is not read. */
    public byte[] sign(InputStream docBytes) {
        return readyMadeContainer;
    }

    /** Intentionally leaves the signature dictionary untouched. */
    public void modifySigningDictionary(PdfDictionary signDic) {
    }
}

Related

Itext sign pdf with external signature causes validation fail (“the document has been altered or corrupted..")

I try to sign pdf document using itext7, certificate and external signature returned from external web service say Sign Service:
I did the following steps:
Took the original PDF, added a last page (the sign page) with 2 signature fields on it, and created a temp PDF
Calculated hash from created temp pdf
Sent my Base64-encoded hash to the Sign Service and received a Base64-encoded signed hash in return (I'm not sure whether this is a raw signature or a CMS signature - I treat it as a CMS container)
Decoded the signed hash and put it, along with the certificate from the Sign Company, into one of my signature fields in the temp PDF file. I will need to sign the subsequent field/fields in this way in the future.
Unfortunately i got validation errors in Adobe Reader: “the document has been altered or corrupted since the signature was applied”:
link to Adobe validation result
Below the code fragment where I create sign page:
/**
 * Appends a sign page to the document and places {@code iteration} signature areas on it.
 *
 * @param doc       the layout document to extend
 * @param iteration the number of signature areas to create (numbered from 1)
 */
private void createPdfDocument(Document doc, int iteration) {
    //Add last sign page to doc
    doc.add(new AreaBreak(AreaBreakType.LAST_PAGE));
    doc.add(new AreaBreak(AreaBreakType.NEXT_PAGE));
    createTitle(doc);
    PdfAcroForm form = PdfAcroForm.getAcroForm(doc.getPdfDocument(), true);
    for (int i = 1; i <= iteration; i++) {
        // Each further area is shifted down by one VERTICAL_MARGIN.
        addSignArea(doc, form, VERTICAL_RECTANGLE_START - (i - 1) * VERTICAL_MARGIN,
                VERTICAL_TEXT_START - (i - 1) * VERTICAL_MARGIN, i);
    }
    System.out.println("Creating sign page finished");
}
/**
 * Draws a rounded frame on the last page and registers a signature form field
 * covering the same rectangle.
 *
 * @param doc                   the layout document whose last page receives the area
 * @param form                  the AcroForm the new signature field is added to
 * @param verticalRectPosition  the y coordinate of the frame and of the field
 * @param verticalFieldPosition not used by this method
 * @param iteration             the number passed to {@code getFieldCountedName}
 */
private void addSignArea(Document doc, PdfAcroForm form, int verticalRectPosition, int verticalFieldPosition, int iteration) {
    final int left = 50;
    final int areaWidth = 495;
    final int areaHeight = 50;
    final int cornerRadius = 5;
    Color frameColor = new DeviceRgb(46, 66, 148);

    //Create sign area frame
    PdfCanvas canvas = new PdfCanvas(doc.getPdfDocument().getLastPage());
    canvas.roundRectangle(left, verticalRectPosition, areaWidth, areaHeight, cornerRadius)
            .setLineWidth(0.5f)
            .setStrokeColor(frameColor)
            .stroke();

    //Create the signature field inside the frame
    Rectangle fieldArea = new Rectangle(left, verticalRectPosition, areaWidth, areaHeight);
    PdfSignatureFormField signatureField =
            PdfSignatureFormField.createSignature(doc.getPdfDocument(), fieldArea);
    signatureField.setFieldName(getFieldCountedName("Signature", iteration));
    form.addField(signatureField);
}
I calculate document hash that way:
/**
 * Computes the SHA-256 digest of the complete file {@code DOC}, prints it and returns it
 * Base64-encoded.
 *
 * @return the Base64-encoded digest, or {@code null} if anything fails
 */
public String getDocumentHash() {
    try (FileInputStream documentStream = new FileInputStream(DOC)) {
        String base64Hash = Base64.getEncoder()
                .encodeToString(DigestAlgorithms.digest(documentStream, DigestAlgorithms.SHA256, null));
        System.out.println(base64Hash);
        return base64Hash;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
And finally sign pdf file:
public class DocumentSigner {
public static final String DEST = "";
private static final String SOURCE = "";
private static final String DOC_HASH = "6XsoKhEXVMu8e0R7BGtaKvghwL0GBrqTGAivFpct6J4=";
public static final String[] RESULT_FILES = new String[]{
"sign_doc_result1.pdf"
};
/**
 * Demo entry point: loads the certificate, fetches the (mocked) signed hash and signs
 * field "Signature1" of {@code SOURCE} into the first result file.
 */
public static void main(String[] args) throws GeneralSecurityException, IOException {
    new File(DEST).mkdirs();
    Certificate[] chain = {
            CertLoadTest.getPublicCert() //load cert from path
    };
    //get the signed hash returned from the Sign Service
    String encodedExternalHash = getExternalSignedHash();
    String resultPath = DEST + RESULT_FILES[0];
    DocumentSigner documentSigner = new DocumentSigner();
    documentSigner.sign(SOURCE, resultPath, chain, PdfSigner.CryptoStandard.CMS,
            encodedExternalHash, DOC_HASH, "Signature1");
}
/**
 * Signs the named signature field of the PDF at {@code src} and writes the result to
 * {@code dest}.
 *
 * <p>The signer works in append mode so that signatures already present in {@code src}
 * are kept intact; without it, signing a second field breaks the byte range of the first.
 *
 * @param src                 path of the PDF to sign
 * @param dest                path of the signed PDF to create
 * @param chain               certificate chain of the signer
 * @param subfilter           the signature standard handed to {@code signDetached}
 * @param encodedExternalHash Base64 value returned by the sign service
 * @param documentHash        Base64 document hash that was sent to the sign service
 * @param fieldName           name of the signature field to fill
 */
public void sign(String src, String dest, Certificate[] chain, PdfSigner.CryptoStandard subfilter,
                 String encodedExternalHash, String documentHash, String fieldName) throws GeneralSecurityException, IOException {
    try (FileOutputStream os = new FileOutputStream(dest); InputStream is = new FileInputStream(src)) {
        PdfReader reader = new PdfReader(is);
        StampingProperties stampingProperties = new StampingProperties();
        stampingProperties.useAppendMode();
        PdfSigner signer = new PdfSigner(reader, os, stampingProperties);
        signer.setFieldName(fieldName);
        IExternalDigest digest = new BouncyCastleDigest();
        IExternalSignature signature = new CustomSignature(Base64.getDecoder().decode(encodedExternalHash),
                Base64.getDecoder().decode(documentHash), chain);
        // 8096 is the size reserved for the signature container.
        signer.signDetached(digest, signature, chain, null, null, null,
                8096, subfilter);
    }
}
/**
 * {@code IExternalSignature} that returns a signature value obtained beforehand instead of
 * signing the bytes it is given.
 */
public class CustomSignature implements IExternalSignature {
    private final byte[] precomputedSignature;
    private final byte[] originalDocumentHash;
    private final Certificate[] signerChain;

    public CustomSignature(byte[] signedHash, byte[] documentHash, Certificate[] chain) {
        this.precomputedSignature = signedHash;
        this.originalDocumentHash = documentHash;
        this.signerChain = chain;
    }

    /** Reports SHA-256 as the hash algorithm. */
    public String getHashAlgorithm() {
        return DigestAlgorithms.SHA256;
    }

    /** Reports RSA as the encryption algorithm. */
    public String getEncryptionAlgorithm() {
        return "RSA";
    }

    /** Ignores {@code message} and returns the value passed to the constructor. */
    public byte[] sign(byte[] message) throws GeneralSecurityException {
        return precomputedSignature;
    }
}
/**
 * Returns a hard-coded Base64 string standing in for the Sign Service response.
 */
private static String getExternalSignedHash() {
//mocked Sign Service result - documentHash is exchanged with signedHash
return "3BLqVMOLSFXEfCy++n0DmRqcfCGCqSLy9Nzpn1IpAn6iTqr+h78+yOomGMAL0La77IB08Tou9gkxbwSXPHrdN5+EPm7HCXeI/z3fzj711H9OH6P9tWtVHgieKUFOVhrm/PTeypSC/vy7RJQLNmL5+/+Moby5Bdo/KaaN2h9Jj41w1i6CwL/7wzCZ0h+AU9sI+IC0i/UbWFFz7VMfN5barcF1vP+ECLiX3qtZrGbFZNZfrr+28ytNTdUR4iZJRLKL2nXeg0CqxsTjnAyUsFMTCro1qv0QkQO8Cv6AJFhWlUFGUkt+pIUKhIticlypB+WdzwmISOsRK0IUiKgrJI6E3g==";
}
I also tried to treat the hash returned from the Sign Service as a raw signature - this is what the sign method in the CustomSignature class looks like then:
// Alternative body of CustomSignature.sign: builds a PKCS#7 container with PdfPKCS7 and
// sets the value returned by the Sign Service as its external digest.
BouncyCastleDigest digest = new BouncyCastleDigest();
PdfPKCS7 sgn = new PdfPKCS7(null, chain, "SHA256", null, digest, false);
// NOTE(review): sh is computed but never used below - presumably these are the bytes the
// service should have signed; confirm.
byte[] sh = sgn.getAuthenticatedAttributeBytes(documentHash, PdfSigner.CryptoStandard.CMS, null, null);
sgn.setExternalDigest(signedHash, null, "RSA");
byte[] encodedSig = sgn.getEncodedPKCS7(documentHash, PdfSigner.CryptoStandard.CMS, null, null, null);
return encodedSig;
But in this case I get signature formatting errors in Adobe Reader.
Is my flow correct or maybe i need another approach to properly sign document.
According to advice posted in the comment, I still use custom IExternalSignature implementation with external call in sign method:
/**
 * Signs the named signature field of the PDF at {@code src} and writes the result to
 * {@code dest}, delegating the actual signing to {@code CustomSignature}.
 *
 * <p>The signer works in append mode so that signatures already present in the source
 * file are kept intact; without it, signing a second field breaks the byte range of the
 * first one.
 *
 * @param chain     certificate chain of the signer
 * @param subfilter the signature standard handed to {@code signDetached}
 * @param fieldName name of the signature field to fill
 */
public void sign(Certificate[] chain, PdfSigner.CryptoStandard subfilter, String fieldName) throws GeneralSecurityException, IOException {
    try (InputStream is = new FileInputStream(src); FileOutputStream os = new FileOutputStream(dest)) {
        PdfReader reader = new PdfReader(is);
        StampingProperties stampingProperties = new StampingProperties();
        stampingProperties.useAppendMode();
        PdfSigner signer = new PdfSigner(reader, os, stampingProperties);
        signer.setFieldName(fieldName); //My signature fields
        IExternalDigest digest = new BouncyCastleDigest();
        IExternalSignature signature = new CustomSignature(chain);
        // 8196 is the size reserved for the signature container.
        signer.signDetached(digest, signature, chain, null, null, null,
                8196, subfilter);
    }
}
/**
 * {@code IExternalSignature} that hashes the bytes it receives and asks the external
 * service (via {@code client}) to sign that hash.
 */
public class CustomSignature implements IExternalSignature {
    private final Certificate[] signerChain;

    public CustomSignature(Certificate[] chain) {
        this.signerChain = chain;
    }

    /** Reports SHA-256 as the hash algorithm. */
    public String getHashAlgorithm() {
        return DigestAlgorithms.SHA256;
    }

    /** Reports RSA as the encryption algorithm. */
    public String getEncryptionAlgorithm() {
        return "RSA";
    }

    /**
     * Digests {@code message} with SHA-256, sends the Base64-encoded digest to the
     * external service and returns the decoded response.
     */
    public byte[] sign(byte[] message) throws GeneralSecurityException {
        byte[] messageHash = new BouncyCastleDigest().getMessageDigest("SHA256").digest(message);
        String hashAsBase64 = Base64.getEncoder().encodeToString(messageHash);
        // call external service here
        String signedHashAsBase64 = client.getSignedHash(hashAsBase64);
        return Base64.getDecoder().decode(signedHashAsBase64);
    }
}
With this change the validation error disappeared for the first call and Signature1 seems to be OK, but a problem occurred when I tried to sign the second signature field, using the PDF generated by the first call as input and another file as output. Now the newly created Signature2 is OK, but the first one fails with a broken byte range:
new DocumentSigner(SOURCE, DEST1).sign(chain,PdfSigner.CryptoStandard.CMS, "Signature1");
new DocumentSigner(DEST1, DEST2).sign(chain, PdfSigner.CryptoStandard.CMS, "Signature2");
I would be grateful for any ideas on how to sign multiple fields without breaking the previous ones.
Here is Adobe validation output after the second call
UPDATE:
I used append mode on PdfSigner's StampingProperties and now everything is ok:
StampingProperties stampingProperties = new StampingProperties();
stampingProperties.useAppendMode();
PdfSigner signer = new PdfSigner(reader, os, stampingProperties);

Base64 digest + PFX(PKCS12) -> ETSI.CAdES.detached signature -> PAdES LTV

I have an API that creates Base64 digest of a PDF Document.
Now I want to create another API that takes this digest and a PFX, creates an ETSI.CAdES.detached signature, and collects the LTV information (certificate chain, OCSP response, CRL) that I want to embed in my PDF to obtain a PAdES-LTV signature using a third API (my third API will take the CAdES signature and the LTV information obtained from this API and embed them in my PDF). I don't know how to create this ETSI.CAdES.detached signature from that digest and a PFX with Java and Bouncy Castle. I tried to follow this GitHub tutorial.
As you have stated, you have your own code for preparing a PDF for signing and for injecting the signature container into it. Thus, your question essentially boils down to
How to create a CAdES signature container with BouncyCastle that can be used to create a PAdES BASELINE B or T PDF signature?
Implementation in the iText 7 Signing Framework
As I do not have your existing code, I had to use a different framework for my tests. I used the iText 7 signing framework for that.
BouncyCastle does contain a CMSSignedDataGenerator to generate CMS signature containers.
The default implementation of the SignerInfo generation therein unfortunately is not CAdES/PAdES compatible as it does not create signed ESSCertID[v2] attributes. Fortunately, though, the implementation is designed to allow plugging in custom attributes sets.
Thus, you can create the CAdES containers required for PAdES BASELINE signatures with a customized CMSSignedDataGenerator.
So when you have prepared the PDF for signing, you can proceed like this:
// Inputs; the bracketed parts are placeholders to be replaced with real values.
InputStream data = [InputStream containing the PDF byte ranges to sign];
ContentSigner contentSigner = [BouncyCastle ContentSigner for your private key];
X509CertificateHolder x509CertificateHolder = [BouncyCastle X509CertificateHolder for your X.509 signer certificate];
DigestCalculatorProvider digestCalculatorProvider = new JcaDigestCalculatorProviderBuilder().setProvider("BC").build();
CMSTypedData msg = new CMSTypedDataInputStream(data);
CMSSignedDataGenerator gen = new CMSSignedDataGenerator();
// Plug the custom signed/unsigned attribute generators into the signer info generator.
gen.addSignerInfoGenerator(
new JcaSignerInfoGeneratorBuilder(digestCalculatorProvider)
.setSignedAttributeGenerator(new PadesSignedAttributeGenerator())
.setUnsignedAttributeGenerator(new PadesUnsignedAttributeGenerator())
.build(contentSigner, x509CertificateHolder));
gen.addCertificates(new JcaCertStore(Collections.singleton(x509CertificateHolder)));
// Second argument false: the signed content is not encapsulated in the container.
CMSSignedData sigData = gen.generate(msg, false);
byte[] cmsBytes = sigData.getEncoded();
(PadesSignatureContainerBc method sign)
The byte[] cmsBytes contains the bytes to inject into the prepared PDF signature placeholder.
The following helper classes are needed:
First of all a wrapper for the InputStream containing the PDF ranges to sign to process by BouncyCastle.
class CMSTypedDataInputStream implements CMSTypedData {
InputStream in;
public CMSTypedDataInputStream(InputStream is) {
in = is;
}
#Override
public ASN1ObjectIdentifier getContentType() {
return PKCSObjectIdentifiers.data;
}
#Override
public Object getContent() {
return in;
}
#Override
public void write(OutputStream out) throws IOException,
CMSException {
byte[] buffer = new byte[8 * 1024];
int read;
while ((read = in.read(buffer)) != -1) {
out.write(buffer, 0, read);
}
in.close();
}
}
(PadesSignatureContainerBc helper class CMSTypedDataInputStream)
Then a customized signed attributes generator for PAdES:
class PadesSignedAttributeGenerator implements CMSAttributeTableGenerator {
#Override
public AttributeTable getAttributes(#SuppressWarnings("rawtypes") Map params) throws CMSAttributeTableGenerationException {
String currentAttribute = null;
try {
ASN1EncodableVector signedAttributes = new ASN1EncodableVector();
currentAttribute = "SigningCertificateAttribute";
AlgorithmIdentifier digAlgId = (AlgorithmIdentifier) params.get(CMSAttributeTableGenerator.DIGEST_ALGORITHM_IDENTIFIER);
signedAttributes.add(createSigningCertificateAttribute(digAlgId));
currentAttribute = "ContentTypeAttribute";
ASN1ObjectIdentifier contentType = ASN1ObjectIdentifier.getInstance(params.get(CMSAttributeTableGenerator.CONTENT_TYPE));
signedAttributes.add(new Attribute(CMSAttributes.contentType, new DERSet(contentType)));
currentAttribute = "MessageDigestAttribute";
byte[] messageDigest = (byte[])params.get(CMSAttributeTableGenerator.DIGEST);
signedAttributes.add(new Attribute(CMSAttributes.messageDigest, new DERSet(new DEROctetString(messageDigest))));
return new AttributeTable(signedAttributes);
} catch (Exception e) {
throw new CMSAttributeTableGenerationException(currentAttribute, e);
}
}
Attribute createSigningCertificateAttribute(AlgorithmIdentifier digAlg) throws IOException, OperatorCreationException {
final IssuerSerial issuerSerial = getIssuerSerial();
DigestCalculator digestCalculator = digestCalculatorProvider.get(digAlg);
digestCalculator.getOutputStream().write(x509CertificateHolder.getEncoded());
final byte[] certHash = digestCalculator.getDigest();
if (OIWObjectIdentifiers.idSHA1.equals(digAlg.getAlgorithm())) {
final ESSCertID essCertID = new ESSCertID(certHash, issuerSerial);
SigningCertificate signingCertificate = new SigningCertificate(essCertID);
return new Attribute(id_aa_signingCertificate, new DERSet(signingCertificate));
} else {
ESSCertIDv2 essCertIdv2;
if (NISTObjectIdentifiers.id_sha256.equals(digAlg.getAlgorithm())) {
// SHA-256 is default
essCertIdv2 = new ESSCertIDv2(null, certHash, issuerSerial);
} else {
essCertIdv2 = new ESSCertIDv2(digAlg, certHash, issuerSerial);
}
SigningCertificateV2 signingCertificateV2 = new SigningCertificateV2(essCertIdv2);
return new Attribute(id_aa_signingCertificateV2, new DERSet(signingCertificateV2));
}
}
IssuerSerial getIssuerSerial() {
final X500Name issuerX500Name = x509CertificateHolder.getIssuer();
final GeneralName generalName = new GeneralName(issuerX500Name);
final GeneralNames generalNames = new GeneralNames(generalName);
final BigInteger serialNumber = x509CertificateHolder.getSerialNumber();
return new IssuerSerial(generalNames, serialNumber);
}
}
(PadesSignatureContainerBc helper class PadesSignedAttributeGenerator )
And finally a customized unsigned attributes generator for a signature timestamp:
class PadesUnsignedAttributeGenerator implements CMSAttributeTableGenerator {
#Override
public AttributeTable getAttributes(#SuppressWarnings("rawtypes") Map params) throws CMSAttributeTableGenerationException {
if (tsaClient == null)
return null;
try {
ASN1EncodableVector unsignedAttributes = new ASN1EncodableVector();
byte[] signature = (byte[])params.get(CMSAttributeTableGenerator.SIGNATURE);
byte[] timestamp = tsaClient.getTimeStampToken(tsaClient.getMessageDigest().digest(signature));
unsignedAttributes.add(new Attribute(id_aa_signatureTimeStampToken, new DERSet(ASN1Primitive.fromByteArray(timestamp))));
return new AttributeTable(unsignedAttributes);
} catch (Exception e) {
throw new CMSAttributeTableGenerationException("", e);
}
}
}
(PadesSignatureContainerBc helper class PadesUnsignedAttributeGenerator)
Here I assume a ITSAClient tsaClient, an iText 7 time stamp request client. You can of course use an arbitrary RFC 3161 time stamp request client of your choice.
If you have read your private key into a JCA/JCE PrivateKey pk, you can simply create the needed ContentSigner contentSigner using the BouncyCastle JcaContentSignerBuilder, e.g. like this:
ContentSigner contentSigner = new JcaContentSignerBuilder("SHA512withRSA").build(pk);
(compare the test testSignPadesBaselineT in SignPadesBc)
Implementation in the PDFBox 3 Signing Framework
You meanwhile indicated in comments that you're looking into using PDFBox to sign. Fortunately the code presented above can nearly without a change be used with PDFBox.
To use the code above with PDFBox, one merely has to wrap it into a PDFBox SignatureInterface frame:
public class PadesSignatureContainerBc implements SignatureInterface {
public PadesSignatureContainerBc(X509CertificateHolder x509CertificateHolder, ContentSigner contentSigner, TSAClient tsaClient) throws OperatorCreationException {
this.contentSigner = contentSigner;
this.tsaClient = tsaClient;
this.x509CertificateHolder = x509CertificateHolder;
digestCalculatorProvider = new JcaDigestCalculatorProviderBuilder().setProvider("BC").build();
}
#Override
public byte[] sign(InputStream content) throws IOException {
try {
CMSTypedData msg = new CMSTypedDataInputStream(content);
CMSSignedDataGenerator gen = new CMSSignedDataGenerator();
gen.addSignerInfoGenerator(
new JcaSignerInfoGeneratorBuilder(digestCalculatorProvider)
.setSignedAttributeGenerator(new PadesSignedAttributeGenerator())
.setUnsignedAttributeGenerator(new PadesUnsignedAttributeGenerator())
.build(contentSigner, x509CertificateHolder));
gen.addCertificates(new JcaCertStore(Collections.singleton(x509CertificateHolder)));
CMSSignedData sigData = gen.generate(msg, false);
return sigData.getEncoded();
} catch (OperatorCreationException | GeneralSecurityException | CMSException e) {
throw new IOException(e);
}
}
final ContentSigner contentSigner;
final X509CertificateHolder x509CertificateHolder;
final TSAClient tsaClient;
final DigestCalculatorProvider digestCalculatorProvider;
class CMSTypedDataInputStream implements CMSTypedData {
InputStream in;
public CMSTypedDataInputStream(InputStream is) {
in = is;
}
#Override
public ASN1ObjectIdentifier getContentType() {
return PKCSObjectIdentifiers.data;
}
#Override
public Object getContent() {
return in;
}
#Override
public void write(OutputStream out) throws IOException,
CMSException {
byte[] buffer = new byte[8 * 1024];
int read;
while ((read = in.read(buffer)) != -1) {
out.write(buffer, 0, read);
}
in.close();
}
}
class PadesSignedAttributeGenerator implements CMSAttributeTableGenerator {
#Override
public AttributeTable getAttributes(#SuppressWarnings("rawtypes") Map params) throws CMSAttributeTableGenerationException {
String currentAttribute = null;
try {
ASN1EncodableVector signedAttributes = new ASN1EncodableVector();
currentAttribute = "SigningCertificateAttribute";
AlgorithmIdentifier digAlgId = (AlgorithmIdentifier) params.get(CMSAttributeTableGenerator.DIGEST_ALGORITHM_IDENTIFIER);
signedAttributes.add(createSigningCertificateAttribute(digAlgId));
currentAttribute = "ContentType";
ASN1ObjectIdentifier contentType = ASN1ObjectIdentifier.getInstance(params.get(CMSAttributeTableGenerator.CONTENT_TYPE));
signedAttributes.add(new Attribute(CMSAttributes.contentType, new DERSet(contentType)));
currentAttribute = "MessageDigest";
byte[] messageDigest = (byte[])params.get(CMSAttributeTableGenerator.DIGEST);
signedAttributes.add(new Attribute(CMSAttributes.messageDigest, new DERSet(new DEROctetString(messageDigest))));
return new AttributeTable(signedAttributes);
} catch (Exception e) {
throw new CMSAttributeTableGenerationException(currentAttribute, e);
}
}
Attribute createSigningCertificateAttribute(AlgorithmIdentifier digAlg) throws IOException, OperatorCreationException {
final IssuerSerial issuerSerial = getIssuerSerial();
DigestCalculator digestCalculator = digestCalculatorProvider.get(digAlg);
digestCalculator.getOutputStream().write(x509CertificateHolder.getEncoded());
final byte[] certHash = digestCalculator.getDigest();
if (OIWObjectIdentifiers.idSHA1.equals(digAlg.getAlgorithm())) {
final ESSCertID essCertID = new ESSCertID(certHash, issuerSerial);
SigningCertificate signingCertificate = new SigningCertificate(essCertID);
return new Attribute(id_aa_signingCertificate, new DERSet(signingCertificate));
} else {
ESSCertIDv2 essCertIdv2;
if (NISTObjectIdentifiers.id_sha256.equals(digAlg.getAlgorithm())) {
// SHA-256 is default
essCertIdv2 = new ESSCertIDv2(null, certHash, issuerSerial);
} else {
essCertIdv2 = new ESSCertIDv2(digAlg, certHash, issuerSerial);
}
SigningCertificateV2 signingCertificateV2 = new SigningCertificateV2(essCertIdv2);
return new Attribute(id_aa_signingCertificateV2, new DERSet(signingCertificateV2));
}
}
public IssuerSerial getIssuerSerial() {
final X500Name issuerX500Name = x509CertificateHolder.getIssuer();
final GeneralName generalName = new GeneralName(issuerX500Name);
final GeneralNames generalNames = new GeneralNames(generalName);
final BigInteger serialNumber = x509CertificateHolder.getSerialNumber();
return new IssuerSerial(generalNames, serialNumber);
}
}
class PadesUnsignedAttributeGenerator implements CMSAttributeTableGenerator {
#Override
public AttributeTable getAttributes(#SuppressWarnings("rawtypes") Map params) throws CMSAttributeTableGenerationException {
if (tsaClient == null)
return null;
try {
ASN1EncodableVector unsignedAttributes = new ASN1EncodableVector();
byte[] signature = (byte[])params.get(CMSAttributeTableGenerator.SIGNATURE);
byte[] timestamp = tsaClient.getTimeStampToken(new ByteArrayInputStream(signature)).getEncoded();
unsignedAttributes.add(new Attribute(id_aa_signatureTimeStampToken, new DERSet(ASN1Primitive.fromByteArray(timestamp))));
return new AttributeTable(unsignedAttributes);
} catch (Exception e) {
throw new CMSAttributeTableGenerationException("", e);
}
}
}
}
(PDFBox PadesSignatureContainerBc implementation of SignatureInterface)
You can use it like this
// Usage example: sign SOURCE_PDF with PDFBox, writing the result to RESULT_OUTPUT.
try ( PDDocument pdDocument = Loader.loadPDF(SOURCE_PDF) )
{
// Signer built from the first chain certificate, a SHA512withRSA content signer for pk and a time stamp client.
SignatureInterface signatureInterface = new PadesSignatureContainerBc(new X509CertificateHolder(chain[0].getEncoded()),
new JcaContentSignerBuilder("SHA512withRSA").build(pk),
new TSAClient(new URL("http://timestamp.server/rfc3161endpoint"), null, null, MessageDigest.getInstance("SHA-256")));
// Signature dictionary entries.
PDSignature signature = new PDSignature();
signature.setFilter(COSName.getPDFName("MKLx_PAdES_SIGNER"));
signature.setSubFilter(COSName.getPDFName("ETSI.CAdES.detached"));
signature.setName("Example User");
signature.setLocation("Los Angeles, CA");
signature.setReason("Testing");
signature.setSignDate(Calendar.getInstance());
pdDocument.addSignature(signature);
ExternalSigningSupport externalSigning = pdDocument.saveIncrementalForExternalSigning(RESULT_OUTPUT);
// invoke external signature service
byte[] cmsSignature = signatureInterface.sign(externalSigning.getContent());
// set signature bytes received from the service
externalSigning.setSignature(cmsSignature);
}
(PDFBox SignPadesBc test testSignPadesBaselineT)

iText7 LtvVerification.addVerification not enabling LTV

We are trying to make the signed signature LTV enabled. I am using the below code to add verification. When signature.isTsp() is false, the PDF says Signature is not LTV enabled, though in the other case (signature.isTsp() is true) it shows as valid.
When we open the PDF and try to manually add verification info by right clicking on the signature it enables LTV without any issue. Not sure what we are missing here.
Any input will be highly helpful.
// Adds LTV-enabled information to the PDF document.
/**
 * Adds validation information to a signed PDF and returns the result as a new stream.
 *
 * <p>The document is opened in append mode. If the last signature is a document time
 * stamp, validation information is added for it only; otherwise it is added for every
 * signature. The PDF reader and document are closed even when adding the information
 * fails.
 *
 * <p>NOTE(review): iText 7's {@code LtvVerification} offers a {@code merge()} method that
 * is normally called before closing the document - confirm whether it is needed here.
 *
 * @param ocspClient the OCSP client used to fetch revocation information
 * @param docStream  the signed PDF
 * @return a stream holding the PDF with the added validation information
 */
private ByteArrayOutputStream addLtv(final IOcspClient ocspClient,
                                     final ByteArrayOutputStream docStream)
        throws IOException, GeneralSecurityException {
    final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    try (InputStream signedStream = new ByteArrayInputStream(docStream.toByteArray());
         PdfReader reader = new PdfReader(signedStream);
         PdfDocument document =
                 new PdfDocument(reader, new PdfWriter(outputStream), new StampingProperties().useAppendMode())) {
        final LtvVerification verification = new LtvVerification(document);
        final SignatureUtil signatureUtil = new SignatureUtil(document);
        final List<String> signatureNames = signatureUtil.getSignatureNames();
        final String sigName = signatureNames.get(signatureNames.size() - 1);
        final PdfPKCS7 signature = signatureUtil.verifySignature(sigName);
        final CrlClientOnline crl = new CrlClientOnline();
        if (!signature.isTsp()) {
            for (final String name: signatureNames) {
                addVerificationInfo(ocspClient, verification, crl, name);
            }
        } else {
            addVerificationInfo(ocspClient, verification, crl, sigName);
        }
    }
    // The document is closed at this point, so the output stream is complete.
    return outputStream;
}
/**
 * Adds validation information for one signature: whole certificate chain, OCSP and CRL,
 * without including the certificates themselves.
 *
 * @param ocspClient   the OCSP client to query
 * @param verification the LTV helper bound to the document
 * @param crl          the CRL client to query
 * @param name         the name of the signature to process
 */
private void addVerificationInfo(final IOcspClient ocspClient, final LtvVerification verification,
                                 final CrlClientOnline crl,
                                 final String name) throws IOException, GeneralSecurityException {
    final LtvVerification.CertificateOption chainScope = LtvVerification.CertificateOption.WHOLE_CHAIN;
    final LtvVerification.Level revocationLevel = LtvVerification.Level.OCSP_CRL;
    final LtvVerification.CertificateInclusion certificateInclusion = LtvVerification.CertificateInclusion.NO;
    verification.addVerification(name, ocspClient, crl, chainScope, revocationLevel, certificateInclusion);
}
The main reason why your code does not always LTV-enable PDFs is that it does not add validation information related to OCSP response signatures.
It doesn't add validation information for CRL signatures, either. As CRLs usually are signed by the issuer certificate of the signer certificate, though, and as validation information for that issuer certificate have already been added in the context of the main signature, LTV-enabling usually does not fail because of missing CRL signature validation information. So if you can use CRLs only, chances are that your code indeed already does LTV-enable PDFs.
In the context of this answer (in particular its section "An approach using an own utility class") I created a utility class AdobeLtvEnabling for iText 5 that allows LTV-enabling PDFs, mostly using bits and pieces found in iText 5 itself. In contrast to your code, it does add validation information for OCSP response signatures (and also for CRL signatures).
Here you can find the port of that class to iText 7.
The utility class AdobeLtvEnabling
This utility class bundles the code required for LTV enabling the signatures in a signed PDF document. The code pieces mostly have been taken from existing iText code. The main reason why this class has not been designed to derive from LtvVerification is that required variables and methods from that class are private. As the class originally has been written for iText 5, some iText-5-isms probably can be found in it...
public class AdobeLtvEnabling {
/**
* Use this constructor with a {@link PdfDocument} in append mode. Otherwise
* the existing signatures will be damaged.
*
* @param pdfDocument the document to which LTV information will be added
*/
public AdobeLtvEnabling(PdfDocument pdfDocument) {
this.pdfDocument = pdfDocument;
}
/**
 * Collects LTV information for the signing certificate chain of every signature in the
 * {@link PdfDocument} given in the constructor and then writes it via {@code outputDss()}.
 *
 * @param ocspClient the OCSP client to query
 * @param crlClient  the CRL client to query
 */
public void enable(IOcspClient ocspClient, ICrlClient crlClient) throws OperatorException, GeneralSecurityException, IOException, StreamParsingException, OCSPException {
    SignatureUtil signatures = new SignatureUtil(pdfDocument);
    for (String fieldName : signatures.getSignatureNames()) {
        PdfPKCS7 container = signatures.verifySignature(fieldName, BouncyCastleProvider.PROVIDER_NAME);
        PdfSignature signatureDictionary = signatures.getSignature(fieldName);
        X509Certificate signerCertificate = container.getSigningCertificate();
        addLtvForChain(signerCertificate, ocspClient, crlClient, getSignatureHashKey(signatureDictionary));
    }
    outputDss();
}
//
// the actual LTV enabling methods
//
void addLtvForChain(X509Certificate certificate, IOcspClient ocspClient, ICrlClient crlClient, PdfName key) throws GeneralSecurityException, IOException, StreamParsingException, OperatorCreationException, OCSPException {
ValidationData validationData = new ValidationData();
while (certificate != null) {
System.out.println(certificate.getSubjectX500Principal().getName());
X509Certificate issuer = getIssuerCertificate(certificate);
validationData.certs.add(certificate.getEncoded());
byte[] ocspResponse = ocspClient.getEncoded(certificate, issuer, null);
if (ocspResponse != null) {
System.out.println(" with OCSP response");
validationData.ocsps.add(ocspResponse);
X509Certificate ocspSigner = getOcspSignerCertificate(ocspResponse);
if (ocspSigner != null) {
System.out.printf(" signed by %s\n", ocspSigner.getSubjectX500Principal().getName());
}
addLtvForChain(ocspSigner, ocspClient, crlClient, getOcspHashKey(ocspResponse));
} else {
Collection<byte[]> crl = crlClient.getEncoded(certificate, null);
if (crl != null && !crl.isEmpty()) {
System.out.printf(" with %s CRLs\n", crl.size());
validationData.crls.addAll(crl);
for (byte[] crlBytes : crl) {
addLtvForChain(null, ocspClient, crlClient, getCrlHashKey(crlBytes));
}
}
}
certificate = issuer;
}
validated.put(key, validationData);
}
void outputDss() throws IOException {
PdfDictionary dss = new PdfDictionary();
PdfDictionary vrim = new PdfDictionary();
PdfArray ocsps = new PdfArray();
PdfArray crls = new PdfArray();
PdfArray certs = new PdfArray();
PdfCatalog catalog = pdfDocument.getCatalog();
if (pdfDocument.getPdfVersion().compareTo(PdfVersion.PDF_2_0) < 0) {
catalog.addDeveloperExtension(PdfDeveloperExtension.ESIC_1_7_EXTENSIONLEVEL5);
catalog.addDeveloperExtension(new PdfDeveloperExtension(PdfName.ADBE, new PdfName("1.7"), 8));
}
for (PdfName vkey : validated.keySet()) {
PdfArray ocsp = new PdfArray();
PdfArray crl = new PdfArray();
PdfArray cert = new PdfArray();
PdfDictionary vri = new PdfDictionary();
for (byte[] b : validated.get(vkey).crls) {
PdfStream ps = new PdfStream(b);
ps.setCompressionLevel(CompressionConstants.DEFAULT_COMPRESSION);
ps.makeIndirect(pdfDocument);
crl.add(ps);
crls.add(ps);
crls.setModified();
}
for (byte[] b : validated.get(vkey).ocsps) {
b = buildOCSPResponse(b);
PdfStream ps = new PdfStream(b);
ps.setCompressionLevel(CompressionConstants.DEFAULT_COMPRESSION);
ps.makeIndirect(pdfDocument);
ocsp.add(ps);
ocsps.add(ps);
ocsps.setModified();
}
for (byte[] b : validated.get(vkey).certs) {
PdfStream ps = new PdfStream(b);
ps.setCompressionLevel(CompressionConstants.DEFAULT_COMPRESSION);
ps.makeIndirect(pdfDocument);
cert.add(ps);
certs.add(ps);
certs.setModified();
}
if (ocsp.size() > 0) {
ocsp.makeIndirect(pdfDocument);
vri.put(PdfName.OCSP, ocsp);
}
if (crl.size() > 0) {
crl.makeIndirect(pdfDocument);
vri.put(PdfName.CRL, crl);
}
if (cert.size() > 0) {
cert.makeIndirect(pdfDocument);
vri.put(PdfName.Cert, cert);
}
vri.put(PdfName.TU, new PdfDate().getPdfObject());
vri.makeIndirect(pdfDocument);
vrim.put(vkey, vri);
}
vrim.makeIndirect(pdfDocument);
vrim.setModified();
dss.put(PdfName.VRI, vrim);
if (ocsps.size() > 0) {
ocsps.makeIndirect(pdfDocument);
dss.put(PdfName.OCSPs, ocsps);
}
if (crls.size() > 0) {
crls.makeIndirect(pdfDocument);
dss.put(PdfName.CRLs, crls);
}
if (certs.size() > 0) {
certs.makeIndirect(pdfDocument);
dss.put(PdfName.Certs, certs);
}
dss.makeIndirect(pdfDocument);
dss.setModified();
catalog.put(PdfName.DSS, dss);
}
//
// VRI signature hash key calculation
//
static PdfName getCrlHashKey(byte[] crlBytes) throws NoSuchAlgorithmException, IOException, CRLException, CertificateException {
CertificateFactory cf = CertificateFactory.getInstance("X.509");
X509CRL crl = (X509CRL)cf.generateCRL(new ByteArrayInputStream(crlBytes));
byte[] signatureBytes = crl.getSignature();
DEROctetString octetString = new DEROctetString(signatureBytes);
byte[] octetBytes = octetString.getEncoded();
byte[] octetHash = hashBytesSha1(octetBytes);
PdfName octetName = new PdfName(convertToHex(octetHash));
return octetName;
}
static PdfName getOcspHashKey(byte[] basicResponseBytes) throws NoSuchAlgorithmException, IOException {
BasicOCSPResponse basicResponse = BasicOCSPResponse.getInstance(basicResponseBytes);
byte[] signatureBytes = basicResponse.getSignature().getBytes();
DEROctetString octetString = new DEROctetString(signatureBytes);
byte[] octetBytes = octetString.getEncoded();
byte[] octetHash = hashBytesSha1(octetBytes);
PdfName octetName = new PdfName(convertToHex(octetHash));
return octetName;
}
static PdfName getSignatureHashKey(PdfSignature sig) throws NoSuchAlgorithmException, IOException {
PdfString contents = sig.getContents();
byte[] bc = PdfEncodings.convertToBytes(contents.getValue(), null);
if (PdfName.ETSI_RFC3161.equals(sig.getSubFilter())) {
try ( ASN1InputStream din = new ASN1InputStream(new ByteArrayInputStream(bc)) ) {
ASN1Primitive pkcs = din.readObject();
bc = pkcs.getEncoded();
}
}
byte[] bt = hashBytesSha1(bc);
return new PdfName(convertToHex(bt));
}
static byte[] hashBytesSha1(byte[] b) throws NoSuchAlgorithmException {
MessageDigest sh = MessageDigest.getInstance("SHA1");
return sh.digest(b);
}
static String convertToHex(byte[] bytes) {
ByteBuffer buf = new ByteBuffer();
for (byte b : bytes) {
buf.appendHex(b);
}
return PdfEncodings.convertToString(buf.toByteArray(), null).toUpperCase();
}
//
// OCSP response helpers
//
static X509Certificate getOcspSignerCertificate(byte[] basicResponseBytes) throws CertificateException, OCSPException, OperatorCreationException {
JcaX509CertificateConverter converter = new JcaX509CertificateConverter().setProvider(BouncyCastleProvider.PROVIDER_NAME);
BasicOCSPResponse borRaw = BasicOCSPResponse.getInstance(basicResponseBytes);
BasicOCSPResp bor = new BasicOCSPResp(borRaw);
for (final X509CertificateHolder x509CertificateHolder : bor.getCerts()) {
X509Certificate x509Certificate = converter.getCertificate(x509CertificateHolder);
JcaContentVerifierProviderBuilder jcaContentVerifierProviderBuilder = new JcaContentVerifierProviderBuilder();
jcaContentVerifierProviderBuilder.setProvider(BouncyCastleProvider.PROVIDER_NAME);
final PublicKey publicKey = x509Certificate.getPublicKey();
ContentVerifierProvider contentVerifierProvider = jcaContentVerifierProviderBuilder.build(publicKey);
if (bor.isSignatureValid(contentVerifierProvider))
return x509Certificate;
}
return null;
}
static byte[] buildOCSPResponse(byte[] BasicOCSPResponse) throws IOException {
DEROctetString doctet = new DEROctetString(BasicOCSPResponse);
ASN1EncodableVector v2 = new ASN1EncodableVector();
v2.add(OCSPObjectIdentifiers.id_pkix_ocsp_basic);
v2.add(doctet);
ASN1Enumerated den = new ASN1Enumerated(0);
ASN1EncodableVector v3 = new ASN1EncodableVector();
v3.add(den);
v3.add(new DERTaggedObject(true, 0, new DERSequence(v2)));
DERSequence seq = new DERSequence(v3);
return seq.getEncoded();
}
//
// X509 certificate related helpers
//
static X509Certificate getIssuerCertificate(X509Certificate certificate) throws IOException, StreamParsingException {
String url = getCACURL(certificate);
if (url != null && url.length() > 0) {
HttpURLConnection con = (HttpURLConnection)new URL(url).openConnection();
if (con.getResponseCode() / 100 != 2) {
throw new PdfException(PdfException.InvalidHttpResponse1).setMessageParams(con.getResponseCode());
}
InputStream inp = (InputStream) con.getContent();
X509CertParser parser = new X509CertParser();
parser.engineInit(new ByteArrayInputStream(StreamUtil.inputStreamToArray(inp)));
return (X509Certificate) parser.engineRead();
}
return null;
}
static String getCACURL(X509Certificate certificate) {
ASN1Primitive obj;
try {
obj = getExtensionValue(certificate, Extension.authorityInfoAccess.getId());
if (obj == null) {
return null;
}
ASN1Sequence AccessDescriptions = (ASN1Sequence) obj;
for (int i = 0; i < AccessDescriptions.size(); i++) {
ASN1Sequence AccessDescription = (ASN1Sequence) AccessDescriptions.getObjectAt(i);
if ( AccessDescription.size() != 2 ) {
continue;
}
else if (AccessDescription.getObjectAt(0) instanceof ASN1ObjectIdentifier) {
ASN1ObjectIdentifier id = (ASN1ObjectIdentifier)AccessDescription.getObjectAt(0);
if ("1.3.6.1.5.5.7.48.2".equals(id.getId())) {
ASN1Primitive description = (ASN1Primitive)AccessDescription.getObjectAt(1);
String AccessLocation = getStringFromGeneralName(description);
if (AccessLocation == null) {
return "" ;
}
else {
return AccessLocation ;
}
}
}
}
} catch (IOException e) {
return null;
}
return null;
}
static ASN1Primitive getExtensionValue(X509Certificate certificate, String oid) throws IOException {
byte[] bytes = certificate.getExtensionValue(oid);
if (bytes == null) {
return null;
}
ASN1InputStream aIn = new ASN1InputStream(new ByteArrayInputStream(bytes));
ASN1OctetString octs = (ASN1OctetString) aIn.readObject();
aIn = new ASN1InputStream(new ByteArrayInputStream(octs.getOctets()));
return aIn.readObject();
}
static String getStringFromGeneralName(ASN1Primitive names) throws IOException {
ASN1TaggedObject taggedObject = (ASN1TaggedObject) names ;
return new String(ASN1OctetString.getInstance(taggedObject, false).getOctets(), "ISO-8859-1");
}
//
// inner class
//
static class ValidationData {
final List<byte[]> crls = new ArrayList<byte[]>();
final List<byte[]> ocsps = new ArrayList<byte[]>();
final List<byte[]> certs = new ArrayList<byte[]>();
}
//
// member variables
//
final PdfDocument pdfDocument;
final Map<PdfName,ValidationData> validated = new HashMap<PdfName,ValidationData>();
}
(AdobeLtvEnabling.java)
Example use
You can use the AdobeLtvEnabling class like this:
// Open SOURCE in append mode (keeping its encryption) and write the LTV-enabled result to TARGET.
StampingProperties appendProperties = new StampingProperties().preserveEncryption().useAppendMode();
try (PdfReader reader = new PdfReader(SOURCE);
     PdfWriter writer = new PdfWriter(TARGET);
     PdfDocument document = new PdfDocument(reader, writer, appendProperties)) {
    AdobeLtvEnabling ltvEnabling = new AdobeLtvEnabling(document);
    // The validation data is written into the document by enable().
    ltvEnabling.enable(new OcspClientBouncyCastle(null), new CrlClientOnline());
}
(MakeLtvEnabled test testLtvEnableSignWithoutLtv)
Limitations
As this code essentially is ported from the iText 5 code from the referenced answer, it also inherits the limitations listed in that answer:
The code works under some simplifying restrictions, in particular:
signature time stamps are ignored,
retrieved CRLs are assumed to be direct and complete,
the complete certificate chains are assumed to be buildable using AIA entries.
You can improve the code accordingly if these restrictions are not acceptable for you.

OpenNLP train Thai language

I am experimenting with OpenNLP 1.7.2 and maxent-3.0.0.jar to train a model for the Thai language. Below is the code that reads the Thai training data and creates the binary model.
/**
 * Trains an OpenNLP person-name finder model from a training file and writes the
 * serialized model to disk.
 */
public class TrainPerson {
    public static void main(String[] args) throws IOException {
        writePersonModel(
                "/Documents/workspace/ThaiOpenNLP/bin/thaiPerson.train",
                "/Documents/workspace/ThaiOpenNLP/bin/th-ner-person.bin");
    }

    /**
     * Reads name samples line by line from {@code trainFile} (as UTF-8), trains a "person"
     * model for language code "th" with default parameters and serializes it to
     * {@code modelFile}.
     *
     * @param trainFile path of the training data file
     * @param modelFile path of the model file to write
     */
    private static void writePersonModel(String trainFile, String modelFile)
            throws FileNotFoundException, IOException {
        ObjectStream<NameSample> samples = new NameSampleDataStream(
                new PlainTextByLineStream(
                        new MarkableFileInputStreamFactory(new File(trainFile)),
                        Charset.forName("UTF-8")));

        final TokenNameFinderModel trained;
        try {
            trained = NameFinderME.train("th", "person", samples, TrainingParameters.defaultParams(), new TokenNameFinderFactory());
        } finally {
            // The sample stream is closed whether or not training succeeded.
            samples.close();
        }

        BufferedOutputStream sink = new BufferedOutputStream(new FileOutputStream(modelFile));
        try {
            trained.serialize(sink);
        } finally {
            sink.close();
        }
    }
}
The Thai training data looks like the attached file trainingData.
I am using the output model to detect person names, as shown in the program below. It fails to identify the name.
/**
 * Loads the trained person-name model and prints every name span found in a sample,
 * already tokenized Thai sentence.
 */
public class ThaiPersonNameFinder {
    static String modelFile = "/Users/avinashpaula/Documents/workspace/ThaiOpenNLP/bin/th-ner-person.bin";

    public static void main(String[] args) {
        // try-with-resources closes the model file even if loading the model fails.
        try (InputStream modelIn = new FileInputStream(new File(modelFile))) {
            TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
            NameFinderME nameFinder = new NameFinderME(model);
            String[] sentence = {
                "จอห์น",
                "30",
                "ปี",
                "จะ",
                "เข้าร่วม",
                "ก",
                "เริ่มต้น",
                "ขึ้น",
                "บน",
                "มกราคม",
                "."
            };
            for (Span nameSpan : nameFinder.find(sentence)) {
                System.out.println(nameSpan);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
What am I doing wrong?

convert a word documents to HTML with embedded images by TIKA

I'm new to Tika. I am trying to convert Microsoft Word documents to HTML using Tika. I'm using the TikaOnDotNet wrapper to use Tika on the .NET Framework. My conversion code is as follows:
// Parse the Word document with Tika and serialize the resulting SAX events as indented UTF-8 HTML.
byte[] file = Files.toByteArray(new File(@"myPath\document.doc"));
AutoDetectParser tikaParser = new AutoDetectParser();
ByteArrayOutputStream output = new ByteArrayOutputStream();
SAXTransformerFactory factory = (SAXTransformerFactory)TransformerFactory.newInstance();
TransformerHandler handler = factory.newTransformerHandler();
handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, "UTF-8");
handler.setResult(new StreamResult(output));
ExpandedTitleContentHandler handler1 = new ExpandedTitleContentHandler(handler);
tikaParser.parse(new ByteArrayInputStream(file), handler1, new Metadata());
File ofile = new File(@"C:\toHtml\text.html");
ofile.createNewFile();
// Close the file stream in all cases; otherwise the file handle stays open.
FileOutputStream stream = new FileOutputStream(ofile);
try
{
    output.writeTo(stream);
}
finally
{
    stream.close();
}
Everything works well except for the embedded images. The generated HTML contains image tags like:
<img src="embedded:image2.wmf" alt="image2.wmf"/>
but the image source does not exist. Please advise.
Credit goes to @Gagravarr.
Please note that this is a simple implementation; the original code is available in the comments on the question.
This implementation is based on the TikaOnDotNet wrapper.
/// <summary>
/// Converts a Word document to HTML with Tika, writing embedded images to an
/// "images" folder and rewriting the "embedded:" image sources to point there.
/// </summary>
public class DocToHtml
{
    private TikaConfig config = TikaConfig.getDefaultConfig();

    /// <summary>
    /// Parses "filename.doc" and writes the generated HTML to C:\toHtml\text.html.
    /// </summary>
    public void Convert()
    {
        byte[] file = Files.toByteArray(new File(@"filename.doc"));
        AutoDetectParser tikaParser = new AutoDetectParser();
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        SAXTransformerFactory factory = (SAXTransformerFactory)TransformerFactory.newInstance();
        var inputStream = new ByteArrayInputStream(file);
        var metaData = new Metadata();
        EncodingDetector encodingDetector = new UniversalEncodingDetector();
        // Falls back to UTF-32 when no encoding could be detected.
        var encode = encodingDetector.detect(inputStream, metaData) ?? new UTF_32();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
        handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
        handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, encode.toString());
        handler.setResult(new StreamResult(output));
        ContentHandler imageRewriting = new ImageRewritingContentHandler(handler);
        ParseContext context = new ParseContext();
        // Registering the extractor makes the parser hand embedded documents to it.
        context.set(typeof(EmbeddedDocumentExtractor), new FileEmbeddedDocumentExtractor());
        tikaParser.parse(inputStream, imageRewriting, new Metadata(), context);
        byte[] array = output.toByteArray();
        System.IO.File.WriteAllBytes(@"C:\toHtml\text.html", array);
    }

    /// <summary>
    /// Rewrites the src attribute of img elements from "embedded:name" to "images\name"
    /// and forces a width of 100px on every image.
    /// </summary>
    private class ImageRewritingContentHandler : ContentHandlerDecorator
    {
        public ImageRewritingContentHandler(ContentHandler handler) : base(handler)
        {
        }

        public override void startElement(string uri, string localName, string name, Attributes origAttrs)
        {
            if (!"img".Equals(localName))
            {
                base.startElement(uri, localName, name, origAttrs);
                return;
            }

            AttributesImpl attrs = origAttrs as AttributesImpl ?? new AttributesImpl(origAttrs);
            for (int i = 0; i < attrs.getLength(); i++)
            {
                if (!"src".Equals(attrs.getLocalName(i)))
                    continue;
                String src = attrs.getValue(i);
                if (src.StartsWith("embedded:"))
                {
                    attrs.setValue(i, src.Replace("embedded:", @"images\"));
                }
            }

            // Overwrite an existing width instead of emitting the attribute twice; SAX expects
            // an empty namespace URI (not null) and the attribute type "CDATA".
            int widthIndex = attrs.getIndex("width");
            if (widthIndex >= 0)
                attrs.setValue(widthIndex, "100px");
            else
                attrs.addAttribute("", "width", "width", "CDATA", "100px");

            base.startElement(uri, localName, name, attrs);
        }
    }

    /// <summary>
    /// Saves every embedded document detected as an image to C:\toHtml\images.
    /// </summary>
    private class FileEmbeddedDocumentExtractor : EmbeddedDocumentExtractor
    {
        // Used to build a file name for embedded resources that have none.
        private int count = 0;

        public bool shouldParseEmbedded(Metadata m)
        {
            return true;
        }

        public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, bool outputHtml)
        {
            Detector detector = new DefaultDetector();
            string name = metadata.get("resourceName");
            MediaType contentType = detector.detect(inputStream, metadata);
            if (contentType.getType() != "image") return;

            // The name comes from the parsed document: fall back to a generated name when it
            // is missing and keep only the file-name part so it cannot leave the images folder.
            string embeddedFile = string.IsNullOrEmpty(name)
                ? "file" + count++
                : System.IO.Path.GetFileName(name);
            if (string.IsNullOrEmpty(embeddedFile))
            {
                embeddedFile = "file" + count++;
            }

            File outputFile = new File(@"C:\toHtml\images", embeddedFile);
            File parent = outputFile.getParentFile();
            if (!parent.exists() && !parent.mkdirs())
            {
                throw new java.io.IOException("unable to create directory \"" + parent + "\"");
            }

            using (FileOutputStream os = new FileOutputStream(outputFile))
            {
                var tin = inputStream as TikaInputStream;
                if (tin != null && tin.getOpenContainer() is DirectoryEntry)
                {
                    // NOTE(review): this writes an empty POIFS file system; the entries of the
                    // open container are never copied into it. Confirm whether this branch can
                    // be reached for images and copy the container entries if it can.
                    POIFSFileSystem fs = new POIFSFileSystem();
                    fs.writeFilesystem(os);
                }
                else
                {
                    // Also covers plain (non-Tika) input streams, which were previously skipped.
                    IOUtils.copy(inputStream, os);
                }
            }
        }
    }
}

Categories