Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pdf-form-lock removes all fields #22

Open
rcoryjohnson opened this issue Nov 4, 2018 · 3 comments
Open

pdf-form-lock removes all fields #22

rcoryjohnson opened this issue Nov 4, 2018 · 3 comments

Comments

@rcoryjohnson
Copy link

@galkahana I am using your form filling sample, and now I need to lock/flatten the resulting PDF. @Hatzl directed me to your lock-form branch here, and I am trying to use pdf-form-lock.js

I had to modify pdf-form-lock.js to avoid crashes. I don't think my change in collectWidgetAnnotations will be sufficient, but it avoids crashes for my testing. Here are my changes:
lock-form...rcoryjohnson:patch-1#diff-4b182c8febde9600d798afac72f21a7e

The resulting PDF has all of the fields cleared. "fillable.pdf" is the filled version with the editable fields, and "locked.pdf" is the result pdf which has been passed to lockForm(...) and has all of the fields cleared rather than just locked as intended.

locked.pdf
fillable.pdf

Any assistance would be greatly appreciated.

@Hatzl
Copy link

Hatzl commented Nov 5, 2018

Try the following files:

[pdf-form-fill.js]

var hummus = require('hummus'),
    _ = require('lodash');

var fontArial;

/**
 * Extract the text of a parsed PDF object.
 * Literal and hex strings are decoded through their own toText(); any other
 * object type falls back to its raw `value` property.
 * (Candidate for inclusion in hummus itself at some point.)
 */
function toText(item) {
    switch(item.getType()) {
        case hummus.ePDFObjectLiteralString:
            return item.toPDFLiteralString().toText();
        case hummus.ePDFObjectHexString:
            return item.toPDFHexString().toText();
        default:
            return item.value;
    }
}

/**
 * Begin writing a copy of `originalDict` that leaves out every key flagged in `excludedKeys`.
 * Entries that are kept are copied as-is through the copying context.
 * The returned dictionary context is still OPEN: the caller writes its replacement keys
 * and is responsible for ending the dictionary.
 */
function startModifiedDictionary(handles,originalDict,excludedKeys) {
    const sourceEntries = originalDict.toJSObject();
    const targetDict = handles.objectsContext.startDictionary();

    for(const key of Object.getOwnPropertyNames(sourceEntries)) {
        if(excludedKeys[key]) {
            // flagged for replacement, the caller writes its own version
            continue;
        }
        targetDict.writeKey(key);
        handles.copyingContext.copyDirectObjectAsIs(sourceEntries[key]);
    }

    return targetDict;
}

/**
 * Default write of a terminal field: copy the dictionary unchanged and close the
 * indirect object the caller opened. There is no reason to recurse into kids here.
 *
 * @param handles          shared bundle; uses copyingContext and objectsContext
 * @param fieldDictionary  the parsed field dictionary to copy as-is
 */
function defaultTerminalFieldWrite(handles,fieldDictionary) {
    handles.copyingContext.copyDirectObjectAsIs(fieldDictionary);
    // The indirect object is ended on the objects context, as everywhere else in this file,
    // rather than chained onto whatever copyDirectObjectAsIs happens to return.
    handles.objectsContext.endIndirectObject();
}

/**
 * Update a checkbox or radio button field with a new value.
 *
 * Two layouts are handled:
 *  - The field is itself the widget (or has no Kids): V and AS are both set to one appearance name.
 *    `null`, `0` and `false` select 'Off'; any other value selects the first appearance in AP/N
 *    that is not named 'Off'.
 *  - The field has Kids: V is written on the field ('Yes' for `true`, otherwise the value itself,
 *    or 'Off' when the value is falsy). The Kids array is rewritten as indirect references and each
 *    kid gets AS set to that same name when its AP/N dictionary holds a matching appearance,
 *    'Off' otherwise.
 *
 * Assumes the field's indirect object is already open; this function closes it.
 *
 * @param handles          shared bundle (reader, objectsContext, copyingContext)
 * @param fieldDictionary  the parsed field dictionary being rewritten
 * @param value            requested value: boolean, appearance name, 0 or null
 */
function updateOptionButtonValue(handles,fieldDictionary,value) {
    var isWidget =  fieldDictionary.exists('Subtype') && (fieldDictionary.queryObject('Subtype').toString() == 'Widget');

    if(isWidget || ! fieldDictionary.exists('Kids')) {
        // this button has just one option and it is in the widget. also means no kids
        var modifiedDict = startModifiedDictionary(handles,fieldDictionary,{'V':-1,'AS':-1});
        var appearanceName;
        if(value === null || value === 0 || value === false) {
            // off is easy, just write '/Off' as the value and as the appearance state.
            // `false` is included so that it agrees with the Kids branch below, which also maps it to Off
            appearanceName = 'Off';
        }
        else {
            // grab the non off value. that should be the yes one
            var apDictionary = handles.reader.queryDictionaryObject(fieldDictionary,'AP').toPDFDictionary();
            var nAppearances = handles.reader.queryDictionaryObject(apDictionary,'N').toPDFDictionary().toJSObject();
            appearanceName = _.find(Object.keys(nAppearances),function(item){return item !== 'Off'});
        }
        modifiedDict
            .writeKey('V')
            .writeNameValue(appearanceName)
            .writeKey('AS')
            .writeNameValue(appearanceName);

        handles.objectsContext
            .endDictionary(modifiedDict)
            .endIndirectObject();
        return;
    }

    // Field with a kids array: there are offs and ons to set on the kid widgets
    var selectedName = value === true ? 'Yes' : value || 'Off';
    var modifiedFieldDict = startModifiedDictionary(handles,fieldDictionary,{'V':-1,'Kids':-1});
    var kidsArray = handles.reader.queryDictionaryObject(fieldDictionary,'Kids').toPDFArray();

    // set the V value on the new field dictionary
    modifiedFieldDict
        .writeKey('V')
        .writeNameValue(selectedName);

    // write the Kids key before we write the kids array
    modifiedFieldDict.writeKey('Kids');

    // write the kids array (this also closes the field dictionary and its indirect object)
    var fieldsReferences = writeKidsAndEndObject(handles,modifiedFieldDict,kidsArray);

    // recreate widget kids, turned on or off based on their relation to the target value
    for(var kidIndex=0;kidIndex<fieldsReferences.length;++kidIndex) {
        var fieldReference = fieldsReferences[kidIndex];
        var sourceField;

        if(fieldReference.existing) {
            handles.objectsContext.startModifiedIndirectObject(fieldReference.id);
            sourceField = handles.reader.parseNewObject(fieldReference.id).toPDFDictionary();
        } else {
            handles.objectsContext.startNewIndirectObject(fieldReference.id);
            // direct kids may be recorded under either property name, accept both
            sourceField = (fieldReference.field || fieldReference.theObject).toPDFDictionary();
        }

        var widgetDictionary = handles.reader.queryArrayObject(kidsArray,kidIndex).toPDFDictionary();
        var kidApDictionary = handles.reader.queryDictionaryObject(widgetDictionary,'AP').toPDFDictionary();
        var kidAppearances = handles.reader.queryDictionaryObject(kidApDictionary,'N').toPDFDictionary().toJSObject();
        var hasMatch = _.some(Object.keys(kidAppearances),function(name) {
            return name === value || (value === true && name === 'Yes');
        });

        var kidDict = startModifiedDictionary(handles,sourceField,{'AS': -1});
        kidDict
            .writeKey('AS')
            .writeNameValue(hasMatch ? selectedName : 'Off');

        // finish
        handles.objectsContext
            .endDictionary(kidDict)
            .endIndirectObject();
    }
}

/**
 * Write the appearance stream (a form xobject) that draws `text` for a text/choice widget.
 *
 * The xobject is created under the pre-allocated id `formId`, sized from the widget's Rect.
 * The default appearance string (DA) is taken from the widget, or else from the inherited
 * properties, and emitted before the text is drawn. When neither provides a DA nothing is
 * emitted for it (previously the literal text "undefined" ended up in the content stream).
 * The font is then set explicitly to the module-level `fontArial` at size 10.
 *
 * Note: the AcroForm default resources (DR) are not copied into the xobject's resources.
 *
 * @param handles              shared bundle; uses reader and writer
 * @param formId               object id to create the form xobject with
 * @param fieldsDictionary     widget dictionary providing Rect and, optionally, DA
 * @param text                 the text to show
 * @param inheritedProperties  properties collected from ancestor fields (DA is read here)
 */
function writeAppearanceXObjectForText(handles,formId,fieldsDictionary,text,inheritedProperties) {
    var rect = handles.reader.queryDictionaryObject(fieldsDictionary,'Rect').toPDFArray().toJSArray();
    var da = fieldsDictionary.exists('DA') ?
        fieldsDictionary.queryObject('DA').toString() :
        (inheritedProperties ? inheritedProperties['DA'] : undefined);

    var xobjectForm = handles.writer.createFormXObject(
        0,
        0,
        rect[2].value - rect[0].value,
        rect[3].value - rect[1].value,
        formId);

    var content = xobjectForm.getContentContext()
            .writeFreeCode('/Tx BMC\r\n')
            .q()
            .BT();

    if(da !== undefined && da !== null) {
        // only emit the default appearance when there is one
        content = content.writeFreeCode(da + '\r\n');
    }

    content
            .Ts(3)
            .Tf(fontArial,10)
            .Tj(text)
            .ET()
            .Q()
            .writeFreeCode('EMC');

    handles.writer.endFormXObject(xobjectForm);
}

/**
 * Finish writing a text/choice field whose V entry has already been written, attaching a freshly
 * generated appearance stream.
 *
 * When `appearanceInField` is set, the AP entry goes straight into `targetFieldDict`, which is
 * then closed together with its indirect object. Otherwise the field's Kids array is rewritten
 * (which closes the field), and every kid widget is recreated without its old AP and with a new
 * one pointing at its own new appearance xobject.
 */
function writeFieldWithAppearanceForText(handles,targetFieldDict,sourceFieldDictionary,appearanceInField,textToWrite,inheritedProperties) {
    const objects = handles.objectsContext;

    // writes "/AP << /N ref >>" into the open dictionary, then closes the dictionary and its indirect object
    const finishWithAppearance = function(openDict,appearanceFormId) {
        openDict
            .writeKey('AP');

        const appearanceDict = objects.startDictionary();
        appearanceDict.writeKey("N").writeObjectReferenceValue(appearanceFormId);
        objects
            .endDictionary(appearanceDict)
            .endDictionary(openDict)
            .endIndirectObject();
    };

    if(appearanceInField) {
        // the field doubles as the widget, so the appearance lives on the field itself
        const fieldAppearanceId = objects.allocateNewObjectID();
        finishWithAppearance(targetFieldDict,fieldAppearanceId);
        writeAppearanceXObjectForText(handles,fieldAppearanceId,sourceFieldDictionary,textToWrite,inheritedProperties);
        return;
    }

    // the appearance lives on the kid widgets (normally there is just one)
    const kids = handles.reader.queryDictionaryObject(sourceFieldDictionary,'Kids').toPDFArray();
    // the Kids key has to precede the kids array
    targetFieldDict.writeKey('Kids');
    const kidReferences = writeKidsAndEndObject(handles,targetFieldDict,kids);

    kidReferences.forEach(function(kidReference) {
        const kidAppearanceId = objects.allocateNewObjectID();
        let kidSource;

        if(kidReference.existing) {
            objects.startModifiedIndirectObject(kidReference.id);
            kidSource = handles.reader.parseNewObject(kidReference.id).toPDFDictionary();
        } else {
            objects.startNewIndirectObject(kidReference.id);
            kidSource = kidReference.field.toPDFDictionary();
        }

        // recreate the kid with a reference to the new appearance stream, then write that stream
        finishWithAppearance(startModifiedDictionary(handles,kidSource,{'AP': -1}),kidAppearanceId);
        writeAppearanceXObjectForText(handles,kidAppearanceId,kidSource,textToWrite,inheritedProperties);
    });
}

/**
 * Update a text field: write V (and RV for rich text fields), then regenerate the appearance.
 *
 * @param handles              shared bundle (reader, objectsContext, copyingContext)
 * @param fieldDictionary      the parsed field dictionary being rewritten
 * @param value                either an object `{v: plain, rv: rich}` or a primitive. A string is used
 *                             for both the plain and the rich value; other primitives (e.g. numbers)
 *                             are converted with String() first. When `rv` is missing from an object,
 *                             `v` is used for the rich value as well.
 * @param isRich               truthy when the field is flagged as rich text
 * @param inheritedProperties  properties collected from ancestor fields, passed on for the appearance
 */
function updateTextValue(handles,fieldDictionary,value,isRich,inheritedProperties) {
    var plainText, richText;
    if(value !== null && typeof(value) === 'object') {
        plainText = value['v'];
        richText = value['rv'] === undefined ? value['v'] : value['rv'];
    }
    else {
        // primitives: same text for both. numbers and the like are stringified instead of
        // ending up as an undefined value
        plainText = (value === null || value === undefined) ? '' : String(value);
        richText = plainText;
    }

    var appearanceInField =  fieldDictionary.exists('Subtype') && (fieldDictionary.queryObject('Subtype').toString() == 'Widget') || !fieldDictionary.exists('Kids');
    var fieldsToRemove = {'V': -1,'Kids': -1};
    if(appearanceInField) {
        // add skipping AP if in field (and not in a child widget)
        fieldsToRemove['AP'] = -1;
    }
    if(isRich) {
        // skip RV if rich
        fieldsToRemove['RV'] = -1;
    }

    var modifiedDict = startModifiedDictionary(handles,fieldDictionary,fieldsToRemove);

    // start with value, setting both plain value and rich value
    modifiedDict
        .writeKey('V')
        .writeLiteralStringValue(new hummus.PDFTextString(plainText).toBytesArray());

    if(isRich) {
        modifiedDict
            .writeKey('RV')
            .writeLiteralStringValue(new hummus.PDFTextString(richText).toBytesArray());
    }

    // takes care of closing the dictionary and the indirect object
    writeFieldWithAppearanceForText(handles,modifiedDict,fieldDictionary,appearanceInField,plainText,inheritedProperties);
}

/**
 * Update a choice field (list / combo box): write V, then regenerate the appearance.
 *
 * @param handles              shared bundle (reader, objectsContext, copyingContext)
 * @param fieldDictionary      the parsed field dictionary being rewritten
 * @param value                a single option (string, or a number which is stringified), or a
 *                             collection of options for multi selection. With multiple options the
 *                             first one is the text drawn in the appearance.
 * @param inheritedProperties  properties collected from ancestor fields, passed on for the appearance
 */
function updateChoiceValue(handles,fieldDictionary,value,inheritedProperties) {
    var appearanceInField =  fieldDictionary.exists('Subtype') && (fieldDictionary.queryObject('Subtype').toString() == 'Widget') || !fieldDictionary.exists('Kids');
    var fieldsToRemove = {'V':-1};
    if(appearanceInField) {
        // add skipping AP if in field (and not in a child widget)
        fieldsToRemove['AP'] = -1;
    }
    else {
        // the Kids array is rewritten by writeFieldWithAppearanceForText. copying the original one
        // as well would leave the dictionary with two Kids entries
        fieldsToRemove['Kids'] = -1;
    }

    var modifiedDict = startModifiedDictionary(handles,fieldDictionary,fieldsToRemove);

    // start with value, setting per one or multiple selection. also choose the text to write in appearance
    var textToWrite;
    if(typeof(value) === 'string' || typeof(value) === 'number') {
        // one option
        textToWrite = String(value);
        modifiedDict
            .writeKey('V')
            .writeLiteralStringValue(new hummus.PDFTextString(textToWrite).toBytesArray());
    }
    else {
        // multiple options
        modifiedDict
            .writeKey('V');
        handles.objectsContext.startArray();
        value.forEach(function(singleValue) {
            handles.objectsContext.writeLiteralString(new hummus.PDFTextString(singleValue).toBytesArray());
        });
        handles.objectsContext.endArray();
        textToWrite = value.length > 0 ? value[0]:'';
    }

    // takes care of closing the dictionary and the indirect object
    writeFieldWithAppearanceForText(handles,modifiedDict,fieldDictionary,appearanceInField,textToWrite,inheritedProperties);
}

/**
 * Write a field together with its new value, dispatching on the field type.
 *
 * The field is assumed to be terminal: either it carries its own widget data (no kids), or its
 * kids are its widget annotation(s) - normally one, several for radio buttons.
 * Field type (FT) and flags (Ff) come from the field itself when present, otherwise from the
 * inherited properties.
 */
function updateFieldWithValue(handles,fieldDictionary,value,inheritedProperties) {
    const ownType = fieldDictionary.exists('FT') ? fieldDictionary.queryObject('FT').toString() : undefined;
    const fieldType = ownType || inheritedProperties['FT'];
    const ownFlags = fieldDictionary.exists('Ff') ? fieldDictionary.queryObject('Ff').toNumber() : undefined;
    const flags = ownFlags === undefined ? inheritedProperties['Ff'] : ownFlags;

    if(fieldType === 'Btn') {
        if((flags>>16) & 1) {
            // push button. there is no value to write, copy as is
            defaultTerminalFieldWrite(handles,fieldDictionary);
        } else {
            // checkbox or radio button
            updateOptionButtonValue(handles,fieldDictionary,value);
        }
        return;
    }

    if(fieldType === 'Tx') {
        // bit 25 of the flags tells rich text from plain text
        updateTextValue(handles,fieldDictionary,value,(flags>>25) & 1,inheritedProperties);
        return;
    }

    if(fieldType === 'Ch') {
        updateChoiceValue(handles,fieldDictionary,value,inheritedProperties);
        return;
    }

    // signatures ('Sig') are not handled, and neither are missing or unknown types: copy as is
    defaultTerminalFieldWrite(handles,fieldDictionary);
}

/**
 * Write a field that gets no value of its own, then descend into its kids, which might.
 * The dictionary is copied without Kids. When there are kids, the Kids entry is rewritten and
 * each kid is processed with this field's FT / Ff / DA / Opt layered over the inherited
 * properties and with `baseFieldName + '.'` as the name prefix. Without kids the object is
 * simply closed.
 */
function writeFieldAndKids(handles,fieldDictionary,inheritedProperties,baseFieldName) {
    const fieldCopy = startModifiedDictionary(handles,fieldDictionary,{'Kids':-1});

    let kids = null;
    if(fieldDictionary.exists('Kids')) {
        kids = handles.reader.queryDictionaryObject(fieldDictionary,'Kids').toPDFArray();
    }

    if(!kids) {
        // nothing below this field, the object can be finished right away
        handles.objectsContext
            .endDictionary(fieldCopy)
            .endIndirectObject();
        return;
    }

    // properties that the kids inherit from this field, each with the way to read it
    const inheritable = [
        ['FT', function(entry) { return entry.toString(); }],
        ['Ff', function(entry) { return entry.toNumber(); }],
        ['DA', function(entry) { return entry.toString(); }],
        ['Opt', function(entry) { return entry.toPDFArray(); }]
    ];
    const localEnv = {};
    inheritable.forEach(function(pair) {
        const key = pair[0];
        const read = pair[1];
        if(fieldDictionary.exists(key)) {
            localEnv[key] = read(fieldDictionary.queryObject(key));
        }
    });

    fieldCopy.writeKey('Kids');
    // recursing into the kids also takes care of ending this object
    writeFilledFields(handles,fieldCopy,kids,_.extend({},inheritedProperties,localEnv),baseFieldName + '.');
}

/**
 * Write one field, filling it when the data holds a value under its full name.
 * The full name is the base name followed by the field's own T entry, when it has one.
 * Expects the field's indirect object to be open already; the dictionary, the object and
 * any kids are written from here.
 */
function writeFilledField(handles,fieldDictionary,inheritedProperties,baseFieldName) {
    let fullName = baseFieldName;
    if(fieldDictionary.exists('T')) {
        const partialName = toText(fieldDictionary.queryObject('T'));
        if(partialName !== undefined) {
            fullName = baseFieldName + partialName;
        }
    }

    const requestedValue = handles.data[fullName];
    if(requestedValue === undefined || requestedValue === null) {
        // no value at this level. write the field as is and look further down in its kids
        writeFieldAndKids(handles,fieldDictionary,inheritedProperties,fullName);
        return;
    }

    // the data names this field, write it with the updated value
    updateFieldWithValue(handles,fieldDictionary,requestedValue,inheritedProperties);
}

/**
 * Write a kids (or fields) array in which every entry is an indirect reference, then close
 * `parentDict` and the indirect object that contains it.
 *
 * Entries that already are references are copied as they are. Direct entries get a newly
 * allocated object id, and a reference to that id is written in their place. The caller is
 * expected to write the objects themselves afterwards.
 *
 * @param handles     shared bundle; uses objectsContext and copyingContext
 * @param parentDict  the open dictionary the array belongs to (its key must be written already)
 * @param kidsArray   the parsed source array
 * @returns {Array}   one entry per kid, in order: `{existing:true, id}` for references, or
 *                    `{existing:false, id, field, theObject}` for direct objects, where `field`
 *                    and `theObject` both hold the parsed direct object. `field` is what the
 *                    callers in this file read; `theObject` is kept for backward compatibility.
 */
function writeKidsAndEndObject(handles,parentDict,kidsArray) {
    var fieldsReferences = [];

    handles.objectsContext.startArray();
    kidsArray.toJSArray().forEach(function(field) {
        if(field.getType() === hummus.ePDFObjectIndirectObjectReference) {
            // existing reference, keep as is
            handles.copyingContext.copyDirectObjectAsIs(field);
            fieldsReferences.push({existing:true,id:field.toPDFIndirectObjectReference().getObjectID()});
        }
        else {
            var newFieldObjectId = handles.objectsContext.allocateNewObjectID();
            // direct object, recreate as reference
            fieldsReferences.push({existing:false,id:newFieldObjectId,field:field,theObject:field});
            // NOTE(review): writing an indirect reference looks like an objects context operation.
            // The copying context is still preferred when it provides the method, so as not to
            // change behavior - confirm against the hummus API.
            var referenceWriter = typeof handles.copyingContext.writeIndirectObjectReference === 'function' ?
                handles.copyingContext :
                handles.objectsContext;
            referenceWriter.writeIndirectObjectReference(newFieldObjectId);
        }
    });
    handles.objectsContext
        .endArray(hummus.eTokenSeparatorEndLine)
        .endDictionary(parentDict)
        .endIndirectObject();

    return fieldsReferences;
}

/**
 * Write the fields/kids array of a dictionary and then the fields themselves.
 * All entries are made indirect to keep things simple, which is also why this function is the
 * one to finish writing `parentDict`. Each field is then recreated, filled where the data asks
 * for it, recursing further down as needed.
 */
function writeFilledFields(handles,parentDict,fields,inheritedProperties,baseFieldName) {
    const references = writeKidsAndEndObject(handles,parentDict, fields);

    for(const reference of references) {
        let fieldDictionary;

        if(reference.existing) {
            // rewrite the existing object in place
            handles.objectsContext.startModifiedIndirectObject(reference.id);
            fieldDictionary = handles.reader.parseNewObject(reference.id).toPDFDictionary();
        } else {
            // formerly a direct object, now written under its newly allocated id
            handles.objectsContext.startNewIndirectObject(reference.id);
            fieldDictionary = reference.field.toPDFDictionary();
        }

        writeFilledField(handles,fieldDictionary,inheritedProperties,baseFieldName);
    }
}

/**
 * Write a filled form dictionary, and its subordinate fields.
 * Assumes the form's indirect object is already open, and finishes it.
 *
 * @param handles       shared bundle (reader, objectsContext, copyingContext, data)
 * @param acroformDict  the parsed AcroForm dictionary
 */
function writeFilledForm(handles,acroformDict) {
    var modifiedAcroFormDict = startModifiedDictionary(handles,acroformDict,{'Fields':-1});

    var fields = acroformDict.exists('Fields') ?
        handles.reader.queryDictionaryObject(acroformDict,'Fields').toPDFArray() :
        null;

    if(fields) {
        modifiedAcroFormDict.writeKey('Fields');
        writeFilledFields(handles,modifiedAcroFormDict,fields,{},''); // will also take care of finishing the dictionary and indirect object, so no need to finish after
    } else {
        // no fields at all. close the dictionary and the object directly on the objects context
        // (endDictionary chains on the objects context, which has no `objectsContext` property)
        handles.objectsContext
            .endDictionary(modifiedAcroFormDict)
            .endIndirectObject();
    }
}

/**
 * Fill the AcroForm of the PDF being modified by `writer` with the values in `data`.
 *
 * Loads arial.ttf from this module's directory into the module-level `fontArial`, which is the
 * font used for regenerated text appearances.
 *
 * @param writer  a hummus writer opened for modifying an existing file
 * @param data    map of fully qualified field names ('parent.child') to values
 * @returns {Error|undefined}  NOTE: when the catalog has no AcroForm entry an Error object is
 *                             RETURNED, not thrown - callers have to check the return value.
 */
function fillForm(writer,data) {

    fontArial = writer.getFontForFile(__dirname + '/arial.ttf');

    // setup parser
    var reader =  writer.getModifiedFileParser();

    // start out by finding the acrobat form
    var catalogDict =  reader.queryDictionaryObject(reader.getTrailer(),'Root').toPDFDictionary(),
        acroformInCatalog = catalogDict.exists('AcroForm') ? catalogDict.queryObject('AcroForm'):null;

    if(!acroformInCatalog)
        return new Error('form not found!');

    // setup copying context, and keep reference to objects context as well
    var copyingContext = writer.createPDFCopyingContextForModifiedFile();
    var objectsContext = writer.getObjectsContext();

    // parse the acroform dict (the entry is known to exist at this point)
    var acroformDict = reader.queryDictionaryObject(catalogDict,'AcroForm');

    // lets put all the basics in a nice "handles" package, so we don't have to pass each of them all the time
    var handles = {
        writer:writer,
        reader:reader,
        copyingContext:copyingContext,
        objectsContext:objectsContext,
        data:data,
        acroformDict:acroformDict
    };

    // recreate a copy of the existing form, which we will fill with data.
    if(acroformInCatalog.getType() === hummus.ePDFObjectIndirectObjectReference) {
        // if the form is a referenced object, modify it
        var acroformObjectId = acroformInCatalog.toPDFIndirectObjectReference().getObjectID();
        objectsContext.startModifiedIndirectObject(acroformObjectId);

        writeFilledForm(handles,acroformDict);
    } else {
        // otherwise, recreate the form as an indirect child (this is going to be a general policy, we're making things indirect. it's simpler), and recreate the catalog
        var catalogObjectId = reader.getTrailer().queryObject('Root').toPDFIndirectObjectReference().getObjectID();
        var newAcroformObjectId = objectsContext.allocateNewObjectID();

        // recreate the catalog with form pointing to new reference.
        // declared locally: assigning it undeclared leaked a global, and throws in strict mode
        objectsContext.startModifiedIndirectObject(catalogObjectId);
        var modifiedCatalogDictionary = startModifiedDictionary(handles,catalogDict,{'AcroForm':-1});

        modifiedCatalogDictionary.writeKey('AcroForm');
        modifiedCatalogDictionary.writeObjectReferenceValue(newAcroformObjectId);
        objectsContext
            .endDictionary(modifiedCatalogDictionary)
            .endIndirectObject();

        // now create the new form object
        objectsContext.startNewIndirectObject(newAcroformObjectId);

        writeFilledForm(handles,acroformDict);
    }
}

// public API of the form filling module
module.exports = { fillForm };

[pdf-form-lock.js]

var hummus = require('hummus')
var _ = require('lodash')

/**
 * Start a new dictionary holding a copy of every entry of `originalDict`, except the keys
 * flagged in `excludedKeys`. The dictionary is returned still open, so that the caller can
 * add its own version of the excluded entries and then end it.
 */
function startModifiedDictionary(handles,originalDict,excludedKeys) {
    const entries = originalDict.toJSObject();
    const copy = handles.objectsContext.startDictionary();

    Object.getOwnPropertyNames(entries)
        .filter(function(key) { return !excludedKeys[key]; })
        .forEach(function(key) {
            copy.writeKey(key);
            handles.copyingContext.copyDirectObjectAsIs(entries[key]);
        });

    return copy;
}

/**
 * Collect the widget annotations of a page that have a usable normal appearance.
 *
 * Widget annotations are the on-page presentation of form fields. To lock a form they get
 * replaced by plain overlays of their appearance graphics; hence, for each widget, the object
 * id of the appearance stream to place and the rectangle to place it in are recorded.
 *
 * The appearance is taken from AP/N: directly when N is a single stream, from its only entry
 * when N is a dictionary with one entry, and otherwise from the entry named by the widget's AS.
 * Annotations with no Subtype, widgets with no AP or N, and widgets whose AS names an appearance
 * that N does not hold are skipped rather than raising an error.
 *
 * @param reader          the parser of the file being modified
 * @param pageDictionary  the parsed page dictionary
 * @returns {Array}       `{id, rect}` per widget; `rect` holds the Rect entries as numbers
 */
function collectWidgetAnnotations(reader, pageDictionary) {
    var widgetAnnotatons = [];
    if(!pageDictionary.exists('Annots'))
        return widgetAnnotatons;

    var annotationsArray = reader.queryDictionaryObject(pageDictionary,'Annots').toPDFArray();
    for(var i = 0; i < annotationsArray.getLength();++i) {
        var annotationObject = reader.queryArrayObject(annotationsArray,i).toPDFDictionary();
        var isWidget = annotationObject.exists('Subtype') &&
            annotationObject.queryObject('Subtype').toString() == 'Widget';
        if(!isWidget)
            continue;

        // without an appearance there is nothing to overlay
        if(!annotationObject.exists('AP'))
            continue;
        var apDictionary = reader.queryDictionaryObject(annotationObject,'AP').toPDFDictionary();
        if(!apDictionary.exists('N'))
            continue;

        // find the appearance xobject id that represents this annotation's appearance
        var appearanceObjectId = null;
        var nAppearances = reader.queryDictionaryObject(apDictionary,'N');
        if(nAppearances.getType() === hummus.ePDFObjectDictionary) {
            var nAppearancesDict = nAppearances.toPDFDictionary().toJSObject();
            var appearanceNames = Object.keys(nAppearancesDict);
            var chosenAppearance = null;
            if(appearanceNames.length === 1) {
                // a single appearance in N, so it is the appearance stream to use
                chosenAppearance = nAppearancesDict[appearanceNames[0]];
            }
            else if(annotationObject.exists('AS')) {
                // otherwise, consult the AS entry for the one to take. it may name a state
                // that has no stream in N, in which case nothing gets chosen
                chosenAppearance = nAppearancesDict[annotationObject.queryObject('AS').toString()];
            }
            if(chosenAppearance)
                appearanceObjectId = chosenAppearance.toPDFIndirectObjectReference().getObjectID();
        }
        else {
            // stream, this means a single appearance. record its object id
            appearanceObjectId = apDictionary.queryObject('N').toPDFIndirectObjectReference().getObjectID();
        }

        if(appearanceObjectId)
            widgetAnnotatons.push({
                id:appearanceObjectId,
                rect: _.map(reader.queryDictionaryObject(annotationObject,'Rect').toPDFArray().toJSArray(),function(item){return item.toNumber()})
            });
    }

    return widgetAnnotatons;
}

/**
 * Add one entry per widget appearance to an open XObject dictionary.
 * Entries are named `<prefix>_<index>` and reference the widget's appearance object id.
 * Returns the `{name, rect}` pairs, in the same order as the input.
 */
function writeNewXObjectsWithPrefix(xobjects, prefix,widgetAnnoations) {
    const placements = [];

    for(let position = 0; position < widgetAnnoations.length; ++position) {
        const widget = widgetAnnoations[position];
        const xobjectName = prefix + '_'  + position;

        xobjects.writeKey(xobjectName);
        xobjects.writeObjectReferenceValue(widget.id);
        placements.push({ name: xobjectName, rect: widget.rect });
    }

    return placements;
}

/**
 * Write a complete XObject entry into an open resources dictionary, containing only the
 * widget appearances, named with the 'myForm' prefix. Returns their `{name, rect}` pairs.
 */
function writeNewXObjectDict(resources, objectsContext,widgetAnnoations) {
    resources.writeKey('XObject');

    const xobjectDict = objectsContext.startDictionary();
    const placements = writeNewXObjectsWithPrefix(xobjectDict,'myForm', widgetAnnoations);
    objectsContext.endDictionary(xobjectDict);

    return placements;
}

/**
 * Write a brand new resources dictionary whose only content is the XObject entry for the
 * widget appearances. Returns the `{name, rect}` pairs of the xobjects written.
 */
function writeNewResourcesDictionary(objectsContext,widgetAnnoations) {
    const resourcesDict = objectsContext.startDictionary();
    const placements = writeNewXObjectDict(resourcesDict,objectsContext,widgetAnnoations);
    objectsContext.endDictionary(resourcesDict);

    return placements;
}

/**
 * Find the Resources dictionary that applies to `dict`: its own one when it has it, otherwise
 * the first one found walking up the Parent chain. Returns null when the chain ends without one.
 */
function findInheritedResources(reader,dict) {
    let node = dict;

    for(;;) {
        if(node.exists('Resources')) {
            return reader.queryDictionaryObject(node,'Resources').toPDFDictionary();
        }
        if(!node.exists('Parent')) {
            return null;
        }

        // climb one level and look again
        node = reader.queryDictionaryObject(node,'Parent').toPDFDictionary();
        if(!node) {
            return null;
        }
    }
}

/**
 * Return a character code that is guaranteed to differ from `inCharCode`.
 * Digits, lowercase and uppercase letters map to the next character of their own group, the
 * last one wrapping around to the first. Anything else maps to 'A' (0x41).
 */
function getDifferentChar(inCharCode) {
    // [first, last] character code of each group, checked in this order
    const groups = [
        [0x30, 0x39], // numerals
        [0x61, 0x7a], // lowercase
        [0x41, 0x5a]  // uppercase
    ];

    for(const group of groups) {
        const first = group[0];
        const last = group[1];

        if(inCharCode >= first && inCharCode <= last - 1)
            return inCharCode+1;
        if(inCharCode == last)
            return first;
    }

    return 0x41;
}

/**
 * Write a copy of a resources dictionary with the widget appearance xobjects added to it.
 *
 * When the resources already have an XObject dictionary, its entries are copied and the new
 * ones are named with a prefix built to differ from every existing name: the prefix's i-th
 * character is chosen to differ from the i-th character of the i-th existing name. Otherwise
 * a new XObject dictionary is written. Returns the `{name, rect}` pairs of the added xobjects.
 */
function writeModifiedResourcesDict(handles, resources, widgetAnnoations) {
    const objectsContext = handles.objectsContext;
    const resourcesCopy = startModifiedDictionary(handles,resources,{'XObject':-1});
    let placements;

    if(!resources.exists('XObject')) {
        placements = writeNewXObjectDict(resourcesCopy,objectsContext,widgetAnnoations);
    }
    else {
        resourcesCopy.writeKey('XObject');
        const xobjectDict = objectsContext.startDictionary();
        const existingEntries = handles.reader.queryDictionaryObject(resources,'XObject').toPDFDictionary().toJSObject();

        // copy the existing names, collecting along the way one differing character per name
        const prefixChars = [];
        Object.getOwnPropertyNames(existingEntries).forEach(function(name,position) {
            xobjectDict.writeKey(name);
            handles.copyingContext.copyDirectObjectAsIs(existingEntries[name]);

            // names too short to have a character at this position count as having a '9' there
            const codeToAvoid = position < name.length ? name.charCodeAt(position) : 0x39;
            prefixChars.push(String.fromCharCode(getDifferentChar(codeToAvoid)));
        });

        placements = writeNewXObjectsWithPrefix(xobjectDict,prefixChars.join(''), widgetAnnoations);
        objectsContext.endDictionary(xobjectDict);
    }

    objectsContext
        .endDictionary(resourcesCopy);
    return placements;
}


/**
 * Write a string to a stream context, one array element per character.
 * Each element is the character's UTF-16 code unit as returned by charCodeAt (no re-encoding
 * takes place), and the whole array is handed to the context's write stream in a single call.
 *
 * @param streamCxt  object exposing getWriteStream(), whose result has write(array)
 * @param str        the text to write
 */
function writeToStreamCxt(streamCxt,str) {
    // filled in place: rebuilding the array with concat for every character was quadratic
    var bytes = new Array(str.length);
    for (var i = 0; i < str.length; ++i) {
        bytes[i] = str.charCodeAt(i);
    }
    streamCxt.getWriteStream().write(bytes);
}

/**
 * Rewrites a page object so that its widget annotations are drawn as part of
 * the page content instead of being annotations.
 *
 * The page dictionary is re-written with:
 *  1. an Annots array holding only the non-widget annotations,
 *  2. a Contents array made of a new "pre" stream, the existing content
 *     streams, and a new "post" stream,
 *  3. a Resources dictionary extended with the widget annotation forms.
 * Then the two new streams are written: the pre stream saves the graphic
 * state ("q"), the post stream restores it ("Q") and places each form with
 * a translation to item.rect[0], item.rect[1] followed by "/<item.name> Do".
 *
 * Does nothing when widgetAnnotatons is empty.
 *
 * @param handles object carrying objectsContext, copyingContext and reader
 * @param pageObjectId object ID of the page to modify
 * @param pageDictionary the parsed page dictionary
 * @param widgetAnnotatons the widget annotations of this page. The list used for placement
 *                         is the one returned by the resources writing helpers
 *                         (expected to carry `rect` and `name` per item)
 */
function lockWidgetAnnotationsForPage(handles,pageObjectId,pageDictionary,widgetAnnotatons) {
    if(widgetAnnotatons.length === 0) // nothing much to do here without widget annotations. so let's keep this for "at least one"
        return;

    var objectsContext = handles.objectsContext;
    var copyingContext = handles.copyingContext;
    var reader = handles.reader;


    // rewrite page object. we'll need to remove the widget annotations, create new content overlay
    // and add annotation forms to the page resources dict
    objectsContext.startModifiedIndirectObject(pageObjectId);
    let modifiedPageDictionary = startModifiedDictionary(handles,pageDictionary,{'Annots':-1, 'Resources': -1, 'Contents': -1});

    // 1. rewrite the annots entry, without the widget annotations (don't mind if it's empty now)
    modifiedPageDictionary.writeKey('Annots');
    objectsContext.startArray();
    var annotationsArray = reader.queryDictionaryObject(pageDictionary,'Annots').toPDFArray();
    for(var i = 0; i < annotationsArray.getLength();++i) {
        var annotationObject = reader.queryArrayObject(annotationsArray,i).toPDFDictionary();
        var isWidget =  annotationObject.queryObject('Subtype').toString() == 'Widget';
        if(!isWidget) {
            copyingContext.copyDirectObjectAsIs(annotationObject);
        }
    }
    objectsContext.endArray();
    objectsContext.endLine();

    // 2. write new contents entry, with a new overlay entry

    // Content IDs that we'll use to introduce new overlay (the pre one is just to protect the matrix)
    var preContent = objectsContext.allocateNewObjectID();
    var postContent = objectsContext.allocateNewObjectID();

    var existingContentsStreamsIds = [];
    if(pageDictionary.exists('Contents')) {
        var contents = reader.queryDictionaryObject(pageDictionary,'Contents')
        if(contents.getType() === hummus.ePDFObjectStream) {
            // single content stream case
            existingContentsStreamsIds.push(
                pageDictionary.queryObject('Contents').toPDFIndirectObjectReference().getObjectID()
            )
        }
        else if(contents.getType() === hummus.ePDFObjectArray) {
            // multiple content streams. get all object ids.
            // the loop must be bounded by the contents array itself (not by the annotations array),
            // otherwise content streams are dropped or read past the end
            var contentsArray = contents.toPDFArray();
            for(var j = 0; j < contentsArray.getLength();++j) {
                existingContentsStreamsIds.push(contentsArray.queryObject(j).toPDFIndirectObjectReference().getObjectID());
            }
        }
    }
    // got existing content streams IDs, let's re-write, adding pre-stream, and post-stream
    modifiedPageDictionary.writeKey('Contents');
    objectsContext.startArray();
    objectsContext.writeIndirectObjectReference(preContent);
    existingContentsStreamsIds.forEach(function(item){
        objectsContext.writeIndirectObjectReference(item)
    });
    objectsContext.writeIndirectObjectReference(postContent);
    objectsContext.endArray();
    objectsContext.endLine();

    // 3. write new resources dict with the new resources. options are: the page has its own resources,
    // it inherits them from a parent, or there are none at all
    modifiedPageDictionary.writeKey('Resources');
    if(pageDictionary.exists('Resources')) {
        widgetAnnotatons = writeModifiedResourcesDict(handles,  reader.queryDictionaryObject(pageDictionary,'Resources').toPDFDictionary(), widgetAnnotatons);
    }
    else {
        var parentDict = pageDictionary.exists('Parent') ? reader.queryDictionaryObject(pageDictionary,'Parent').toPDFDictionary() : null
        if(!parentDict) {
            widgetAnnotatons = writeNewResourcesDictionary(objectsContext,widgetAnnotatons);
        }
        else {
            var inheritedResources = findInheritedResources(reader,parentDict);
            if(!inheritedResources) {
                widgetAnnotatons = writeNewResourcesDictionary(objectsContext,widgetAnnotatons);
            }
            else {
                widgetAnnotatons = writeModifiedResourcesDict(handles, inheritedResources, widgetAnnotatons);
            }
        }
    }

    objectsContext
        .endDictionary(modifiedPageDictionary)
        .endIndirectObject();

    // now write the new overlay placing all the widget annotation forms

    // first write stream with just a save, to encapsulate what unwanted graphic state changes
    // the existing content has
    objectsContext.startNewIndirectObject(preContent);
    var preStreamCxt = objectsContext.startPDFStream();
    writeToStreamCxt(preStreamCxt,"q\r\n");
    objectsContext.endPDFStream(preStreamCxt);
    objectsContext.endIndirectObject();

    // now the 2nd one, iterate the widget annotations, write the forms
    objectsContext.startNewIndirectObject(postContent);
    var postStreamCxt = objectsContext.startPDFStream();
    writeToStreamCxt(postStreamCxt,"Q\r\n"); // closes the "q" opened by the pre stream

    // iterate widget annotations and write their placement code
    widgetAnnotatons.forEach(function(item){
        writeToStreamCxt(postStreamCxt,"q\r\n");
        writeToStreamCxt(postStreamCxt,"1 0 0 1 " + item.rect[0] + " " + item.rect[1] + " cm\r\n");
        writeToStreamCxt(postStreamCxt,"/" + item.name + " Do\r\n");
        writeToStreamCxt(postStreamCxt,"Q\r\n");
    });
    objectsContext.endPDFStream(postStreamCxt);
    objectsContext.endIndirectObject();
}

// amount requested per read() call when copying a stream's content
var BUFFER_SIZE = 10000;

/**
 * Makes sure the stream object behind each widget annotation item can be used as a
 * form xobject: when its dictionary lacks either Type or Subtype the object is
 * re-written with /Type /XObject and /Subtype /Form, and its content copied over.
 *
 * The content is read through reader.startReadingFromStream and written to a new
 * stream, so the keys that describe the old encoding (Length, Filter and the decode
 * parameters) are excluded from the copied dictionary.
 *
 * @param handles object carrying reader and objectsContext
 * @param widgetAnnoations list of items whose `id` is the object ID of the stream to check
 */
function convertWidgetAnnotationsToForm(handles,widgetAnnoations) {
    var reader = handles.reader;
    var objectsContext = handles.objectsContext;

    // just make sure that the widget annotation can qualify as a form xobject (just that it has type and subtype...sometimes they don't)
    widgetAnnoations.forEach(function(item){
        var xobjectStream = reader.parseNewObject(item.id).toPDFStream();
        var widgetDictionary = xobjectStream.getDictionary();
        if(!widgetDictionary.exists('Subtype') || !widgetDictionary.exists('Type')) {
            objectsContext.startModifiedIndirectObject(item.id);
            // the PDF stream dictionary key is "DecodeParms". the previously used "DecodeParams" spelling
            // is kept in the list as well, it is harmless
            var dict = startModifiedDictionary(handles,widgetDictionary,{'Subtype':-1,'Type':-1, 'Length':-1, 'Filter':-1,'DecodeParms':-1,'DecodeParams':-1});
            dict.writeKey('Type');
            dict.writeNameValue('XObject');
            dict.writeKey('Subtype');
            dict.writeNameValue('Form');
            var streamCxt = objectsContext.startPDFStream(dict);
            var streamWriteStream = streamCxt.getWriteStream();
            var readStream = reader.startReadingFromStream(xobjectStream);
            // copy the content over in chunks
            while(readStream.notEnded())
            {
                var readData = readStream.read(BUFFER_SIZE);
                streamWriteStream.write(readData);
            }

            objectsContext.endPDFStream(streamCxt);
            objectsContext.endIndirectObject();
        }


    });
}

/**
 * Goes over the pages of the modified file and, for each one, collects its
 * widget annotations, makes them usable as form xobjects and then rewrites the
 * page so that they are locked into the page content.
 *
 * @param handles object carrying (at least) the reader; passed on to the per-page helpers
 */
function lockPages(handles) {
    const pdfReader = handles.reader;

    let pageIndex = 0;
    while (pageIndex < pdfReader.getPagesCount()) {
        const pageDict = pdfReader.parsePageDictionary(pageIndex);
        const pageWidgets = collectWidgetAnnotations(pdfReader, pageDict);

        convertWidgetAnnotationsToForm(handles, pageWidgets);
        lockWidgetAnnotationsForPage(handles, pdfReader.getPageObjectID(pageIndex), pageDict, pageWidgets);

        ++pageIndex;
    }
}

/**
 * Drops the form from the document: the catalog (the trailer's Root) is
 * re-written without its AcroForm key, and when that key pointed to an
 * indirect object, the object is marked for deletion.
 *
 * @param handles object carrying reader and objectsContext
 */
function removeForm(handles) {
    const { reader, objectsContext } = handles;

    // locate the catalog, both as a parsed dictionary and by its object ID
    const catalogDict = reader.queryDictionaryObject(reader.getTrailer(), 'Root').toPDFDictionary();
    const rootReference = reader.getTrailer().queryObject('Root').toPDFIndirectObjectReference();
    const catalogObjectId = rootReference.getObjectID();

    // write the catalog again, leaving AcroForm out
    objectsContext.startModifiedIndirectObject(catalogObjectId);
    const catalogWithoutForm = startModifiedDictionary(handles, catalogDict, {'AcroForm': -1});
    objectsContext.endDictionary(catalogWithoutForm).endIndirectObject();

    // only an indirectly referenced form object has something to delete
    if (!catalogDict.exists('AcroForm')) {
        return;
    }
    const acroformEntry = catalogDict.queryObject('AcroForm');
    if (!acroformEntry || acroformEntry.getType() !== hummus.ePDFObjectIndirectObjectReference) {
        return;
    }
    objectsContext.deleteObject(acroformEntry.toPDFIndirectObjectReference().getObjectID());
}


/**
 * Entry point: locks the form of the file being modified by `writer`.
 * Gathers the writer and the objects derived from it into one `handles`
 * object, then locks the pages and removes the form from the catalog.
 *
 * @param writer a writer that exposes getModifiedFileParser(),
 *               createPDFCopyingContextForModifiedFile() and getObjectsContext()
 */
function lockForm(writer) {
    const handles = { writer };
    handles.reader = writer.getModifiedFileParser();
    handles.copyingContext = writer.createPDFCopyingContextForModifiedFile();
    handles.objectsContext = writer.getObjectsContext();

    lockPages(handles);
    removeForm(handles);
}


// the module exposes a single entry point, lockForm
module.exports = { lockForm };

Don't forget to download/add the arial.ttf file, and adjust its path in pdf-form-fill.js if needed!

Implement it like this:

//import dependencies
// (hummus is required here because the calls below use it; everything is loaded with
// require so that the script runs as a plain CommonJS module)
let hummus = require('hummus');
let fs = require('fs');
let fillForm = require('./pdf-form-fill').fillForm;
let lockForm = require('./pdf-form-lock').lockForm;

// NOTE: fillData (the values to fill the form with) has to be defined before this point

//create writer for form-fill
var writer = hummus.createWriterToModify(__dirname + "/PDFTemplate.pdf", {
                modifiedFilePath: __dirname + '/_tmp_filled_form.pdf'
            });

//fill form with data
fillForm(writer,fillData);
//finish this...
writer.end();

//create writer for form-lock
writer = hummus.createWriterToModify(__dirname + '/_tmp_filled_form.pdf', {
                modifiedFilePath: __dirname + '/result.pdf'
            });

//lock form
lockForm(writer);
//finish it...
writer.end();

//delete temporary file...
fs.unlink(__dirname + '/_tmp_filled_form.pdf', (err) => { if(err) { console.log("Cannot delete temporary pdf file: "+ err)}} );

@rcoryjohnson
Copy link
Author

@Hatzl Thank you! This got me further along - most fields do show up now. I still have the issue with the result of reader.queryDictionaryObject(annotationObject,'AP') in collectWidgetAnnotations being undefined in some cases, and I think my simple workaround of just leaving out those widgets is causing some fields to disappear still. We are going to revisit this work in a couple of weeks and I will post my final results if we make any progress.

@Hatzl
Copy link

Hatzl commented Nov 6, 2018

Maybe you can delete all form elements from your PDF file and recreate the whole form with https://www.pdfescape.com . That worked for me, so maybe it's an option for you, too.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants