Skip to content

Commit

Permalink
performance: pnmtops to temp file
Browse files Browse the repository at this point in the history
For some reason, piping the output of pnmtops directly to ps2pdf makes
the conversion very slow (especially for color scans).

When writing to an intermediate temporary file, conversion is fast.

Before:

________________________________________________________
Executed in   19.20 secs   fish           external
  usr time    4.01 secs    0.32 millis    4.01 secs
  sys time   20.63 secs    5.30 millis   20.62 secs

After:

________________________________________________________
Executed in  368.06 millis    fish           external
   usr time  378.06 millis    0.00 millis  378.06 millis
   sys time  100.00 millis    2.79 millis   97.21 millis

An improvement of over 50 times!

This should resolve #19.
  • Loading branch information
rocketraman committed Mar 11, 2021
1 parent dad7167 commit d33c8ca
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions scan_perpage
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ IMAGE_PATH=$1
IMAGE_DIR=$(dirname $1)
IMAGE_FILE=$(basename $1)

# Optional command prefix used further down in front of external tools:
# it is the word "time" when VERBOSE is exactly 1, and empty otherwise.
case $VERBOSE in
1) TIMEVERBOSE=time ;;
*) TIMEVERBOSE= ;;
esac

process_page() {
log ""
log "-------------------------------------------------------------------------------"
Expand All @@ -80,14 +85,16 @@ process_page() {
if [[ $VERBOSE == 1 ]]; then
UNPAPERVERBOSE="-v"
fi
#runconstrained unpaper $UNPAPERVERBOSE --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout
runconstrained unpaper $UNPAPERVERBOSE --overwrite --dpi $RESOLUTION $IMAGE_PATH $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | logstdout
#runconstrained $TIMEVERBOSE unpaper $UNPAPERVERBOSE --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout
runconstrained $TIMEVERBOSE unpaper $UNPAPERVERBOSE --overwrite --dpi $RESOLUTION $IMAGE_PATH $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | logstdout
fi
if [[ $SEARCHABLE == 1 ]]; then
log "Converting image data to searchable pdf..."
# tesseract uses the input's DPI header, we need to convert to a format that supports this (like tiff)
runconstrained convert -density ${RESOLUTION}x${RESOLUTION} -units PixelsPerInch $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff | logstdout
runconstrained tesseract $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff $IMAGE_DIR/${IMAGE_FILE%.*} -l $LANGUAGE pdf | logstdout
log "...Running convert"
runconstrained $TIMEVERBOSE convert -density ${RESOLUTION}x${RESOLUTION} -units PixelsPerInch $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff | logstdout
log "...Running tesseract"
runconstrained $TIMEVERBOSE tesseract $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff $IMAGE_DIR/${IMAGE_FILE%.*} -l $LANGUAGE pdf | logstdout
[[ -f $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.tiff
else
log "Converting image data to pdf..."
Expand All @@ -101,9 +108,11 @@ process_page() {
if [[ $VERBOSE = 1 && ! "$(pnmtops -verbose 2>&1 < /dev/null)" =~ "unrecognized option" ]]; then
PNMVERBOSE="-verbose"
fi
log "Using page options: $PAGEOPTS"
runconstrained pnmtops $PNMVERBOSE $PAGEOPTS $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE | ps2pdf $PS2PDF_OPTS - > $IMAGE_DIR/${IMAGE_FILE%.*}.pdf | logstdout
[[ -f $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.ps ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.ps
log "...Running pnmtops on $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE using page options: $PAGEOPTS"
runconstrained $TIMEVERBOSE pnmtops $PNMVERBOSE $PAGEOPTS $IMAGE_DIR/$PP_PREFIX$IMAGE_FILE > $IMAGE_DIR/${IMAGE_FILE}.ps | logstdout
log "...Running ps2pdf on $IMAGE_DIR/${IMAGE_FILE}.ps"
runconstrained $TIMEVERBOSE ps2pdf $PS2PDF_OPTS $IMAGE_DIR/${IMAGE_FILE}.ps $IMAGE_DIR/${IMAGE_FILE}.pdf | logstdout
[[ -f $IMAGE_DIR/${IMAGE_FILE}.ps ]] && rm $IMAGE_DIR/$PP_PREFIX${IMAGE_FILE}.ps
fi
else
log "Skipping empty page $IMAGE_FILE with white percentage $PERCENTAGE_WHITE"
Expand Down

0 comments on commit d33c8ca

Please sign in to comment.