Page N of PDF as plain text

Given the file path of a PDF file, this macro uses
the Keyboard Maestro %JSONValue% token to get:

  1. the page count, and
  2. the plain text of specific pages, by page number.

Plain text of particular pages in a PDF file.kmmacros (7.9 KB)


Expand disclosure triangle to view JS source
return (() => {
    "use strict";

     // Rob Trew @2024
     // Ver 0.1

    ObjC.import("PDFKit");

    // main :: IO ()
    const main = () => {
        // Either a dialog reporting why the PDF could not be read,
        // or a JSON array (2-space indented) of page texts.
        const
            reportProblem = alert("Text of each page in PDF file."),
            asJSON = pageTexts => JSON.stringify(pageTexts, null, 2);

        return either(reportProblem)(asJSON)(
            textPagesFromPDFLR(kmvar.local_PDF_Path)
        );
    };


    // ----------------------- PDF -----------------------

    // textPagesFromPDFLR :: FilePath -> IO Either String [String]
    const textPagesFromPDFLR = pdfPath => {
        // Either a message (Left) explaining why no page text
        // could be obtained from the file at pdfPath, or (Right)
        // a list holding the plain text of each page, in page order.
        // A page which yields no text string is represented
        // by an empty string, so that the list contains only strings.
        const
            fp = filePath(pdfPath),
            uw = ObjC.unwrap;

        return bindLR(
            doesFileExist(fp)
                ? Right(
                    $.PDFDocument.alloc.initWithURL(
                        $.NSURL.fileURLWithPath($(fp))
                    )
                )
                : Left(`File not found: "${fp}"`)
        )(
            // A nil document is reported as an unreadable PDF.
            mbDocPDF => mbDocPDF.isNil()
                ? Left(`Not readable as PDF: ${fp}`)
                : Right(
                    Array.from(
                        { length: uw(mbDocPDF.pageCount) },
                        // PDFPage.string is declared nullable, and
                        // an unwrapped nil would be serialized by
                        // JSON.stringify as null rather than as a
                        // string, hence the "" default.
                        (_, i) => uw(
                            mbDocPDF.pageAtIndex(i).string
                        ) || ""
                    )
                )
        );
    };

    // ----------------------- JXA -----------------------

    // alert :: String -> String -> IO String
    const alert = title =>
        // A dialog with the given title, displaying the message s
        // with a single OK button. The message s is returned
        // once the dialog call has completed.
        s => {
            const systemEvents = Application("System Events");

            systemEvents.includeStandardAdditions = true;
            systemEvents.activate();
            systemEvents.displayDialog(s, {
                withTitle: title,
                buttons: ["OK"],
                defaultButton: "OK"
            });

            return s;
        };


    // --------------------- GENERIC ---------------------

    // Left :: a -> Either a b
    const Left = x => {
        // An Either value carrying x under its Left key.
        // (Used in this script for explanatory error messages.)
        return { type: "Either", Left: x };
    };


    // Right :: b -> Either a b
    const Right = x => {
        // An Either value carrying x under its Right key.
        // (Used in this script for successfully obtained results.)
        return { type: "Either", Right: x };
    };


    // bindLR (>>=) :: Either a ->
    // (a -> Either b) -> Either b
    const bindLR = lr =>
        // Monadic bind for Either values.
        // A value with a Left key short-circuits, and is
        // returned untouched. Otherwise the result is mf
        // applied to the contents of the Right key.
        mf => {
            if ("Left" in lr) {
                return lr;
            }

            return mf(lr.Right);
        };


    // doesFileExist :: FilePath -> IO Bool
    const doesFileExist = fp => {
        // True if something exists at the (standardized) path fp,
        // and the is-directory flag written back through the
        // reference is not set.
        const
            isDirectoryRef = Ref(),
            somethingExists = $.NSFileManager.defaultManager
                .fileExistsAtPathIsDirectory(
                    $(fp).stringByStandardizingPath,
                    isDirectoryRef
                );

        return somethingExists && !isDirectoryRef[0];
    };


    // either :: (a -> c) -> (b -> c) -> Either a b -> c
    const either = fl =>
        // Case analysis for Either values:
        // fl applied to the Left contents of e, if e has
        // a Left key, otherwise fr applied to its Right contents.
        fr => e => {
            if ("Left" in e) {
                return fl(e.Left);
            }

            return fr(e.Right);
        };


    // filePath :: String -> FilePath
    const filePath = s => {
        // A JS string copy of the path s, standardized by
        // NSString's stringByStandardizingPath
        // (any leading tilde expanded to the user directory path).
        const standardized = $(s).stringByStandardizingPath;

        return ObjC.unwrap(standardized);
    };


    return main();
})();
4 Likes