Process XML and extract to a tab-delimited file

What is a good approach to convert an XML file like the attached MARTIF file to a tab-delimited file (example also attached)?
Archive.zip (2.1 KB)

Apart from the slightly non-compliant doctype element in the TXE header, you could certainly use standard tools like XPath or XQuery to extract the term pairs.

In XQuery for example:

for $term in //ntig/termGrp/term/text()
return $term

In the following, I've cut the doctype lines out, but I notice that (pace the complaints of compliant XML parsers) the XQuery still seems to work if we leave them in.

Tabbed pairs from TXE.kmmacros (16 KB)


Expand disclosure triangle to view JS source
(() => {
    "use strict";

    const main = () => {
        // Either an alert showing an error message, or the terms
        // matched by the XQuery, as one tab-separated pair per line.
        const
            unwrap = ObjC.unwrap,
            readVariable = kmValue(kmInstance()),
            query = readVariable("local_XQuery"),
            source = readVariable("local_TXE_XML");

        // A message (Left), or the string value of each match (Right).
        const termsLR = fmapLR(
            nodes => nodes.map(node => unwrap(node.stringValue))
        )(
            valuesFromXQuery(query)(source)
        );

        // Consecutive terms grouped in twos, tab within
        // each pair, and a line break between pairs.
        const tabbedPairs = terms => chunksOf(2)(terms)
            .map(pair => pair.join("\t"))
            .join("\n");

        return either(
            alert("Term pairs from XML")
        )(tabbedPairs)(termsLR);
    };

    // ---------------- KEYBOARD MAESTRO -----------------

    // kmValue :: KM Instance -> String -> IO String
    const kmValue = instance =>
        // The value which the Keyboard Maestro Engine returns
        // for the variable named variableName, with the given
        // instance forwarded as the `instance` option.
        variableName => {
            const engine = Application("Keyboard Maestro Engine");

            return engine.getvariable(variableName, {instance});
        };

    // kmInstance :: () -> IO String
    const kmInstance = () => {
        // The KMINSTANCE entry of this process's environment,
        // or an empty string if that lookup yields a falsy value.
        const
            environment = $.NSProcessInfo.processInfo.environment,
            instanceId = ObjC.unwrap(
                environment.objectForKey("KMINSTANCE")
            );

        return instanceId || "";
    };

    // --------------------- XQUERY ----------------------

    // valuesFromXQuery :: XQuery String -> XML String -> Either String [a]
    const valuesFromXQuery = xq =>
        // Either a message (Left) if xml could not be parsed
        // or xq could not be evaluated over it, or (Right)
        // the unwrapped array of objects which the query returned.
        xml => {
            const
                unwrap = ObjC.unwrap,
                parseError = $(),
                parsed = $.NSXMLDocument.alloc
                .initWithXMLStringOptionsError(
                    xml, 0, parseError
                );

            // The query evaluated over a successfully parsed document.
            const runQuery = doc => {
                const
                    queryError = $(),
                    results = doc.objectsForXQueryError(
                        xq, queryError
                    );

                if (results.isNil()) {
                    return Left(
                        unwrap(queryError.localizedDescription)
                    );
                }

                return Right(unwrap(results));
            };

            const docLR = parsed.isNil()
                ? Left(unwrap(parseError.localizedDescription))
                : Right(parsed);

            return bindLR(docLR)(runQuery);
        };

    // ----------------------- JXA -----------------------

    // alert :: String -> String -> IO String
    const alert = title =>
        // A dialog with the given title and a single OK button,
        // displaying the string s. The return value is s itself.
        s => {
            const sa = Application("System Events");

            sa.includeStandardAdditions = true;
            sa.activate();
            sa.displayDialog(s, {
                withTitle: title,
                buttons: ["OK"],
                defaultButton: "OK"
            });

            return s;
        };

    // --------------------- GENERIC ---------------------

    // Left :: a -> Either a b
    const Left = x => {
        // An Either-tagged object holding x under the key "Left".
        return {
            type: "Either",
            Left: x
        };
    };

    // Right :: b -> Either a b
    const Right = x => {
        // An Either-tagged object holding x under the key "Right".
        return {
            type: "Either",
            Right: x
        };
    };

    // bindLR (>>=) :: Either a b ->
    // (b -> Either a c) -> Either a c
    const bindLR = m =>
        // m itself if it is a Left value, otherwise the result
        // of applying mf to the contents of the Right value.
        // The presence of a "Left" key is tested (as in fmapLR),
        // rather than the truthiness of its contents, so that a
        // Left wrapping "", 0, null or undefined is not mistaken
        // for a Right and passed on to mf.
        mf => "Left" in m ? (
            m
        ) : mf(m.Right);


    // chunksOf :: Int -> [a] -> [[a]]
    const chunksOf = n =>
        // xs split into sublists of length n.
        // The last sublist will be short if n
        // does not evenly divide the length of xs.
        // An empty list if n is less than 1.
        xs => {
            const
                size = Math.trunc(n),
                chunks = [];

            // A loop rather than recursion with spreading, so that
            // call depth does not grow with the length of xs, and
            // each element is copied only once.
            if (1 <= size) {
                for (let i = 0; i < xs.length; i += size) {
                    chunks.push(xs.slice(i, i + size));
                }
            }

            return chunks;
        };

    // either :: (a -> c) -> (b -> c) -> Either a b -> c
    const either = fl =>
        // Application of the function fl to the
        // contents of any Left value in e, or
        // the application of fr to its Right value.
        // A Left is recognised by the presence of a "Left" key
        // (as in fmapLR), not by the truthiness of its contents,
        // so a Left wrapping "", 0 or undefined still reaches fl.
        fr => e => "Left" in e ? (
            fl(e.Left)
        ) : fr(e.Right);

    // fmapLR (<$>) :: (b -> c) -> Either a b -> Either a c
    const fmapLR = f =>
        // A Left value passed through untouched, or a new
        // Right value holding f applied to the contents of e.
        e => {
            if ("Left" in e) {
                return e;
            }

            return Right(f(e.Right));
        };

    return main();
})();
1 Like

Thank you very much. This is a nice generic approach to process XML files.

1 Like

For simple cases of fetching data like this, without much further processing, XQuery may be more than you need, and the XPath expression alone is probably enough:

//ntig/termGrp/term/text()

It just requires a slightly different calling function in the JXA:

Tabbed pairs from TXE via XPATH.kmmacros (15 KB)


Expand disclosure triangle to view JS source
(() => {
    "use strict";

    const main = () => {
        // Either an alert showing an error message, or the terms
        // matched by the XPath, as one tab-separated pair per line.
        const
            unwrap = ObjC.unwrap,
            readVariable = kmValue(kmInstance()),
            path = readVariable("local_XPath"),
            source = readVariable("local_TXE_XML");

        // A message (Left), or the string value of each match (Right).
        const termsLR = fmapLR(
            nodes => nodes.map(node => unwrap(node.stringValue))
        )(
            xPathMatchesFromXmlLR(path)(source)
        );

        // Read from the bottom up: terms grouped in twos, each
        // pair joined by a tab, and the pairs joined by line breaks.
        const tabbedPairs = compose(
            intercalate("\n"),
            map(intercalate("\t")),
            chunksOf(2)
        );

        return either(
            alert("Term pairs from XML by XPath")
        )(tabbedPairs)(termsLR);
    };

    // ---------------- KEYBOARD MAESTRO -----------------

    // kmValue :: KM Instance -> String -> IO String
    const kmValue = instance =>
        // The value which the Keyboard Maestro Engine returns
        // for the variable named variableName, with the given
        // instance forwarded as the `instance` option.
        variableName => {
            const engine = Application("Keyboard Maestro Engine");

            return engine.getvariable(variableName, {instance});
        };


    // kmInstance :: () -> IO String
    const kmInstance = () => {
        // The KMINSTANCE entry of this process's environment,
        // or an empty string if that lookup yields a falsy value.
        const
            environment = $.NSProcessInfo.processInfo.environment,
            instanceId = ObjC.unwrap(
                environment.objectForKey("KMINSTANCE")
            );

        return instanceId || "";
    };

    // --------------------- XPATH ----------------------

    // xPathMatchesFromXmlLR :: String ->
    // String -> Either String [NSXMLNode]
    const xPathMatchesFromXmlLR = xpath =>
        // Either a message (Left) if xml could not be parsed
        // or xpath could not be evaluated over it, or (Right)
        // the unwrapped array of nodes which the path matched.
        xml => {
            const
                unwrap = ObjC.unwrap,
                parseError = $(),
                parsed = $.NSXMLDocument.alloc
                .initWithXMLStringOptionsError(
                    xml, 0, parseError
                );

            // The path evaluated over a successfully parsed document.
            const runXPath = doc => {
                const pathError = $();

                // The content kind is set before the path is evaluated,
                // in the same order as a single comma expression would.
                doc.documentContentKind = $.NSXMLDocumentXMLKind;

                const matches = doc.nodesForXPathError(
                    xpath, pathError
                );

                if (matches.isNil()) {
                    return Left(
                        unwrap(pathError.localizedDescription)
                    );
                }

                return Right(unwrap(matches));
            };

            const docLR = parsed.isNil()
                ? Left(unwrap(parseError.localizedDescription))
                : Right(parsed);

            return bindLR(docLR)(runXPath);
        };


    // ----------------------- JXA -----------------------

    // alert :: String -> String -> IO String
    const alert = title =>
        // A dialog with the given title and a single OK button,
        // displaying the string s. The return value is s itself.
        s => {
            const sa = Application("System Events");

            sa.includeStandardAdditions = true;
            sa.activate();
            sa.displayDialog(s, {
                withTitle: title,
                buttons: ["OK"],
                defaultButton: "OK"
            });

            return s;
        };

    // --------------------- GENERIC ---------------------

    // Left :: a -> Either a b
    const Left = x => {
        // An Either-tagged object holding x under the key "Left".
        return {
            type: "Either",
            Left: x
        };
    };


    // Right :: b -> Either a b
    const Right = x => {
        // An Either-tagged object holding x under the key "Right".
        return {
            type: "Either",
            Right: x
        };
    };


    // bindLR (>>=) :: Either a b ->
    // (b -> Either a c) -> Either a c
    const bindLR = m =>
        // m itself if it is a Left value, otherwise the result
        // of applying mf to the contents of the Right value.
        // The presence of a "Left" key is tested (as in fmapLR),
        // rather than the truthiness of its contents, so that a
        // Left wrapping "", 0, null or undefined is not mistaken
        // for a Right and passed on to mf.
        mf => "Left" in m ? (
            m
        ) : mf(m.Right);


    // chunksOf :: Int -> [a] -> [[a]]
    const chunksOf = n =>
        // xs split into sublists of length n.
        // The last sublist will be short if n
        // does not evenly divide the length of xs.
        // An empty list if n is less than 1.
        xs => {
            const
                size = Math.trunc(n),
                chunks = [];

            // A loop rather than recursion with spreading, so that
            // call depth does not grow with the length of xs, and
            // each element is copied only once.
            if (1 <= size) {
                for (let i = 0; i < xs.length; i += size) {
                    chunks.push(xs.slice(i, i + size));
                }
            }

            return chunks;
        };


    // compose (<<<) :: (b -> c) -> (a -> b) -> a -> c
    const compose = (...fs) =>
        // A function which feeds its argument to the last
        // function in fs, and passes each result leftwards
        // through the rest. The identity function if fs is empty.
        x => fs.reduceRight(
            (acc, f) => f(acc),
            x
        );


    // either :: (a -> c) -> (b -> c) -> Either a b -> c
    const either = fl =>
        // Application of the function fl to the
        // contents of any Left value in e, or
        // the application of fr to its Right value.
        // A Left is recognised by the presence of a "Left" key
        // (as in fmapLR), not by the truthiness of its contents,
        // so a Left wrapping "", 0 or undefined still reaches fl.
        fr => e => "Left" in e ? (
            fl(e.Left)
        ) : fr(e.Right);


    // fmapLR (<$>) :: (b -> c) -> Either a b -> Either a c
    const fmapLR = f =>
        // A Left value passed through untouched, or a new
        // Right value holding f applied to the contents of e.
        e => {
            if ("Left" in e) {
                return e;
            }

            return Right(f(e.Right));
        };


    // intercalate :: String -> [String] -> String
    const intercalate = s =>
        // The items of xs joined into a single string,
        // with a copy of s between each adjacent pair.
        xs => {
            const joined = xs.join(s);

            return joined;
        };


    // map :: (a -> b) -> [a] -> [b]
    const map = f =>
        // A new array holding the result of f for each
        // item of the iterable xs, in the original order.
        xs => {
            const items = [...xs];

            return items.map(f);
        };

    return main();
})();
1 Like