How to create a Word Pattern Identifier?

I have this collection of phrases:

Open the door and close the windows=Open de deur en sluit de ramen
Open the faucet and close the valve=Open de kraan en sluit de klep
Open the hood and start the engine=Open de motorkap en start de motor
Open the microwave and close the packages=Open de magnetron en sluit de verpakkingen
Open the packages and close the microwave=Open de verpakkingen en sluit de magnetron
Open the valve and close the faucet=Open de klep en sluit de kraan
Open the windows and close the door=Open de ramen en sluit de deur

I'm looking for a way to identify the regularity (pattern) here:

Open the xxx and close the yyy=Open de xxx en sluit de yyy

The best approach that I can come up with, is to execute a regular expression on these lines for as many times as there are words in the left (and right) part: 7 times. During the execution I replace every nth word with xxx and every xth word with yyy. Eventually I should have 7 identical phrases with two patterns identified: one at the left-hand side and one at the right-hand side (for simplicity, lets first focus on the left-hand side):

Open the xxx and close the yyy
Open the xxx and close the yyy
Open the xxx and start the yyy
Open the xxx and close the yyy
Open the xxx and close the yyy
Open the xxx and close the yyy
Open the xxx and close the yyy

This clearly identifies a pattern.

Here is my regular expression

Question: Would this be a good approach to work on?

I personally find the use of a split function conceptually simpler, and easier to adjust and maintain.

(most scripting languages have split built in – in others it is simply written)

Here, for example splitting on "=" and then sub-splitting on {and, the}, {de, en}


Open and close pattern (by splitting and sub-splitting).kmmacros (7.2 KB)


OUTPUT


Expand disclosure triangle to view JS Source
(() => {
    "use strict";

    // main :: IO ()
    const main = () => {
        const
            kme = Application("Keyboard Maestro Engine"),
            equivalences = lines(
                kme.getvariable(
                    "equivalenceSamples"
                )
            );

        return sortOn(x => x[0])(
                nubBy(
                    ([a, b]) => ([x, y]) => a === x && b === y
                )(
                    equivalences.map(
                        x => bimap(
                            s => s.split(/ the /u).flatMap(
                                k => k.split(/ and /u)
                            )
                        )(
                            s => s.split(/ de /u).flatMap(
                                k => k.split(/ en /u)
                            )
                        )(
                            x.split("=")
                        )
                    )
                    .flatMap(uncurry(zip))
                )
            )
            .map(pair => pair.join(","))
            .join("\n");
    };

    // --------------------- GENERIC ---------------------

    // bimap :: (a -> b) -> (c -> d) -> (a, c) -> (b, d)
    const bimap = f =>
        // Tuple instance of bimap.
        // A tuple of the application of f and g to the
        // first and second values respectively.
        g => tpl => [
            f(tpl[0]),
            g(tpl[1])
        ];

    // comparing :: Ord a => (b -> a) -> b -> b -> Ordering
    const comparing = f =>
        // The ordering of f(x) and f(y) as a value
        // drawn from {-1, 0, 1}, representing {LT, EQ, GT}.
        x => y => {
            const
                a = f(x),
                b = f(y);

            return a < b ? -1 : (a > b ? 1 : 0);
        };

    // lines :: String -> [String]
    const lines = s =>
        // A list of strings derived from a single string
        // which is delimited by \n or by \r\n or \r.
        Boolean(s.length) ? (
            s.split(/\r\n|\n|\r/u)
        ) : [];


    // nubBy :: (a -> a -> Bool) -> [a] -> [a]
    const nubBy = p =>
        // A sublist of xs from which all duplicates,
        // (as defined by the equality predicate p)
        // are excluded.
        xs => xs.reduce(
            (seen, x) => seen.some(p(x)) ? (
                seen
            ) : [x].concat(seen),
            []
        );

    // sortBy :: (a -> a -> Ordering) -> [a] -> [a]
    const sortBy = f =>
        // A copy of xs sorted by the comparator function f.
        xs => xs.slice()
        .sort((a, b) => f(a)(b));

    // sortOn :: Ord b => (a -> b) -> [a] -> [a]
    const sortOn = f =>
        // Equivalent to sortBy(comparing(f)), but with f(x)
        // evaluated only once for each x in xs.
        // ('Schwartzian' decorate-sort-undecorate).
        xs => sortBy(
            comparing(x => x[0])
        )(
            xs.map(x => [f(x), x])
        )
        .map(x => x[1]);

    // zip :: [a] -> [b] -> [(a, b)]
    const zip = xs =>
        // The paired members of xs and ys, up to
        // the length of the shorter of the two lists.
        ys => Array.from({
            length: Math.min(xs.length, ys.length)
        }, (_, i) => [xs[i], ys[i]]);


    // uncurry :: (a -> b -> c) -> ((a, b) -> c)
    const uncurry = f =>
        // A function over a pair, derived
        // from a curried function.
        (...args) => {
            const [x, y] = Boolean(args.length % 2) ? (
                args[0]
            ) : args;

            return f(x)(y);
        };

    //  MAIN --
    return main();
})();

Or, FWIW using AppleScript to do the same thing:

Expand disclosure triangle to view AppleScript Source
use framework "Foundation"

on run
	tell application "Keyboard Maestro Engine" to ¬
		set equivalences to paragraphs of ¬
			(getvariable "equivalenceSamples")
	
	set pairs to map(bimap(splitOn(" the "), splitOn(" de ")), ¬
		map(splitOn("="), equivalences))
	
	script subSplit
		on |λ|(enNL)
			set {en, nl} to enNL
			
			zip(concatMap(splitOn(" and "), en), ¬
				concatMap(splitOn(" en "), nl))
		end |λ|
	end script
	
	set wordPairs to concatMap(subSplit, pairs)
	
	script identicalPair
		on |λ|(ab, xy)
			set {a, b} to ab
			set {x, y} to xy
			
			a = x and b = y
		end |λ|
	end script
	
	set uniquePairs to nubBy(identicalPair, wordPairs)
	
	unlines(map(intercalate(", "), sortOn(fst, uniquePairs)))
end run



------------------------- GENERAL ------------------------

-- Tuple (,) :: a -> b -> (a, b)
on Tuple(a, b)
	-- Constructor for a pair of values, possibly of two different types.
	{type:"Tuple", |1|:a, |2|:b, length:2}
end Tuple



-- mReturn :: First-class m => (a -> b) -> m (a -> b)
on mReturn(f)
	-- 2nd class handler function lifted into 1st class script wrapper. 
	if script is class of f then
		f
	else
		script
			property |λ| : f
		end script
	end if
end mReturn


-- bimap :: (a -> b) -> (c -> d) -> (a, c) -> (b, d)
on bimap(f, g)
	-- Tuple instance of bimap.
	-- A tuple of the application of f and g to the
	-- first and second values of tpl respectively.
	script
		on |λ|(x)
			{|λ|(fst(x)) of mReturn(f), ¬
				|λ|(snd(x)) of mReturn(g)}
		end |λ|
	end script
end bimap


-- concat :: [[a]] -> [a]
on concat(xs)
	set lst to {}
	repeat with x in xs
		set end of lst to contents of x
	end repeat
end concat


-- concatMap :: (a -> [b]) -> [a] -> [b]
on concatMap(f, xs)
	set lng to length of xs
	set acc to {}
	
	tell mReturn(f)
		repeat with i from 1 to lng
			set acc to acc & (|λ|(item i of xs, i, xs))
		end repeat
	end tell
	acc
end concatMap


-- filter :: (a -> Bool) -> [a] -> [a]
on filter(p, xs)
	tell mReturn(p)
		set n to length of xs
		set ys to {}
		
		repeat with i from 1 to n
			set v to item i of xs
			if |λ|(v, i, xs) then set end of ys to v
		end repeat
		ys
	end tell
end filter


-- flatten :: NestedList a -> [a]
on flatten(t)
	-- A flat list derived from a nested list.
	if list is class of t then
		concatMap(my flatten, t)
	else
		t
	end if
end flatten


-- foldr :: (a -> b -> b) -> b -> [a] -> b
on foldr(f, startValue, xs)
	tell mReturn(f)
		set v to startValue
		set lng to length of xs
		repeat with i from lng to 1 by -1
			set v to |λ|(item i of xs, v, i, xs)
		end repeat
		return v
	end tell
end foldr


-- fst :: (a, b) -> a
on fst(tpl)
	if class of tpl is record then
		|1| of tpl
	else
		item 1 of tpl
	end if
end fst


-- intercalate :: String -> [String] -> String
on intercalate(delim)
	script
		on |λ|(xs)
			set {dlm, my text item delimiters} to ¬
				{my text item delimiters, delim}
			set s to xs as text
			set my text item delimiters to dlm
			s
		end |λ|
	end script
end intercalate


-- map :: (a -> b) -> [a] -> [b]
on map(f, xs)
	-- The list obtained by applying f
	-- to each element of xs.
	tell mReturn(f)
		set lng to length of xs
		set lst to {}
		repeat with i from 1 to lng
			set end of lst to |λ|(item i of xs, i, xs)
		end repeat
		return lst
	end tell
end map


-- min :: Ord a => a -> a -> a
on min(x, y)
	if y < x then
		y
	else
		x
	end if
end min


-- nubBy :: (a -> a -> Bool) -> [a] -> [a]
on nubBy(f, xs)
	set g to mReturn(f)'s |λ|
	
	script notEq
		property fEq : g
		on |λ|(a)
			script
				on |λ|(b)
					not fEq(a, b)
				end |λ|
			end script
		end |λ|
	end script
	
	script go
		on |λ|(xs)
			if (length of xs) > 1 then
				set x to item 1 of xs
				{x} & go's |λ|(filter(notEq's |λ|(x), items 2 thru -1 of xs))
			else
				xs
			end if
		end |λ|
	end script
	
	go's |λ|(xs)
end nubBy


-- snd :: (a, b) -> b
on snd(tpl)
	if class of tpl is record then
		|2| of tpl
	else
		item 2 of tpl
	end if
end snd


-- sortOn :: Ord b => (a -> b) -> [a] -> [a]
-- sortOn :: Ord b => [((a -> b), Bool)]  -> [a] -> [a]
on sortOn(f, xs)
	-- Sort a list by comparing the results of a key function applied to each
	-- element. sortOn f is equivalent to sortBy(comparing(f), xs), but has the
	-- performance advantage of only evaluating f once for each element in
	-- the input list. This is called the decorate-sort-undecorate paradigm,
	-- or Schwartzian transform.
	-- Elements are arranged from from lowest to highest.
	
	-- In this Applescript implementation, f can optionally be [(a -> b)]
	-- or [((a -> b), Bool)]) to specify a compound sort order
	
	--    xs:  List of items to be sorted. 
	--          (The items can be records, lists, or simple values).
	--
	--    f:    A single (a -> b) function (Applescript handler),
	--          or a list of such functions.
	--          if the argument is a list, any function can 
	--          optionally be followed by a bool. 
	--          (False -> descending sort)
	--
	--          (Subgrouping in the list is optional and ignored)
	--          Each function (Item -> Value) in the list should 
	--          take an item (of the type contained by xs) 
	--          as its input and return a simple orderable value 
	--          (Number, String, or Date).
	--
	--          The sequence of key functions and optional 
	--          direction bools defines primary to N-ary sort keys.
	script keyBool
		on |λ|(x, a)
			if boolean is class of x then
				{asc:x, fbs:fbs of a}
			else
				{asc:true, fbs:({Tuple(x, asc of a)} & fbs of a)}
			end if
		end |λ|
	end script
	set {fs, bs} to {|1|, |2|} of unzip(fbs of foldr(keyBool, ¬
		{asc:true, fbs:{}}, flatten({f})))
	
	set intKeys to length of fs
	set ca to current application
	script dec
		property gs : map(my mReturn, fs)
		on |λ|(x)
			set nsDct to (ca's NSMutableDictionary's ¬
				dictionaryWithDictionary:{val:x})
			repeat with i from 1 to intKeys
				(nsDct's setValue:((item i of gs)'s |λ|(x)) ¬
					forKey:(character id (96 + i)))
			end repeat
			nsDct as record
		end |λ|
	end script
	
	script descrip
		on |λ|(bool, i)
			ca's NSSortDescriptor's ¬
				sortDescriptorWithKey:(character id (96 + i)) ¬
					ascending:bool
		end |λ|
	end script
	
	script undec
		on |λ|(x)
			val of x
		end |λ|
	end script
	
	map(undec, ((ca's NSArray's arrayWithArray:map(dec, xs))'s ¬
		sortedArrayUsingDescriptors:map(descrip, bs)) as list)
end sortOn


-- splitOn :: String -> String -> [String]
on splitOn(pat)
	-- splitOn("\r\n", "a\r\nb\r\nd\r\ne") --> ["a","b","d","e"]
	-- splitOn("aaa", "aaaXaaaXaaaXaaa") --> {"", "X", "X", "X", ""}
	-- splitOn("x", "x") --> {"", ""}
	script
		on |λ|(src)
			set {dlm, my text item delimiters} to ¬
				{my text item delimiters, pat}
			set xs to text items of src
			set my text item delimiters to dlm
			return xs
		end |λ|
	end script
end splitOn


-- unlines :: [String] -> String
on unlines(xs)
	-- A single string formed by the intercalation
	-- of a list of strings with the newline character.
	set {dlm, my text item delimiters} to ¬
		{my text item delimiters, linefeed}
	set s to xs as text
	set my text item delimiters to dlm
	s
end unlines


-- unzip :: [(a,b)] -> ([a],[b])
on unzip(xys)
	set xs to {}
	set ys to {}
	repeat with xy in xys
		set end of xs to |1| of xy
		set end of ys to |2| of xy
	end repeat
	return Tuple(xs, ys)
end unzip


-- zip :: [a] -> [b] -> [(a, b)]
on zip(xs, ys)
	set lng to min(length of xs, length of ys)
	script go
		on |λ|(x, i)
			{x, item i of ys}
		end |λ|
	end script
	map(go, items 1 thru lng of xs)
end zip