%% Note: most of these rules are based on the Ulster pronunciation.
%% For Connacht and Munster pronunciations, ymmv
%
% NOTE(review): this copy of the file had lost its line breaks and part of its
% text. Line breaks are restored below, one definition or rule per line.
%   - Grapheme names in comments (<ae>, <eo>, ...) were missing; they are filled
%     in only where the rule directly underneath shows which grapheme is meant,
%     and written <?> where that cannot be determined.
%   - Three rule lines are truncated (see the FIXME notes). They are kept
%     verbatim but commented out until restored from a complete copy.
% NOTE(review): `0` appears to denote the empty string and `#` a word edge;
% confirm against the rule engine's documentation.

::front_vowel:: = i|e|í|é
::back_vowel:: = a|o|u|á|ó|ú
::vowel:: = a|i|e|o|u|á|ó|ú|í|é
::long_vowel:: = á|ó|ú|í|é
::short_vowel:: = a|o|u|i|e
::diphthong:: = au|ai|ei|ua|ia
::consonant:: = bh|ch|dh|fh|gh|mh|ng|ph|sh|th|ll|nn|rr|b|c|d|f|g|h|l|m|n|p|r|s|t|v|z
::digraph_consonant:: = bh|ch|dh|fh|gh|mh|ng|ph|sh|th|ll|nn|rr

%% Data normalization, preprocessing

% Alternate spelling
% (dotted consonant letters are rewritten as consonant + h; the curly
% apostrophe is rewritten as the straight one)
ḃ -> bh / _
ċ -> ch / _
ḋ -> dh / _
ḟ -> fh / _
ġ -> gh / _
ṁ -> mh / _
ṗ -> ph / _
ṡ -> sh / _
ṫ -> th / _
’ -> ' / _

% Exceptional forms

% short pronunciation of <eo>
% (prefixes the listed words with the marker `!`, which the <eo> rules
% further down test for)
0 -> ! / # _ (anseo|deoch|eochair|seo)#

% forms of "bí"
% (`B!` is a temporary marker: inserted, used as context, then removed)
0 -> B! / # _ (bheadh|mbeadh|bheas|bead|beadh)#
ea -> eъ / #B!.* _
B! -> 0 / _

% regular verb conjugations
% specifically, those containing <?>
% FIXME(truncated): the line below is incomplete in this copy. `(?` opens a
% group that is never closed, and the text that follows looks like the tail of
% a different rule, so one rule's context and the next rule's left-hand side
% appear to be missing. Restore both rules before re-enabling.
% 0 -> V! / (? th / V! _

% and several containing <ó(i)> or <?>
% TODO: stress-test non-verbs and see if any remaining expressions overzealously match
% FIXME(truncated): same damage as above; kept verbatim, disabled.
% 0 -> V! / (? óchъəъ / V!.* _
ъi -> ь / V!.*ə _

% <dh> becomes /w/ after a broad vowel in verb endings
% There is an exception when followed by certain pronouns, but context-sensitive replacement is out-of-scope
% FIXME(truncated): the rule below breaks off after `(?` and runs straight into
% what looks like the remainder of a comment about a different grapheme. Kept
% verbatim, disabled.
% dh -> v / V!.*(? has a nonstandard pronunciation, but its implementation causes others to break
%i -> ьəъ / V!a? _ m#
%aь -> ъ / V! _ əъm

% Remove the `V!` marker. While the marker-inserting rules above are disabled,
% this rule and `ъi -> ь / V!.*ə _` have nothing to match.
V! -> 0 / _

% rules for common irregular pronunciations
raibh -> ro / # _ #
abha -> ó / #(d?t|th) _ i?r
abha -> ó / #(n?g|gh) _ i?l
abha -> ó / #(n-|h)? _ i?nn#
d -> 0 / #(g?c|ch)o _ la[dt]
f -> 0 / #d'fhia _ raigh#
g -> 0 / (d?t|th)arrain _ #
gheobhaidh -> ghьó / # _ #
sh -> 0 / # _ roich#
d' -> 0 / # _ fhreagair#
a -> ъ / #(g?c|ch)r _ inn#
a -> ъ / #(m?b|bh) _ in
0 -> idh / #crua _ #

%% Eclipsis (urú) Rules
% (each word-initial eclipsis cluster is reduced to a single consonant)
mb -> m / # _
gc -> g / # _
nd -> n / # _
bhf -> bh / # _
bp -> b / # _
dt -> d / # _
ts -> t / # _

% <dt> is realized as /t/ outside of eclipsis
% (must stay after the word-initial `dt -> d` rule above)
dt -> t / _

%% Broad-slender vowel rules
% generally: [ei] with slender consonants, [aou] with broad consonants
% Note that accented characters may be classified as sequences
% (in the rules below, ь is inserted next to front vowels and ъ next to back
% vowels)

% <ae> is pronounced /e:/, generally between broad consonants
aei -> ъé / (::consonant::) _
ae -> ъéъ / (::consonant::) _

% <ao> is pronounced /i:/, generally between broad consonants
aoi -> ъí / (::consonant::) _ (::consonant::|#)
ao -> ъíъ / (::consonant::) _ (::consonant::|#)

% <eo> is /o:/ except in four words
% (the four words are the ones marked with `!` under "Exceptional forms")
i -> ь / eo _
eo -> ьó / _
ó -> o / #!.* _

% <ei> is usually /e/, but sometimes /i/ or /e:/
ei -> i / _ [mn]
i -> ь / (e|é) _ (?![dg]h)(::consonant::)
i -> ь / é _ [dg]h
0 -> ь / ei[dg]h _ (::consonant::)
e -> é / _ ь?r[dln]

% <oi> represents many possible sounds
oi -> ъi / (n|m|mh) _
oi -> ъi / _ (n|m|mh)
oi -> oь / _ (cht|rs|rt|rth|s)
oi -> óь / _ r[dln]
oi -> ъi / _ ll(#|::consonant::)
oi -> ъe / _ (?![dg]h)

% Rules for <?>
u -> ú / _ (ó|á)
u -> ъ / _ (i|í)
u -> ú / _ r[dln]

% Rules for <?>
a -> ъ / é _
a -> ъ / _ í
i -> ь / (a|á) _ (?![gd]h)(::consonant::)
a -> á / _ ь?r[dlnr]

% Rules for <?>
i -> í / _ (ó|á)
i -> ь / (::consonant::) _ (u|ú)
i -> ь / (á|ú|ó) _

% Rules for <?>
o -> ъ / (i|í) _ (::consonant::)
o -> ó / _ r[dln]

% Rules for <?>
e -> ь / (::consonant::|#) _ (::back_vowel::)

% Rules for <bh>, <dh>, <gh>, <mh> after short vowels
% ensure broad/slender rules are applied before deleting vowel symbols
0 -> ь / (::short_vowel::)[bdgm]h(::short_vowel::)?(::front_vowel::) _ (::consonant::)
0 -> ъ / (::short_vowel::)[bdgm]h(::short_vowel::)?(::back_vowel::) _ (::consonant::)
ai? -> 0 / [aou][bdgm]h _
([eь]a|i) -> 0 / [eo]i[dg]h _
[eьa] -> 0 / aigh _
e -> 0 / ai[dg]h _
% `:` is appended after the digraph here; the "Revert overzealous replacement"
% rule below relies on it.
0 -> : / [aou][bm]h _
0 -> : / [aoi][dg]h _

% general insertion rules
0 -> ь / (::consonant::) _ (::front_vowel::)
0 -> ь / (::front_vowel::) _ (::consonant::)
0 -> ъ / (::consonant::) _ (::back_vowel::)
0 -> ъ / (::back_vowel::) _ (::consonant::)

% Revert overzealous replacement
% (drops a ь/ъ that the insertion rules placed between a short vowel and a
% `:`-marked digraph)
[ьъ] -> 0 / (::short_vowel::) _ [bdgm]h:

% <n> realized as /r/ in certain initial clusters
% (word-initial: one character other than `s`, optionally followed by `h`)
n -> r / #[^s]h? _

% <th> deleted finally after long vowels and diphthongs
th -> 0 / (::long_vowel::|::diphthong::)[ьъ] _ #