From b584e88d64c88784820acf003bebcedc9969102a Mon Sep 17 00:00:00 2001 From: SamGerber Date: Tue, 27 Jun 2017 00:39:46 -0500 Subject: [PATCH 1/4] Support Arabic hyphenated 'al-' This seems to be a currently accepted way to write Arabic surnames. Source: >"Al-" and its variants (ash-, ad-, ar-, etc.) are always written in lower case (unless beginning a sentence), and a hyphen separates it from the following word. > > https://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style/Arabic#Definite_article --- lib/namecase.rb | 2 +- test/test_namecase.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/namecase.rb b/lib/namecase.rb index c2bf397..fe24851 100644 --- a/lib/namecase.rb +++ b/lib/namecase.rb @@ -54,7 +54,7 @@ def self.nc str, options = {} end # Fixes for "son (daughter) of" etc - localstring.gsub!(/\bAl(?=\s+\w)/, 'al') # al Arabic or forename Al. + localstring.gsub!(/\bAl\b(?=.+\w)/, 'al') # al Arabic or forename Al. localstring.gsub!(/\b(Bin|Binti|Binte)\b/,'bin') # bin, binti, binte Arabic localstring.gsub!(/\bAp\b/, 'ap') # ap Welsh. localstring.gsub!(/\bBen(?=\s+\w)/,'ben') # ben Hebrew or forename Ben. diff --git a/test/test_namecase.rb b/test/test_namecase.rb index 8c3d323..500680f 100644 --- a/test/test_namecase.rb +++ b/test/test_namecase.rb @@ -9,7 +9,7 @@ def setup "Keith", "Leigh-Williams", "McCarthy", "O'Callaghan", "St. John", "von Streit", "van Dyke", "Van", "ap Llwyd Dafydd", - "al Fahd", "Al", + "al Fahd", "al-Fahd", "Al", "el Grecco", "ben Gurion", "Ben", "da Vinci", From 174dc4f10e7724352af4cc0eba4add512a31948f Mon Sep 17 00:00:00 2001 From: SamGerber Date: Tue, 27 Jun 2017 01:07:33 -0500 Subject: [PATCH 2/4] Explicitly test hyphenated hebrew 'Ben-' According to New Hart's Style, > The _ben_ that occurs in many Hebrew names > means 'son of'; this is the traditional > Jewish way of naming Jewish males. The > female form is _bat_ 'daughter of'. 
In pre- > modern times a man would usually be known > simply as the son of his father: > _Avraham ben David_ (Avraham the son of David). > ... > The _Ben_ that often figures in modern Israeli > names represents a different usage: now part > of the surname, it should be hyphenated to it > and capitalized, as in _David Ben-Gurion_, and > alphabetized under _B_. --- test/test_namecase.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_namecase.rb b/test/test_namecase.rb index 500680f..9f08b80 100644 --- a/test/test_namecase.rb +++ b/test/test_namecase.rb @@ -11,7 +11,7 @@ def setup "van Dyke", "Van", "ap Llwyd Dafydd", "al Fahd", "al-Fahd", "Al", "el Grecco", - "ben Gurion", "Ben", + "ben Gurion", "Ben-Gurion", "Ben", "da Vinci", "di Caprio", "du Pont", "de Legate", "del Crond", "der Sind", "van der Post", From 873e9106a67a7438d2d0945ce5c786752fbe1568 Mon Sep 17 00:00:00 2001 From: SamGerber Date: Tue, 27 Jun 2017 01:16:49 -0500 Subject: [PATCH 3/4] Support French surnames with liaison (d'/l') This also required specifying that roman numerals ought to appear before a space or at the end of the string; otherwise, "l'" was being upcased by the roman numeral substitution. --- lib/namecase.rb | 6 +++--- test/test_namecase.rb | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/namecase.rb b/lib/namecase.rb index fe24851..b034ba5 100644 --- a/lib/namecase.rb +++ b/lib/namecase.rb @@ -59,19 +59,19 @@ def self.nc str, options = {} localstring.gsub!(/\bAp\b/, 'ap') # ap Welsh. localstring.gsub!(/\bBen(?=\s+\w)/,'ben') # ben Hebrew or forename Ben. localstring.gsub!(/\bDell([ae])\b/,'dell\1') # della and delle Italian. - localstring.gsub!(/\bD([aeiou])\b/,'d\1') # da, de, di Italian; du French; do Brasil + localstring.gsub!(/\bD([aeiou'’])\b/,'d\1') # da, de, di Italian; du, d' French; do Brasil localstring.gsub!(/\bD([ao]s)\b/,'d\1') # das, dos Brasileiros localstring.gsub!(/\bDe([lr])\b/,'de\1') # del Italian; der Dutch/Flemish. 
localstring.gsub!(/\bEl\b/,'el') # el Greek or El Spanish. localstring.gsub!(/\bLa\b/,'la') # la French or La Spanish. - localstring.gsub!(/\bL([eo])\b/,'l\1') # lo Italian; le French. + localstring.gsub!(/\bL([eo'’])\b/,'l\1') # lo Italian; le, l' French. localstring.gsub!(/\bVan(?=\s+\w)/,'van') # van German or forename Van. localstring.gsub!(/\bVon\b/,'von') # von Dutch/Flemish # Fix roman numeral names localstring.gsub!( / \b ( (?: [Xx]{1,3} | [Xx][Ll] | [Ll][Xx]{0,3} )? - (?: [Ii]{1,3} | [Ii][VvXx] | [Vv][Ii]{0,3} )? ) \b /x + (?: [Ii]{1,3} | [Ii][VvXx] | [Vv][Ii]{0,3} )? ) \b (?= \s | \z ) /x ) { |m| m.upcase } if options[:spanish] diff --git a/test/test_namecase.rb b/test/test_namecase.rb index 9f08b80..2ba23e7 100644 --- a/test/test_namecase.rb +++ b/test/test_namecase.rb @@ -14,6 +14,8 @@ def setup "ben Gurion", "Ben-Gurion", "Ben", "da Vinci", "di Caprio", "du Pont", "de Legate", + "d'Alembert", "d’Alembert", + "de l'Hôpital", "de l’Hôpital", "del Crond", "der Sind", "van der Post", "von Trapp", "la Poisson", "le Figaro", "Mack Knife", "Dougal MacDonald", From 90bf86abba3da82bd9fa6f0f77326b6dde93e93e Mon Sep 17 00:00:00 2001 From: SamGerber Date: Tue, 27 Jun 2017 02:31:36 -0500 Subject: [PATCH 4/4] Clarify comments --- lib/namecase.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/namecase.rb b/lib/namecase.rb index b034ba5..ec1b601 100644 --- a/lib/namecase.rb +++ b/lib/namecase.rb @@ -54,10 +54,10 @@ def self.nc str, options = {} end # Fixes for "son (daughter) of" etc - localstring.gsub!(/\bAl\b(?=.+\w)/, 'al') # al Arabic or forename Al. + localstring.gsub!(/\bAl\b(?=.+\w)/, 'al') # al Arabic, but NOT the first name Al. localstring.gsub!(/\b(Bin|Binti|Binte)\b/,'bin') # bin, binti, binte Arabic localstring.gsub!(/\bAp\b/, 'ap') # ap Welsh. - localstring.gsub!(/\bBen(?=\s+\w)/,'ben') # ben Hebrew or forename Ben. + localstring.gsub!(/\bBen(?=\s+\w)/,'ben') # ben Hebrew, but NOT the first name Ben. 
localstring.gsub!(/\bDell([ae])\b/,'dell\1') # della and delle Italian. localstring.gsub!(/\bD([aeiou'’])\b/,'d\1') # da, de, di Italian; du, d' French; do Brasil localstring.gsub!(/\bD([ao]s)\b/,'d\1') # das, dos Brasileiros @@ -65,7 +65,7 @@ def self.nc str, options = {} localstring.gsub!(/\bEl\b/,'el') # el Greek or El Spanish. localstring.gsub!(/\bLa\b/,'la') # la French or La Spanish. localstring.gsub!(/\bL([eo'’])\b/,'l\1') # lo Italian; le, l' French. - localstring.gsub!(/\bVan(?=\s+\w)/,'van') # van German or forename Van. + localstring.gsub!(/\bVan(?=\s+\w)/,'van') # van German, but NOT the first name Van. localstring.gsub!(/\bVon\b/,'von') # von Dutch/Flemish # Fix roman numeral names