Fix sorting order for Ukrainian locale (BZ 17293)

The introduction to the official orthography rules for the Ukrainian
language (http://spelling.ulif.org.ua/peredmova.htm) notes that only
the apostrophe does not affect the order of words when sorting.
As can be seen from the official alphabet, the soft sign
(U+044C/U+042C) has a fixed position and thus affects the order.
Likewise, the letters "е" and "є" (CYR-IE: U+0435/U+0415 and UKR-IE:
U+0454/U+0404) have their own positions and should be sorted as
separate letters.
This also corresponds to the official Unicode collation chart for these
letters: http://unicode.org/charts/collation/chart_Cyrillic.html
This commit is contained in:
Andriy Rysin 2015-05-26 23:51:18 +05:30 committed by Siddhesh Poyarekar
parent f09b861541
commit 6afb9c0175
5 changed files with 76 additions and 73 deletions

View File

@ -1,3 +1,8 @@
2015-05-26 Andriy Rysin <arysin@gmail.com>
[BZ #17293]
* uk_UA: Fix sorting order for Ukrainian locale
2015-05-26 Marko Myllynen <myllynen@redhat.com>
* stdlib/monetary.h: Fix comment.

18
NEWS
View File

@ -11,15 +11,15 @@ Version 2.22
438, 4719, 6792, 13028, 13064, 14094, 14841, 14906, 15319, 15467, 15790,
15969, 16159, 16339, 16351, 16352, 16512, 16560, 16704, 16783, 16850,
17053, 17090, 17195, 17269, 17523, 17542, 17569, 17581, 17588, 17596,
17620, 17621, 17628, 17631, 17692, 17711, 17715, 17776, 17779, 17792,
17836, 17912, 17916, 17930, 17932, 17944, 17949, 17964, 17965, 17967,
17969, 17978, 17987, 17991, 17996, 17998, 17999, 18007, 18019, 18020,
18029, 18030, 18032, 18036, 18038, 18039, 18042, 18043, 18046, 18047,
18049, 18068, 18080, 18093, 18100, 18104, 18110, 18111, 18125, 18128,
18138, 18185, 18196, 18197, 18206, 18210, 18211, 18217, 18220, 18221,
18234, 18244, 18247, 18287, 18319, 18333, 18346, 18397, 18409, 18410,
18412, 18418, 18434, 18444.
17053, 17090, 17195, 17269, 17293, 17523, 17542, 17569, 17581, 17588,
17596, 17620, 17621, 17628, 17631, 17692, 17711, 17715, 17776, 17779,
17792, 17836, 17912, 17916, 17930, 17932, 17944, 17949, 17964, 17965,
17967, 17969, 17978, 17987, 17991, 17996, 17998, 17999, 18007, 18019,
18020, 18029, 18030, 18032, 18036, 18038, 18039, 18042, 18043, 18046,
18047, 18049, 18068, 18080, 18093, 18100, 18104, 18110, 18111, 18125,
18128, 18138, 18185, 18196, 18197, 18206, 18210, 18211, 18217, 18220,
18221, 18234, 18244, 18247, 18287, 18319, 18333, 18346, 18397, 18409,
18410, 18412, 18418, 18434, 18444.
* Cache information can be queried via sysconf() function on s390 e.g. with
_SC_LEVEL1_ICACHE_SIZE as argument.

View File

@ -37,7 +37,7 @@ test-srcs := collate-test xfrm-test tst-fmon tst-rpmatch tst-trans \
tst-ctype tst-langinfo tst-langinfo-static tst-numeric
test-input := de_DE.ISO-8859-1 en_US.ISO-8859-1 da_DK.ISO-8859-1 \
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 tr_TR.UTF-8 fr_FR.UTF-8 \
si_LK.UTF-8
si_LK.UTF-8 uk_UA.UTF-8
test-input-data = $(addsuffix .in, $(basename $(test-input)))
test-output := $(foreach s, .out .xout, \
$(addsuffix $s, $(basename $(test-input))))
@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \
zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \
tr_TR.ISO-8859-9 en_GB.UTF-8
tr_TR.ISO-8859-9 en_GB.UTF-8 uk_UA.UTF-8
include ../gen-locales.mk
endif

View File

@ -340,70 +340,14 @@ copy "<U0069><U0073><U006F><U0031><U0034><U0036><U0035><U0031><U005F><U0074><U00
% Ukrainian ghe is missing in iso14651_t1
collating-symbol <UKR-GHE>
% Soft sign and apostrophe must be ignored during sorting because they are
% just signs, not real letters.
% Apostrophe must be ignored during sorting because it's just a sign, not a
% real letter.
% ( "<U006E><U0060>"=="<U006E>", "<U0027><U0079><U0061>"=="<U0079><U0061>", etc. )
%
% Apostrophe already ignored by iso14651_t1.
%
% Soft sign '<U044C>' may follow only this set of nine characters [<U0432><U0434><U0437><U043B><U043D><U0440><U0441><U0442><U0446>].
% It only softens the pronunciation of these characters, so it should not
% impact sorting.
collating-symbol <V+SS>
collating-element <V-SS> from "<U0412><U042C>"
collating-element <V-ss> from "<U0412><U044C>"
collating-element <v-SS> from "<U0432><U042C>"
collating-element <v-ss> from "<U0432><U044C>"
collating-symbol <D+SS>
collating-element <D-SS> from "<U0414><U042C>"
collating-element <D-ss> from "<U0414><U044C>"
collating-element <d-SS> from "<U0434><U042C>"
collating-element <d-ss> from "<U0434><U044C>"
collating-symbol <Z+SS>
collating-element <Z-SS> from "<U0417><U042C>"
collating-element <Z-ss> from "<U0417><U044C>"
collating-element <z-SS> from "<U0437><U042C>"
collating-element <z-ss> from "<U0437><U044C>"
collating-symbol <L+SS>
collating-element <L-SS> from "<U041B><U042C>"
collating-element <L-ss> from "<U041B><U044C>"
collating-element <l-SS> from "<U043B><U042C>"
collating-element <l-ss> from "<U043B><U044C>"
collating-symbol <N+SS>
collating-element <N-SS> from "<U041D><U042C>"
collating-element <N-ss> from "<U041D><U044C>"
collating-element <n-SS> from "<U043D><U042C>"
collating-element <n-ss> from "<U043D><U044C>"
collating-symbol <R+SS>
collating-element <R-SS> from "<U0420><U042C>"
collating-element <R-ss> from "<U0420><U044C>"
collating-element <r-SS> from "<U0440><U042C>"
collating-element <r-ss> from "<U0440><U044C>"
collating-symbol <S+SS>
collating-element <S-SS> from "<U0421><U042C>"
collating-element <S-ss> from "<U0421><U044C>"
collating-element <s-SS> from "<U0441><U042C>"
collating-element <s-ss> from "<U0441><U044C>"
collating-symbol <T+SS>
collating-element <T-SS> from "<U0422><U042C>"
collating-element <T-ss> from "<U0422><U044C>"
collating-element <t-SS> from "<U0442><U042C>"
collating-element <t-ss> from "<U0442><U044C>"
collating-symbol <TSE+SS>
collating-element <TS-SS> from "<U0426><U042C>"
collating-element <TS-ss> from "<U0426><U044C>"
collating-element <ts-SS> from "<U0446><U042C>"
collating-element <ts-ss> from "<U0446><U044C>"
% In the official alphabet the soft sign is a letter and has a hard position in
% the order.
collating-symbol <CAP-MIN>
@ -489,11 +433,9 @@ reorder-after <U0434>
<U0455> "<U003C><U0043><U0059><U0052><U002D><U0044><U0045><U003E><U003C><U0043><U0059><U0052><U002D><U005A><U0045><U003E>";"<U003C><U004C><U0049><U0047><U003E><U003C><U004C><U0049><U0047><U003E>";"<U003C><U004D><U0049><U004E><U003E><U003C><U004D><U0049><U004E><U003E>";IGNORE % CYR-DZE
reorder-after <U0435>
<U0454> <CYR-IE>;<UKR-IE>;<MIN>;IGNORE
<U0451> <CYR-IE>;<CYR-IO>;<MIN>;IGNORE
<U044D> <CYR-IE>;<CYR-E>;<MIN>;IGNORE
reorder-after <U0415>
<U0404> <CYR-IE>;<UKR-IE>;<CAP>;IGNORE
<U0401> <CYR-IE>;<CYR-IO>;<CAP>;IGNORE
<U042D> <CYR-IE>;<CYR-E>;<CAP>;IGNORE

56
localedata/uk_UA.in Normal file
View File

@ -0,0 +1,56 @@
01010
Абажур
абажур
абажур-10
брама
вермішель
грати
Граття
граття
ґрати
ебонітовий
експорт
експосол
екс-посол
експоцентр
експрацівник
екс-працівник
еластичність
електрика
ельбор
елюент
епатаж
євгеніка
Європа
єдність
Жмих
жмих
зоря
и
і
ї
й
Карпати
криниця
лебідь
місяцевий
місяць
наразі
обапіл
об'їзд
об’їзд
обʼїзд
образ
опір
право
сонце
тарган
упродовж
фантастика
центр
чухатися
ш
щ
ь
ю
я