bug 744357 - implement mappings from Unicode's SpecialCasing.txt for text-transform. r=smontagu
author: Jonathan Kew <jkew@mozilla.com>
Tue, 24 Apr 2012 18:53:39 +0100
changeset 96805 9854d4b315ff34b01df457e8323e3b3e63239e60
parent 96804 8dd03fe9b613f66d364556a95856e2ce61ec6246
child 96806 0f9ae38003d13757224c92546ee1b650e76e9900
push id: 173
push user: lsblakk@mozilla.com
push date: Fri, 24 Aug 2012 15:39:16 +0000
treeherder: mozilla-release@bcc45eb1fb41 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: smontagu
bugs: 744357
milestone: 14.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 744357 - implement mappings from Unicode's SpecialCasing.txt for text-transform. r=smontagu
intl/unicharutil/tools/genSpecialCasingData.pl
intl/unicharutil/util/Makefile.in
intl/unicharutil/util/nsSpecialCasingData.cpp
intl/unicharutil/util/nsSpecialCasingData.h
intl/unicharutil/util/objs.mk
layout/generic/nsTextRunTransformations.cpp
new file mode 100755
--- /dev/null
+++ b/intl/unicharutil/tools/genSpecialCasingData.pl
@@ -0,0 +1,287 @@
+#!/usr/bin/env perl
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This tool is used to extract "special" (one-to-many) case mappings
+# into a form that can be used by nsTextRunTransformations.
+
+use strict;
+
+if (@ARGV != 2) { # exactly two input files are required
+  print STDERR <<__EOT; # keep the usage text out of the .cpp written to stdout
+# Run this tool using a command line of the form
+#
+#     perl genSpecialCasingData.pl UnicodeData.txt SpecialCasing.txt
+#
+# The nsSpecialCasingData.cpp file will be written to standard output.
+#
+# This tool will also write up-to-date versions of the test files
+#     all-{upper,lower,title}.html
+# and corresponding -ref files in the current directory.
+#
+__EOT
+  exit 1; # a bad invocation must not be reported as success
+}
+
+# Simple case mappings, general categories and canonical compositions read
+# from UnicodeData.txt; every hash except %compositions is keyed by code point.
+my (%allLower, %allUpper, %allTitle, %gc);
+my %compositions; # decomposition (space-separated hex) => composed char (hex)
+open my $ucd, '<', $ARGV[0]
+  or die "can't open $ARGV[0] (should be UnicodeData.txt): $!\n";
+while (my $line = <$ucd>) {
+  chomp $line;
+  my @fields = split /;/, $line;
+  next if ($fields[1] =~ /</); # ignore ranges etc
+  my $usv = hex "0x$fields[0]";
+  $allUpper{$usv} = $fields[12] if $fields[12] ne '';
+  $allLower{$usv} = $fields[13] if $fields[13] ne '';
+  $allTitle{$usv} = $fields[14] if $fields[14] ne '';
+  $gc{$usv} = $fields[2];
+  # we only care about non-singleton canonical decomps
+  my $decomp = $fields[5];
+  next if $decomp eq '' or $decomp =~ /</ or not $decomp =~ / /;
+  $compositions{$decomp} = sprintf("%04X", $usv);
+}
+close $ucd;
+
+# Multi-character ("special") mappings from SpecialCasing.txt, keyed by code
+# point, plus each character's name (the trailing comment on its data line).
+my (%specialLower, %specialUpper, %specialTitle, %charName);
+my @headerLines; # "SpecialCasing-..." and "Date:" comments, echoed to the output
+open my $sc, '<', $ARGV[1]
+  or die "can't open $ARGV[1] (should be SpecialCasing.txt): $!\n";
+while (my $line = <$sc>) {
+  chomp $line;
+  # take the comment text only when the match succeeds; never read a stale $1
+  my $comment = $line =~ m/#\s*(.+)$/ ? $1 : '';
+  if ($comment =~ /^(SpecialCasing-|Date:)/) {
+    push @headerLines, $comment;
+    next;
+  }
+  $line =~ s/#.*//;
+  $line =~ s/;\s*$//;
+  next if $line eq '';
+  my @fields = split /; */, $line;
+  next unless (scalar @fields) == 4;
+  my $usv = hex "0x$fields[0]";
+  addIfSpecial(\%specialLower, $usv, $fields[1]);
+  addIfSpecial(\%specialTitle, $usv, $fields[2]);
+  addIfSpecial(\%specialUpper, $usv, $fields[3]);
+  $charName{$usv} = $comment;
+}
+close $sc;
+
+print <<__END__;
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Auto-generated from files in the Unicode Character Database
+   by genSpecialCasingData.pl - do not edit! */
+
+#include "nsSpecialCasingData.h"
+#include "mozilla/Util.h" // for ArrayLength
+#include <stdlib.h>       // for bsearch
+
+__END__
+print "/* $_ */\n" for @headerLines; # echo the data file's version/date lines
+
+print <<__END__;
+
+using mozilla::unicode::MultiCharMapping;
+
+__END__
+
+printMappings('Lower', \%specialLower); # one CaseSpecials_<which> table each;
+printMappings('Upper', \%specialUpper); # the accessors emitted below refer to
+printMappings('Title', \%specialTitle); # these tables by name
+
+print <<__END__; # lookup code; each "\\" below is emitted as a single backslash
+static int CompareMCM(const void* aKey, const void* aElement)
+{
+  const PRUint32 ch = *static_cast<const PRUint32*>(aKey);
+  const MultiCharMapping* mcm = static_cast<const MultiCharMapping*>(aElement);
+  return int(ch) - int(mcm->mOriginalChar);
+}
+
+#define MAKE_SPECIAL_CASE_ACCESSOR(which) \\
+  const MultiCharMapping* \\
+  Special##which(PRUint32 aChar) \\
+  { \\
+    const void* p = bsearch(&aChar, CaseSpecials_##which, \\
+                            mozilla::ArrayLength(CaseSpecials_##which), \\
+                            sizeof(MultiCharMapping), CompareMCM); \\
+    return static_cast<const MultiCharMapping*>(p); \\
+  }
+
+namespace mozilla {
+namespace unicode {
+
+MAKE_SPECIAL_CASE_ACCESSOR(Lower)
+MAKE_SPECIAL_CASE_ACCESSOR(Upper)
+MAKE_SPECIAL_CASE_ACCESSOR(Title)
+
+} // namespace unicode
+} // namespace mozilla
+__END__
+
+# Fold the multi-character mappings into the full tables used for the tests.
+addSpecialsTo(@$_) for ([\%allLower, \%specialLower],
+                        [\%allUpper, \%specialUpper],
+                        [\%allTitle, \%specialTitle]);
+
+my $testFont = "../fonts/dejavu-sans/DejaVuSans.ttf";
+genTest(@$_) for (['lower', \%allLower], ['upper', \%allUpper]);
+genTitleTest();
+
+sub printMappings {
+  # Emit the C table CaseSpecials_<whichMapping>, one row per key of %$hash.
+  my ($whichMapping, $hash) = @_;
+  print "static const MultiCharMapping CaseSpecials_${whichMapping}[] = {\n";
+  for my $usv (sort { $a <=> $b } keys %$hash) {
+    my @chars = split(/ /, $hash->{$usv});
+    printf "  { 0x%04x, {0x%04x, 0x%04x, 0x%04x} }, // %s\n", $usv,
+           (map { hex("0x0$chars[$_]") } 0 .. 2), "$charName{$usv}";
+  }
+  print "};\n\n";
+};
+
+sub addIfSpecial {
+  # Store $mapping for $usv in %$hash, provided it maps to several characters.
+  my ($hash, $usv, $mapping) = @_;
+  return if $mapping !~ / /;
+  # Replace a leading decomposed sequence by its precomposed character, so
+  # that only compositions starting with the initial char are performed.
+  $mapping =~ s/^$_/$compositions{$_}/ for keys %compositions;
+  $hash->{$usv} = $mapping;
+};
+
+sub addSpecialsTo {
+  # Copy every entry of %$specials into %$hash, replacing existing values.
+  my ($hash, $specials) = @_;
+  # keys and values iterate in the same order, so a slice assignment suffices
+  @{$hash}{keys %$specials} = values %$specials;
+};
+
+sub genTest {
+  # Write all-<whichMapping>.html and its -ref file for the mappings in %$hash.
+  my ($whichMapping, $hash) = @_;
+  my @keys = sort { $a <=> $b } keys %$hash;
+  open my $out, '>', "all-$whichMapping.html" or die "can't write test: $!\n";
+  print {$out} <<__END__;
+<!DOCTYPE html>
+<html>
+ <head>
+  <meta http-equiv="Content-type" content="text/html; charset=utf-8">
+  <style type="text/css">
+   \@font-face { font-family: foo; src: url($testFont); }
+   p { font-family: foo; text-transform: ${whichMapping}case; }
+  </style>
+ </head>
+ <body>
+  <p>
+__END__
+  for my $key (@keys) { # test file: the original character, to be transformed
+    my $name = exists $charName{$key} ? " <!-- $charName{$key} -->" : '';
+    printf {$out} "&#x%04X;%s\n", $key, $name;
+  }
+  print {$out} <<__END__;
+  </p>
+ </body>
+</html>
+__END__
+  close $out or die "can't write test: $!\n";
+
+  open $out, '>', "all-$whichMapping-ref.html" or die "can't write ref: $!\n";
+  print {$out} <<__END__;
+<!DOCTYPE html>
+<html>
+ <head>
+  <meta http-equiv="Content-type" content="text/html; charset=utf-8">
+  <style type="text/css">
+   \@font-face { font-family: foo; src: url($testFont); }
+   p { font-family: foo; }
+  </style>
+ </head>
+ <body>
+  <p>
+__END__
+  for my $key (@keys) { # reference file: the expected mapped character(s)
+    my $name = exists $charName{$key} ? " <!-- $charName{$key} -->" : '';
+    print {$out} join('', map { "&#x$_;" } split(/ /, $hash->{$key})), "$name\n";
+  }
+  print {$out} <<__END__;
+  </p>
+ </body>
+</html>
+__END__
+  close $out or die "can't write ref: $!\n";
+};
+
+sub genTitleTest {
+  # Write all-title.html and all-title-ref.html for every key of %allTitle.
+  my @keys = sort { $a <=> $b } keys %allTitle;
+  open my $out, '>', "all-title.html" or die "can't write all-title.html: $!\n";
+  print {$out} <<__END__;
+<!DOCTYPE html>
+<html>
+ <head>
+  <meta http-equiv="Content-type" content="text/html; charset=utf-8">
+  <style type="text/css">
+   \@font-face { font-family: foo; src: url($testFont); }
+   p { font-family: foo; text-transform: capitalize; }
+  </style>
+ </head>
+ <body>
+  <p>
+__END__
+  for my $key (@keys) { # test file: each character is followed by an "x"
+    my $name = exists $charName{$key} ? " <!-- $charName{$key} -->" : '';
+    printf {$out} "&#x%04X;x%s\n", $key, $name;
+  }
+  print {$out} <<__END__;
+  </p>
+ </body>
+</html>
+__END__
+  close $out or die "can't write all-title.html: $!\n";
+
+  open $out, '>', "all-title-ref.html" or die "can't write all-title-ref.html: $!\n";
+  print {$out} <<__END__;
+<!DOCTYPE html>
+<html>
+ <head>
+  <meta http-equiv="Content-type" content="text/html; charset=utf-8">
+  <style type="text/css">
+   \@font-face { font-family: foo; src: url($testFont); }
+   p { font-family: foo; }
+  </style>
+ </head>
+ <body>
+  <p>
+__END__
+  for my $key (@keys) {
+    # capitalize is only applied to characters with GC=L* or N*...
+    if ($gc{$key} =~ /^[LN]/) {
+      # ...and those that are already uppercase are not transformed
+      if (exists $allUpper{$key}) {
+        print {$out} join('', map { "&#x$_;" } split(/ /, $allTitle{$key}));
+      } else {
+        printf {$out} "&#x%04X;", $key;
+      }
+      print {$out} "x";
+    } else {
+      printf {$out} "&#x%04X;X", $key;
+    }
+    print {$out} exists $charName{$key} ? " <!-- $charName{$key} -->\n" : "\n";
+  }
+  print {$out} <<__END__;
+  </p>
+ </body>
+</html>
+__END__
+  close $out or die "can't write all-title-ref.html: $!\n";
+};
--- a/intl/unicharutil/util/Makefile.in
+++ b/intl/unicharutil/util/Makefile.in
@@ -57,23 +57,25 @@ SDK_LIBRARY = $(LIBRARY)
 
 
 SDK_HEADERS = \
 	nsUnicharUtils.h \
 	$(NULL)
 
 EXPORTS = \
 	nsBidiUtils.h \
+	nsSpecialCasingData.h \
 	nsUnicodeProperties.h \
 	nsUnicodeScriptCodes.h \
 	$(NULL)
 
 CPPSRCS	= \
 	nsUnicharUtils.cpp \
 	nsBidiUtils.cpp \
+	nsSpecialCasingData.cpp \
 	nsUnicodeProperties.cpp \
 	$(NULL)
 
 FORCE_STATIC_LIB = 1
 USE_STATIC_LIBS = 1
 
 LOCAL_INCLUDES += -I$(srcdir)/../src
 
new file mode 100644
--- /dev/null
+++ b/intl/unicharutil/util/nsSpecialCasingData.cpp
@@ -0,0 +1,202 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Auto-generated from files in the Unicode Character Database
+   by genSpecialCasingData.pl - do not edit! */
+
+#include "nsSpecialCasingData.h"
+#include "mozilla/Util.h" // for ArrayLength
+#include <stdlib.h>       // for bsearch
+
+/* SpecialCasing-6.1.0.txt */
+/* Date: 2011-11-27, 05:10:51 GMT [MD] */
+
+using mozilla::unicode::MultiCharMapping;
+
+static const MultiCharMapping CaseSpecials_Lower[] = {
+  { 0x0130, {0x0069, 0x0307, 0x0000} }, // LATIN CAPITAL LETTER I WITH DOT ABOVE
+};
+
+static const MultiCharMapping CaseSpecials_Upper[] = {
+  { 0x00df, {0x0053, 0x0053, 0x0000} }, // LATIN SMALL LETTER SHARP S
+  { 0x0149, {0x02bc, 0x004e, 0x0000} }, // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+  { 0x01f0, {0x004a, 0x030c, 0x0000} }, // LATIN SMALL LETTER J WITH CARON
+  { 0x0390, {0x03aa, 0x0301, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+  { 0x03b0, {0x03ab, 0x0301, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+  { 0x0587, {0x0535, 0x0552, 0x0000} }, // ARMENIAN SMALL LIGATURE ECH YIWN
+  { 0x1e96, {0x0048, 0x0331, 0x0000} }, // LATIN SMALL LETTER H WITH LINE BELOW
+  { 0x1e97, {0x0054, 0x0308, 0x0000} }, // LATIN SMALL LETTER T WITH DIAERESIS
+  { 0x1e98, {0x0057, 0x030a, 0x0000} }, // LATIN SMALL LETTER W WITH RING ABOVE
+  { 0x1e99, {0x0059, 0x030a, 0x0000} }, // LATIN SMALL LETTER Y WITH RING ABOVE
+  { 0x1e9a, {0x0041, 0x02be, 0x0000} }, // LATIN SMALL LETTER A WITH RIGHT HALF RING
+  { 0x1f50, {0x03a5, 0x0313, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH PSILI
+  { 0x1f52, {0x03a5, 0x0313, 0x0300} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+  { 0x1f54, {0x03a5, 0x0313, 0x0301} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+  { 0x1f56, {0x03a5, 0x0313, 0x0342} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+  { 0x1f80, {0x1f08, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+  { 0x1f81, {0x1f09, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+  { 0x1f82, {0x1f0a, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+  { 0x1f83, {0x1f0b, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+  { 0x1f84, {0x1f0c, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+  { 0x1f85, {0x1f0d, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+  { 0x1f86, {0x1f0e, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1f87, {0x1f0f, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1f88, {0x1f08, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+  { 0x1f89, {0x1f09, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+  { 0x1f8a, {0x1f0a, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+  { 0x1f8b, {0x1f0b, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+  { 0x1f8c, {0x1f0c, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+  { 0x1f8d, {0x1f0d, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+  { 0x1f8e, {0x1f0e, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+  { 0x1f8f, {0x1f0f, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+  { 0x1f90, {0x1f28, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+  { 0x1f91, {0x1f29, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+  { 0x1f92, {0x1f2a, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+  { 0x1f93, {0x1f2b, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+  { 0x1f94, {0x1f2c, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+  { 0x1f95, {0x1f2d, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+  { 0x1f96, {0x1f2e, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1f97, {0x1f2f, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1f98, {0x1f28, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+  { 0x1f99, {0x1f29, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+  { 0x1f9a, {0x1f2a, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+  { 0x1f9b, {0x1f2b, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+  { 0x1f9c, {0x1f2c, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+  { 0x1f9d, {0x1f2d, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+  { 0x1f9e, {0x1f2e, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+  { 0x1f9f, {0x1f2f, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+  { 0x1fa0, {0x1f68, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+  { 0x1fa1, {0x1f69, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+  { 0x1fa2, {0x1f6a, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+  { 0x1fa3, {0x1f6b, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+  { 0x1fa4, {0x1f6c, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+  { 0x1fa5, {0x1f6d, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+  { 0x1fa6, {0x1f6e, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1fa7, {0x1f6f, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1fa8, {0x1f68, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+  { 0x1fa9, {0x1f69, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+  { 0x1faa, {0x1f6a, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+  { 0x1fab, {0x1f6b, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+  { 0x1fac, {0x1f6c, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+  { 0x1fad, {0x1f6d, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+  { 0x1fae, {0x1f6e, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+  { 0x1faf, {0x1f6f, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+  { 0x1fb2, {0x1fba, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+  { 0x1fb3, {0x0391, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+  { 0x1fb4, {0x0386, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+  { 0x1fb6, {0x0391, 0x0342, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+  { 0x1fb7, {0x0391, 0x0342, 0x0399} }, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1fbc, {0x0391, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+  { 0x1fc2, {0x1fca, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+  { 0x1fc3, {0x0397, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+  { 0x1fc4, {0x0389, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+  { 0x1fc6, {0x0397, 0x0342, 0x0000} }, // GREEK SMALL LETTER ETA WITH PERISPOMENI
+  { 0x1fc7, {0x0397, 0x0342, 0x0399} }, // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1fcc, {0x0397, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+  { 0x1fd2, {0x03aa, 0x0300, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+  { 0x1fd3, {0x03aa, 0x0301, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+  { 0x1fd6, {0x0399, 0x0342, 0x0000} }, // GREEK SMALL LETTER IOTA WITH PERISPOMENI
+  { 0x1fd7, {0x03aa, 0x0342, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+  { 0x1fe2, {0x03ab, 0x0300, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+  { 0x1fe3, {0x03ab, 0x0301, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+  { 0x1fe4, {0x03a1, 0x0313, 0x0000} }, // GREEK SMALL LETTER RHO WITH PSILI
+  { 0x1fe6, {0x03a5, 0x0342, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+  { 0x1fe7, {0x03ab, 0x0342, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+  { 0x1ff2, {0x1ffa, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+  { 0x1ff3, {0x03a9, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+  { 0x1ff4, {0x038f, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+  { 0x1ff6, {0x03a9, 0x0342, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+  { 0x1ff7, {0x03a9, 0x0342, 0x0399} }, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1ffc, {0x03a9, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+  { 0xfb00, {0x0046, 0x0046, 0x0000} }, // LATIN SMALL LIGATURE FF
+  { 0xfb01, {0x0046, 0x0049, 0x0000} }, // LATIN SMALL LIGATURE FI
+  { 0xfb02, {0x0046, 0x004c, 0x0000} }, // LATIN SMALL LIGATURE FL
+  { 0xfb03, {0x0046, 0x0046, 0x0049} }, // LATIN SMALL LIGATURE FFI
+  { 0xfb04, {0x0046, 0x0046, 0x004c} }, // LATIN SMALL LIGATURE FFL
+  { 0xfb05, {0x0053, 0x0054, 0x0000} }, // LATIN SMALL LIGATURE LONG S T
+  { 0xfb06, {0x0053, 0x0054, 0x0000} }, // LATIN SMALL LIGATURE ST
+  { 0xfb13, {0x0544, 0x0546, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN NOW
+  { 0xfb14, {0x0544, 0x0535, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN ECH
+  { 0xfb15, {0x0544, 0x053b, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN INI
+  { 0xfb16, {0x054e, 0x0546, 0x0000} }, // ARMENIAN SMALL LIGATURE VEW NOW
+  { 0xfb17, {0x0544, 0x053d, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN XEH
+};
+
+static const MultiCharMapping CaseSpecials_Title[] = {
+  { 0x00df, {0x0053, 0x0073, 0x0000} }, // LATIN SMALL LETTER SHARP S
+  { 0x0149, {0x02bc, 0x004e, 0x0000} }, // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+  { 0x01f0, {0x004a, 0x030c, 0x0000} }, // LATIN SMALL LETTER J WITH CARON
+  { 0x0390, {0x03aa, 0x0301, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+  { 0x03b0, {0x03ab, 0x0301, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+  { 0x0587, {0x0535, 0x0582, 0x0000} }, // ARMENIAN SMALL LIGATURE ECH YIWN
+  { 0x1e96, {0x0048, 0x0331, 0x0000} }, // LATIN SMALL LETTER H WITH LINE BELOW
+  { 0x1e97, {0x0054, 0x0308, 0x0000} }, // LATIN SMALL LETTER T WITH DIAERESIS
+  { 0x1e98, {0x0057, 0x030a, 0x0000} }, // LATIN SMALL LETTER W WITH RING ABOVE
+  { 0x1e99, {0x0059, 0x030a, 0x0000} }, // LATIN SMALL LETTER Y WITH RING ABOVE
+  { 0x1e9a, {0x0041, 0x02be, 0x0000} }, // LATIN SMALL LETTER A WITH RIGHT HALF RING
+  { 0x1f50, {0x03a5, 0x0313, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH PSILI
+  { 0x1f52, {0x03a5, 0x0313, 0x0300} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+  { 0x1f54, {0x03a5, 0x0313, 0x0301} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+  { 0x1f56, {0x03a5, 0x0313, 0x0342} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+  { 0x1fb2, {0x1fba, 0x0345, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+  { 0x1fb4, {0x0386, 0x0345, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+  { 0x1fb6, {0x0391, 0x0342, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+  { 0x1fb7, {0x0391, 0x0342, 0x0345} }, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1fc2, {0x1fca, 0x0345, 0x0000} }, // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+  { 0x1fc4, {0x0389, 0x0345, 0x0000} }, // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+  { 0x1fc6, {0x0397, 0x0342, 0x0000} }, // GREEK SMALL LETTER ETA WITH PERISPOMENI
+  { 0x1fc7, {0x0397, 0x0342, 0x0345} }, // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+  { 0x1fd2, {0x03aa, 0x0300, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+  { 0x1fd3, {0x03aa, 0x0301, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+  { 0x1fd6, {0x0399, 0x0342, 0x0000} }, // GREEK SMALL LETTER IOTA WITH PERISPOMENI
+  { 0x1fd7, {0x03aa, 0x0342, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+  { 0x1fe2, {0x03ab, 0x0300, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+  { 0x1fe3, {0x03ab, 0x0301, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+  { 0x1fe4, {0x03a1, 0x0313, 0x0000} }, // GREEK SMALL LETTER RHO WITH PSILI
+  { 0x1fe6, {0x03a5, 0x0342, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+  { 0x1fe7, {0x03ab, 0x0342, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+  { 0x1ff2, {0x1ffa, 0x0345, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+  { 0x1ff4, {0x038f, 0x0345, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+  { 0x1ff6, {0x03a9, 0x0342, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+  { 0x1ff7, {0x03a9, 0x0342, 0x0345} }, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+  { 0xfb00, {0x0046, 0x0066, 0x0000} }, // LATIN SMALL LIGATURE FF
+  { 0xfb01, {0x0046, 0x0069, 0x0000} }, // LATIN SMALL LIGATURE FI
+  { 0xfb02, {0x0046, 0x006c, 0x0000} }, // LATIN SMALL LIGATURE FL
+  { 0xfb03, {0x0046, 0x0066, 0x0069} }, // LATIN SMALL LIGATURE FFI
+  { 0xfb04, {0x0046, 0x0066, 0x006c} }, // LATIN SMALL LIGATURE FFL
+  { 0xfb05, {0x0053, 0x0074, 0x0000} }, // LATIN SMALL LIGATURE LONG S T
+  { 0xfb06, {0x0053, 0x0074, 0x0000} }, // LATIN SMALL LIGATURE ST
+  { 0xfb13, {0x0544, 0x0576, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN NOW
+  { 0xfb14, {0x0544, 0x0565, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN ECH
+  { 0xfb15, {0x0544, 0x056b, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN INI
+  { 0xfb16, {0x054e, 0x0576, 0x0000} }, // ARMENIAN SMALL LIGATURE VEW NOW
+  { 0xfb17, {0x0544, 0x056d, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN XEH
+};
+
+static int CompareMCM(const void* aKey, const void* aElement) // bsearch comparator
+{
+  const PRUint32 ch = *static_cast<const PRUint32*>(aKey); // the character being looked up
+  const MultiCharMapping* mcm = static_cast<const MultiCharMapping*>(aElement);
+  return int(ch) - int(mcm->mOriginalChar); // <0, 0 or >0: key is below, at or above this entry
+}
+
+#define MAKE_SPECIAL_CASE_ACCESSOR(which) \
+  const MultiCharMapping* \
+  Special##which(PRUint32 aChar) \
+  { \
+    const void* p = bsearch(&aChar, CaseSpecials_##which, \
+                            mozilla::ArrayLength(CaseSpecials_##which), \
+                            sizeof(MultiCharMapping), CompareMCM); \
+    return static_cast<const MultiCharMapping*>(p); \
+  }
+
+namespace mozilla {
+namespace unicode {
+
+MAKE_SPECIAL_CASE_ACCESSOR(Lower) // defines SpecialLower(), searching CaseSpecials_Lower
+MAKE_SPECIAL_CASE_ACCESSOR(Upper) // defines SpecialUpper(), searching CaseSpecials_Upper
+MAKE_SPECIAL_CASE_ACCESSOR(Title) // defines SpecialTitle(), searching CaseSpecials_Title
+
+} // namespace unicode
+} // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/intl/unicharutil/util/nsSpecialCasingData.h
@@ -0,0 +1,26 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "prtypes.h"
+
+namespace mozilla {
+namespace unicode {
+
+// Multi-character mappings (from SpecialCasing.txt) map a single Unicode
+// value to a sequence of 2 or 3 Unicode characters. There are currently none
+// defined outside the BMP, so we can use PRUnichar here. Unused trailing
+// positions in mMappedChars are set to 0.
+struct MultiCharMapping {
+  PRUnichar mOriginalChar;
+  PRUnichar mMappedChars[3];
+};
+
+// Return a pointer to the special case mapping for the given character;
+// returns NULL if no such mapping is defined.
+const MultiCharMapping* SpecialUpper(PRUint32 aCh);
+const MultiCharMapping* SpecialLower(PRUint32 aCh);
+const MultiCharMapping* SpecialTitle(PRUint32 aCh);
+
+} // namespace unicode
+} // namespace mozilla
--- a/intl/unicharutil/util/objs.mk
+++ b/intl/unicharutil/util/objs.mk
@@ -32,12 +32,13 @@
 # the provisions above, a recipient may use your version of this file under
 # the terms of any one of the MPL, the GPL or the LGPL.
 #
 # ***** END LICENSE BLOCK *****
 
 INTL_UNICHARUTIL_UTIL_LCPPSRCS = \
 	nsUnicharUtils.cpp \
 	nsBidiUtils.cpp \
+	nsSpecialCasingData.cpp \
 	nsUnicodeProperties.cpp \
 	$(NULL)
 
 INTL_UNICHARUTIL_UTIL_CPPSRCS = $(addprefix $(topsrcdir)/intl/unicharutil/util/, $(INTL_UNICHARUTIL_UTIL_LCPPSRCS))
--- a/layout/generic/nsTextRunTransformations.cpp
+++ b/layout/generic/nsTextRunTransformations.cpp
@@ -42,18 +42,17 @@
 #include "nsGkAtoms.h"
 
 #include "nsStyleConsts.h"
 #include "nsStyleContext.h"
 #include "gfxContext.h"
 #include "nsContentUtils.h"
 #include "nsUnicharUtils.h"
 #include "nsUnicodeProperties.h"
-
-#define SZLIG 0x00DF
+#include "nsSpecialCasingData.h"
 
 // Unicode characters needing special casing treatment in tr/az languages
 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
 #define LATIN_SMALL_LETTER_DOTLESS_I           0x0131
 
 // Greek sigma needs custom handling for the lowercase transform; for details
 // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
 // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
@@ -153,21 +152,28 @@ nsTransformingTextRunFactory::MakeTextRu
 
 /**
  * Copy a given textrun, but merge certain characters into a single logical
  * character. Glyphs for a character are added to the glyph list for the previous
  * character and then the merged character is eliminated. Visually the results
  * are identical.
  * 
  * This is used for text-transform:uppercase when we encounter a SZLIG,
- * whose uppercase form is "SS".
+ * whose uppercase form is "SS", or other ligature or precomposed form
+ * that expands to multiple codepoints during case transformation.
  * 
  * This function is unable to merge characters when they occur in different
- * glyph runs. It's hard to see how this could happen, but if it does, we just
- * discard the characters-to-merge.
+ * glyph runs. This only happens in tricky edge cases where a character was
+ * decomposed by case-mapping (e.g. there's no precomposed uppercase version
+ * of an accented lowercase letter), and then font-matching caused the
+ * diacritics to be assigned to a different font than the base character.
+ * In this situation, the diacritic(s) get discarded, which is less than
+ * ideal, but they probably weren't going to render very well anyway.
+ * Bug 543200 will improve this by making font-matching operate on entire
+ * clusters instead of individual codepoints.
  * 
  * For simplicity, this produces a textrun containing all DetailedGlyphs,
  * no simple glyphs. So don't call it unless you really have merging to do.
  * 
  * @param aCharsToMerge when aCharsToMerge[i] is true, this character is
  * merged into the previous character
  */
 static void
@@ -183,19 +189,21 @@ MergeCharactersInTextRun(gfxTextRun* aDe
     gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
     nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType,
                                      offset, false);
     if (NS_FAILED(rv))
       return;
 
     bool anyMissing = false;
     PRUint32 mergeRunStart = iter.GetStringStart();
-    PRUint32 k;
-    for (k = iter.GetStringStart(); k < iter.GetStringEnd(); ++k) {
-      const gfxTextRun::CompressedGlyph g = aSrc->GetCharacterGlyphs()[k];
+    const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs();
+    gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
+    PRUint32 stringEnd = iter.GetStringEnd();
+    for (PRUint32 k = iter.GetStringStart(); k < stringEnd; ++k) {
+      const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
       if (g.IsSimpleGlyph()) {
         if (!anyMissing) {
           gfxTextRun::DetailedGlyph details;
           details.mGlyphID = g.GetSimpleGlyph();
           details.mAdvance = g.GetSimpleAdvance();
           details.mXOffset = 0;
           details.mYOffset = 0;
           glyphs.AppendElement(details);
@@ -205,50 +213,49 @@ MergeCharactersInTextRun(gfxTextRun* aDe
           anyMissing = true;
           glyphs.Clear();
         }
         if (g.GetGlyphCount() > 0) {
           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
         }
       }
 
-      // We could teach this method to handle merging of characters that aren't
-      // cluster starts or ligature group starts, but this is really only used
-      // to merge S's (uppercase &szlig;), so it's not worth it.
-
       if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
-        NS_ASSERTION(g.IsClusterStart() && g.IsLigatureGroupStart(),
-                     "Don't know how to merge this stuff");
+        // next char is supposed to merge with current, so loop without
+        // writing current merged glyph to the destination
         continue;
       }
 
-      NS_ASSERTION(mergeRunStart == k ||
-                   (g.IsClusterStart() && g.IsLigatureGroupStart()),
-                   "Don't know how to merge this stuff");
-
       // If the start of the merge run is actually a character that should
       // have been merged with the previous character (this can happen
-      // if there's a font change in the middle of a szlig, for example),
+      // if there's a font change in the middle of a case-mapped character
+      // that decomposed into a sequence of base+diacritics, for example),
       // just discard the entire merge run. See comment at start of this
       // function.
+      NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart],
+                       "unable to merge across a glyph run boundary, "
+                       "glyph(s) discarded");
       if (!aCharsToMerge[mergeRunStart]) {
-        gfxTextRun::CompressedGlyph mergedGlyphs =
-          aSrc->GetCharacterGlyphs()[mergeRunStart];
         if (anyMissing) {
-          mergedGlyphs.SetMissing(glyphs.Length());
+          mergedGlyph.SetMissing(glyphs.Length());
         } else {
-          mergedGlyphs.SetComplex(true, true, glyphs.Length());
+          mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
+                                 mergedGlyph.IsLigatureGroupStart(),
+                                 glyphs.Length());
         }
-        aDest->SetGlyphs(offset, mergedGlyphs, glyphs.Elements());
+        aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
         ++offset;
       }
 
       glyphs.Clear();
       anyMissing = false;
       mergeRunStart = k + 1;
+      if (mergeRunStart < stringEnd) {
+        mergedGlyph = srcGlyphs[mergeRunStart];
+      }
     }
     NS_ASSERTION(glyphs.Length() == 0,
                  "Leftover glyphs, don't request merging of the last character with its next!");  
   }
   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
 }
 
 static gfxTextRunFactory::Parameters
@@ -305,17 +312,17 @@ nsFontVariantTextRunFactory::RebuildText
         isLowercase = runIsLowercase;
       } else {
         if (styles[i]->GetStyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
           PRUint32 ch = str[i];
           if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
             ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
           }
           PRUint32 ch2 = ToUpperCase(ch);
-          isLowercase = ch != ch2 || ch == SZLIG;
+          isLowercase = ch != ch2 || mozilla::unicode::SpecialUpper(ch);
         } else {
           // Don't transform the character! I.e., pretend that it's not lowercase
         }
       }
     }
 
     if ((i == length || runIsLowercase != isLowercase) && runStart < i) {
       nsAutoPtr<nsTransformedTextRun> transformedChild;
@@ -394,17 +401,18 @@ nsCaseTransformTextRunFactory::RebuildTe
     nsStyleContext* styleContext = styles[i];
 
     charsToMergeArray.AppendElement(false);
     styleArray.AppendElement(styleContext);
     canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
 
     PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
       : styleContext->GetStyleText()->mTextTransform;
-    bool extraChar = false;
+    int extraChars = 0;
+    const mozilla::unicode::MultiCharMapping *mcm;
 
     if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
     }
 
     if (lang != styleContext->GetStyleFont()->mLanguage) {
       lang = styleContext->GetStyleFont()->mLanguage;
       if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
@@ -415,21 +423,29 @@ nsCaseTransformTextRunFactory::RebuildTe
         languageSpecificCasing = eDutch;
       } else {
         languageSpecificCasing = eNone;
       }
     }
 
     switch (style) {
     case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
-      if (languageSpecificCasing == eTurkish && ch == 'I') {
-        ch = LATIN_SMALL_LETTER_DOTLESS_I;
-        prevIsLetter = true;
-        sigmaIndex = PRUint32(-1);
-        break;
+      if (languageSpecificCasing == eTurkish) {
+        if (ch == 'I') {
+          ch = LATIN_SMALL_LETTER_DOTLESS_I;
+          prevIsLetter = true;
+          sigmaIndex = PRUint32(-1);
+          break;
+        }
+        if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+          ch = 'i';
+          prevIsLetter = true;
+          sigmaIndex = PRUint32(-1);
+          break;
+        }
       }
 
       // Special lowercasing behavior for Greek Sigma: note that this is listed
       // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
       // language-specific mapping; it applies regardless of the language of
       // the element.
       //
       // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
@@ -468,64 +484,90 @@ nsCaseTransformTextRunFactory::RebuildTe
           // to SMALL SIGMA
           ch = GREEK_SMALL_LETTER_SIGMA;
           sigmaIndex = PRUint32(-1);
         }
         prevIsLetter = true;
         break;
       }
 
-      ch = ToLowerCase(ch);
-
       // ignore diacritics for the purpose of contextual sigma mapping;
       // otherwise, reset prevIsLetter appropriately and clear the
       // sigmaIndex marker
       if (cat != nsIUGenCategory::kMark) {
         prevIsLetter = (cat == nsIUGenCategory::kLetter);
         sigmaIndex = PRUint32(-1);
       }
+
+      mcm = mozilla::unicode::SpecialLower(ch);
+      if (mcm) {
+        int j = 0;
+        while (j < 2 && mcm->mMappedChars[j + 1]) {
+          convertedString.Append(mcm->mMappedChars[j]);
+          ++extraChars;
+          ++j;
+        }
+        ch = mcm->mMappedChars[j];
+        break;
+      }
+
+      ch = ToLowerCase(ch);
       break;
 
     case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
-      if (ch == SZLIG) {
-        convertedString.Append('S');
-        extraChar = true;
-        ch = 'S';
-        break;
-      }
       if (languageSpecificCasing == eTurkish && ch == 'i') {
         ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
         break;
       }
+
+      mcm = mozilla::unicode::SpecialUpper(ch);
+      if (mcm) {
+        int j = 0;
+        while (j < 2 && mcm->mMappedChars[j + 1]) {
+          convertedString.Append(mcm->mMappedChars[j]);
+          ++extraChars;
+          ++j;
+        }
+        ch = mcm->mMappedChars[j];
+        break;
+      }
+
       ch = ToUpperCase(ch);
       break;
 
     case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
       if (capitalizeDutchIJ && ch == 'j') {
         ch = 'J';
         capitalizeDutchIJ = false;
         break;
       }
       capitalizeDutchIJ = false;
       if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
-        if (ch == SZLIG) {
-          convertedString.Append('S');
-          extraChar = true;
-          ch = 'S';
-          break;
-        }
         if (languageSpecificCasing == eTurkish && ch == 'i') {
           ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
           break;
         }
         if (languageSpecificCasing == eDutch && ch == 'i') {
           ch = 'I';
           capitalizeDutchIJ = true;
           break;
         }
+
+        mcm = mozilla::unicode::SpecialTitle(ch);
+        if (mcm) {
+          int j = 0;
+          while (j < 2 && mcm->mMappedChars[j + 1]) {
+            convertedString.Append(mcm->mMappedChars[j]);
+            ++extraChars;
+            ++j;
+          }
+          ch = mcm->mMappedChars[j];
+          break;
+        }
+
         ch = ToTitleCase(ch);
       }
       break;
 
     default:
       break;
     }
 
@@ -535,21 +577,22 @@ nsCaseTransformTextRunFactory::RebuildTe
       convertedString.Append(H_SURROGATE(ch));
       convertedString.Append(L_SURROGATE(ch));
       i++;
       charsToMergeArray.AppendElement(false);
       styleArray.AppendElement(styleContext);
       canBreakBeforeArray.AppendElement(false);
     }
 
-    if (extraChar) {
+    while (extraChars > 0) {
       ++extraCharsCount;
       charsToMergeArray.AppendElement(true);
       styleArray.AppendElement(styleContext);
       canBreakBeforeArray.AppendElement(false);
+      --extraChars;
     }
   }
 
   PRUint32 flags;
   gfxTextRunFactory::Parameters innerParams =
       GetParametersForInner(aTextRun, &flags, aRefContext);
   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();