diff --git a/nls/locale.nls b/nls/locale.nls index af2f061944a..caabb128ee4 100644 Binary files a/nls/locale.nls and b/nls/locale.nls differ diff --git a/nls/sortdefault.nls b/nls/sortdefault.nls index d67da7215d6..86389d71e4d 100644 Binary files a/nls/sortdefault.nls and b/nls/sortdefault.nls differ diff --git a/tools/make_unicode b/tools/make_unicode index 1ab5575b465..33253104eb6 100755 --- a/tools/make_unicode +++ b/tools/make_unicode @@ -2108,23 +2108,28 @@ sub load_data() $decomp_compat_table[$src] = \@seq; } - if ($decomp =~ /^<narrow>\s+([0-9a-fA-F]+)$/) - { - $halfwidth_table[hex $1] = $src; - $fullwidth_table[$src] = hex $1; - } - elsif ($decomp =~ /^<wide>\s+([0-9a-fA-F]+)$/) - { - next if hex $1 == 0x5c; # don't remap backslash - $fullwidth_table[hex $1] = $src; - $halfwidth_table[$src] = hex $1; - } - elsif ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/) + if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/) { # decomposition of the form "<foo> 1234" -> use char if type is known - if ($1 eq "isolated" || $1 eq "final" || $1 eq "initial" || $1 eq "medial") + my $dst = hex $2; + if ($1 eq "narrow") { - ${joining_forms{$1}}[hex $2] = $src; + $halfwidth_table[$dst] = $src; + $fullwidth_table[$src] = $dst; + } + elsif ($1 eq "wide") + { + next if $dst == 0x5c; # don't remap backslash + $fullwidth_table[$dst] = $src; + $halfwidth_table[$src] = $dst; + } + elsif ($1 eq "font" || $1 eq "square" || $1 eq "circle") + { + $fullwidth_table[$src] = $dst if $src >= 0x10000; + } + elsif ($1 eq "isolated" || $1 eq "final" || $1 eq "initial" || $1 eq "medial") + { + ${joining_forms{$1}}[$dst] = $src; } } elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/) @@ -2143,7 +2148,11 @@ sub load_data() my $dst = hex $1; # Single char decomposition $decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ]; - $cjk_compat_table[$src] = $dst if $name =~ /^CJK COMPATIBILITY IDEOGRAPH/; + if ($name =~ /^CJK COMPATIBILITY IDEOGRAPH/) + { + $cjk_compat_table[$src] = $dst; + $fullwidth_table[$src] = $dst if 
$src >= 0x10000; + } } } } @@ -2236,16 +2245,24 @@ sub load_data() { s/\#.*//; # remove comments next if /^\s*$/; - if (/^U\+([0-9a-fA-F]+)\s+kTraditionalVariant\s+U\+([0-9a-fA-F]+)/) + if (/^U\+([0-9a-fA-F]{4})\s+kTraditionalVariant\s+U\+([0-9a-fA-F]{4})$/) { + next if hex $1 < 0x4dc0; # skip extension A $chinese_traditional_table[hex $1] = hex $2; } - elsif (/^U\+([0-9a-fA-F]+)\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]+)/) + elsif (/^U\+([0-9a-fA-F]{4})\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]{4})$/) { + next if hex $1 < 0x4dc0; # skip extension A $chinese_simplified_table[hex $1] = hex $2; } } close $UNIHAN; + foreach my $i (0xf900..0xfaff) + { + next unless defined $cjk_compat_table[$i]; + next if defined $chinese_simplified_table[$cjk_compat_table[$i]]; + $chinese_simplified_table[$i] = $cjk_compat_table[$i]; + } } @@ -3188,21 +3205,22 @@ sub compress_array($$@) ################################################################ # dump a char -> 16-bit value mapping table using two-level tables -sub dump_two_level_mapping($$@) +sub dump_two_level_mapping($$$@) { my $name = shift; my $def = shift; my $size = shift; my $type = $size == 16 ? 
"unsigned short" : "unsigned int"; - my @row_array = compress_array( 4096, $def, @_[0..65535] ); - my @array = compress_array( 256, 0, @row_array[0..4095] ); + my (@array, @row_array, @data, @row_data); + (@row_array[0..4095], @data) = compress_array( 4096, $def, @_[0..65535] ); + (@array[0..255], @row_data) = compress_array( 256, 0, @row_array ); - for (my $i = 256; $i < @array; $i++) { $array[$i] += @array - 4096; } + for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += @row_data + 256 - 4096; } - printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%d] =\n{\n", $type, $name, @array + @row_array - 4096; - printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array[0..255] ); - printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @array[256..$#array] ); - printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @row_array[4096..$#row_array] ); + printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%d] =\n{\n", $type, $name, @array + @row_data + @data; + printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array ); + printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @row_data ); + printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @data ); } ################################################################ @@ -3235,22 +3253,39 @@ sub dump_three_level_mapping($$@) sub dump_binary_case_table(@) { my (@table) = @_; - my $max_char = 0x10000; - my $level1 = $max_char / 16; - my $level2 = $level1 / 16; - my @difftable; + my @res; + for (my $i = 0; $i < @table; $i++) { next unless defined $table[$i]; - $difftable[$i] = ($table[$i] - $i) & 0xffff; + $difftable[$i] = ($table[$i] - $i) & 0xffffffff; } - my @row_array = compress_array( $level1, 0, @difftable[0..$max_char-1] ); - my @array = compress_array( $level2, 0, @row_array[0..$level1-1] ); - my $offset = @array - $level1; - for (my $i = $level2; $i < @array; $i++) { $array[$i] += $offset; } - return pack "S<*", 1 + $offset + @row_array, @array, 
@row_array[$level1..$#row_array]; + my (@low_array1, @low_array2, @low_data, @low_row_data); + (@low_array2[0..4095], @low_data) = compress_array( 4096, 0, @difftable[0..65535] ); + (@low_array1[0..255], @low_row_data) = compress_array( 256, 0, @low_array2 ); + + if (scalar @table > 0x10000) + { + my (@high_array1, @high_array2, @high_data, @high_row_data); + (@high_array2[0..32767], @high_data) = compress_array( 32768, 0, @difftable[65536..$MAX_CHAR] ); + (@high_array1[0..1023], @high_row_data) = compress_array( 1024, 0, @high_array2 ); + + push @res, map { $_ + 1024; } @low_array1; + push @res, map { $_ + @res + @low_row_data + @low_data; } @high_array1; + push @res, map { $_ + @res + @low_row_data - 4096; } @low_row_data; + push @res, @low_data; + push @res, map { 2 * ($_ - 32768) + @res + @high_row_data; } @high_row_data; + return pack( "S<*", 1 + scalar @res + 2 * scalar @high_data, @res ) . pack( "L<*", @high_data ); + } + else + { + push @res, @low_array1; + push @res, map { $_ + @res + @low_row_data - 4096; } @low_row_data; + push @res, @low_data; + return pack "S<*", 1 + scalar @res, @res; + } } ################################################################ @@ -3261,8 +3296,8 @@ sub dump_intl_nls($) my @lower_table = @tolower_table; remove_linguistic_mappings( \@upper_table, \@lower_table ); - my $upper = dump_binary_case_table( @upper_table ); - my $lower = dump_binary_case_table( @lower_table ); + my $upper = dump_binary_case_table( @upper_table[0..65535] ); + my $lower = dump_binary_case_table( @lower_table[0..65535] ); my $filename = shift; open OUTPUT,">$filename.new" or die "Cannot create $filename"; @@ -4000,12 +4035,13 @@ sub dump_sortkey_table($$) $table[$i] = $typestr{$str}; } - my @rows = compress_array( 4096, 0, @table[0..65535] ); - my @array = compress_array( 256, 0, @rows[0..4095] ); + my (@rows, @array, @data, @row_data); + (@rows[0..4095], @data) = compress_array( 4096, 0, @table[0..65535] ); + (@array[0..255], @row_data) = 
compress_array( 256, 0, @rows ); for (my $i = 0; $i < 256; $i++) { $array[$i] *= 2; } # we need byte offsets - for (my $i = 256; $i < @array; $i++) { $array[$i] += 2 * @array - 4096; } + for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += 2 * @row_data + 512 - 4096; } - my $arraystr = pack("S<*", @array) . pack("C*", @rows[4096..$#rows]); + my $arraystr = pack("S<*", @array, @row_data) . pack("C*", @data); my $chartypes = pack "S<2", 4 + length($types) + length($arraystr), 2 + length($types); $chartypes = align_string( 8, $chartypes . $types . $arraystr ); @@ -5101,6 +5137,15 @@ sub build_charmaps_data() my $data = ""; # MAP_FOLDDIGITS + my @digits = (ord('0') .. ord('9')); + $digitmap_table[0x3007] = $digits[0]; # Ideographic Zero + @digitmap_table[0x0c78..0x0c7b] = @digits[0..3]; # Telugu Fraction Digits + @digitmap_table[0x0c7c..0x0c7e] = @digits[1..3]; # Telugu Fraction Digits + @digitmap_table[0x3021..0x3029] = @digits[1..9]; # Hangzhou Numerals + @digitmap_table[0xa8e0..0xa8e9] = @digits; # Combining Devanagari Digits + @digitmap_table[0x10107..0x1010f] = @digits[1..9]; # Aegean Numbers + $digitmap_table[0x10320] = $digits[1]; # Old Italic Numerals + $digitmap_table[0x10321] = $digits[5]; # Old Italic Numerals $data .= dump_binary_case_table( @digitmap_table ); # CJK compatibility map diff --git a/tools/winedump/nls.c b/tools/winedump/nls.c index 6f69aeeb538..ab628517362 100644 --- a/tools/winedump/nls.c +++ b/tools/winedump/nls.c @@ -45,6 +45,17 @@ static unsigned short mapchar( const unsigned short *table, unsigned int len, un return ch + table[off]; } +static unsigned int mapchar_high( const unsigned short *table, unsigned int len, unsigned int ch ) +{ + unsigned short ch1 = 0xd800 | ((ch - 0x10000) >> 10); + unsigned short ch2 = 0xdc00 | (ch & 0x3ff); + unsigned int off = table[256 + (ch1 - 0xd800)] + ((ch2 >> 5) & 0x1f); + if (off >= len) return 0; + off = table[off] + 2 * (ch2 & 0x1f); + if (off >= len) return 0; + return ch + *(UINT *)&table[off]; 
+} + static void dump_offset_table( const unsigned short *table, unsigned int len ) { int i, j, empty, ch; @@ -67,6 +78,27 @@ static void dump_offset_table( const unsigned short *table, unsigned int len ) else printf( " %04x", ch ); } } + if (table[0] >= 0x500) + { + for (i = 0x10000; i < 0x110000; i += 16) + { + for (j = 0; j < 16; j++) if (mapchar_high( table, len, i + j ) != i + j) break; + if (j == 16) + { + empty++; + continue; + } + if (empty) printf( "\n[...]" ); + empty = 0; + printf( "\n%06x:", i ); + for (j = 0; j < 16; j++) + { + ch = mapchar_high( table, len, i + j ); + if (ch == i + j) printf( " ......" ); + else printf( " %06x", ch ); + } + } + } if (empty) printf( "\n[...]" ); }