103da56542
A Perl script to change MSVC mangled symbols from their form in 32-bit code to the corresponding mangled symbols in 64-bit code. The script is obviously not known to work 100%, but seems to work well enough for the few cases in LibreOffice (.map files) it has been used on so far.
327 lines
8.6 KiB
Perl
327 lines
8.6 KiB
Perl
#!/usr/bin/perl -w /* -*- indent-tabs-mode: nil -*- */
|
|
|
|
# Version: MPL 1.1 / GPLv3+ / LGPLv3+
|
|
#
|
|
# The contents of this file are subject to the Mozilla Public License Version
|
|
# 1.1 (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
# http://www.mozilla.org/MPL/
|
|
#
|
|
# Software distributed under the License is distributed on an "AS IS" basis,
|
|
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
# for the specific language governing rights and limitations under the
|
|
# License.
|
|
#
|
|
# The Initial Developer of the Original Code is
|
|
# Novell, Inc
|
|
# Portions created by the Initial Developer are Copyright (C) 2011 Novell,
|
|
# Inc. All Rights Reserved.
|
|
#
|
|
# Contributor(s): Tor Lillqvist <tml@iki.fi>
|
|
#
|
|
# Alternatively, the contents of this file may be used under the terms of
|
|
# either the GNU General Public License Version 3 or later (the "GPLv3+"), or
|
|
# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
|
|
# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
|
|
# instead of those above.
|
|
|
|
# Change MSVC mangled C++ names from 32-bit form to the corresponding
|
|
# 64-bit form. Each line of input can contain at most one mangled
|
|
# name.
|
|
|
|
# Based on experimentation with MSVC2008 and the following web pages:
|
|
|
|
# http://www.geoffchappell.com/viewer.htm?doc=studies/msvc/language/decoration/index.htm
|
|
# Thorough but incomplete. Still, describes details the below sources
|
|
# don't mention.
|
|
|
|
# http://cvs.winehq.com/cvsweb/wine/dlls/msvcrt/undname.c
|
|
# Wine's __unDname function, presumably the most complete, although
|
|
# not really written to act as "documentation"
|
|
|
|
# http://mearie.org/documents/mscmangle/
|
|
# Relatively complete but a bit badly structured and terse.
|
|
|
|
# http://en.wikipedia.org/wiki/Microsoft_Visual_C%2B%2B_Name_Mangling
|
|
# seems to be mostly a ripoff on the mearie.org page
|
|
|
|
# Example transformation:
|
|
# ??0ORealDynamicLoader@salhelper@@IAE@PAPAV01@ABVOUString@rtl@@1PAX2@Z =>
|
|
# ??0ORealDynamicLoader@salhelper@@IEAA@PEAPEAV01@AEBVOUString@rtl@@1PEAX2@Z
|
|
|
|
# It should be relatively easy to remove the modification parts of the
|
|
# below code and use the regex for some other task on MSVC mangled
|
|
# names.
|
|
|
|
# The regular expression below accepts also nonsensical mangled names,
|
|
# so it should not be used to verify correctness of mangled names.
|
|
|
|
use strict;
|
|
|
|
my @opstack = ();
|
|
|
|
sub parse_number($)
|
|
{
|
|
my ($num) = @_;
|
|
|
|
return $num + 1 if ($num eq '0' || ($num ge '1' && $num le '9'));
|
|
|
|
$num =~ tr/ABCDEFGHIJKLMNOP@/0123456789ABCDEF /;
|
|
hex($num);
|
|
}
|
|
|
|
sub format_number($)
|
|
{
|
|
my ($num) = @_;
|
|
|
|
return $num - 1 if ($num <= 10);
|
|
|
|
$num = sprintf("%X", $num);
|
|
$num =~ tr/0123456789ABCDEF/ABCDEFGHIJKLMNOP/;
|
|
$num.'@';
|
|
}
|
|
|
|
sub double_thunk($$)
|
|
{
|
|
my ($number, $position) = @_;
|
|
|
|
my $bytes = parse_number($number);
|
|
$bytes *= 2;
|
|
push(@opstack, 'r '.($position - length($number)).' '.length($number).' '.format_number($bytes));
|
|
}
|
|
|
|
while (<>)
|
|
{
|
|
m/
|
|
# Named subpattern definitions. I use names of the form
|
|
# __CamelCase__ for the named subpatters so that they are easier
|
|
# to see.
|
|
(?(DEFINE)
|
|
(?<__Number__>
|
|
\?? ([0-9] | [A-P]+@)
|
|
)
|
|
(?<__32BitChecksum__>
|
|
[A-P]{8}@
|
|
)
|
|
(?<__CallingConvention__>
|
|
(?:
|
|
[AB]
|
|
|
|
|
[C-L]
|
|
(?{ push(@opstack, 'r '.(pos()-1).' 1 A cdecl'); })
|
|
)
|
|
)
|
|
(?<__StringLiteralText__>
|
|
(?:
|
|
[_a-zA-Z0-9]
|
|
|
|
|
\?\$[A-P][A-P]
|
|
|
|
|
\?[0-9A-Za-z]
|
|
){1,20}
|
|
)
|
|
(?<__Identifier__>
|
|
[_a-zA-Z\$][_a-zA-Z0-9\$]*@
|
|
)
|
|
(?<__ArgsZTerminated__>
|
|
(?&__DataTypeInArgs__)+ @? Z
|
|
)
|
|
(?<__ArgsNonZTerminated__>
|
|
(?&__DataTypeInArgs__)+ @?
|
|
)
|
|
(?<__TemplateName__>
|
|
(?&__Identifier__) (?&__ArgsNonZTerminated__)
|
|
)
|
|
(?<__Class__>
|
|
(?:
|
|
[0-9]
|
|
|
|
|
\?\$ (?&__TemplateName__)
|
|
|
|
|
(?&__Identifier__)
|
|
)+@
|
|
)
|
|
(?<__DataTypeCommon__>
|
|
(?:
|
|
# extended types like _int64, bool and wchar_t
|
|
_[D-NW]
|
|
|
|
|
# simple types
|
|
[C-KMNOXZ]
|
|
|
|
|
# class, struct, union, cointerface
|
|
[TUVY] (?&__Class__)
|
|
|
|
|
# references
|
|
[AB]
|
|
(?{ push(@opstack, 'i '.pos().' E reference'); })
|
|
(?&__ModifiedType__)
|
|
|
|
|
# pointers
|
|
[QRS]
|
|
(?{ push(@opstack, 'i '.pos().' E pointer'); })
|
|
(?&__ModifiedType__)
|
|
|
|
|
P
|
|
(?:
|
|
# function pointer
|
|
6 (?&__CallingConvention__) (?&__DataTypeNotInArgs__) (?&__ArgsZTerminated__)
|
|
|
|
|
# other pointer
|
|
(?{ push(@opstack, 'i '.pos().' E pointer'); })
|
|
(?&__ModifiedType__)
|
|
)
|
|
|
|
|
W 4 (?&__Class__)
|
|
|
|
|
[0-9]
|
|
|
|
|
\$ (?:
|
|
[0DQ] (?&__Number__)
|
|
|
|
|
F (?&__Number__){2}
|
|
|
|
|
G (?&__Number__){3}
|
|
|
|
|
\$ [ABCD] (?&__DataTypeNotInArgs__)
|
|
)
|
|
)
|
|
)
|
|
(?<__ModifiedType__>
|
|
[ABCD]
|
|
(?:
|
|
# multidimensional array
|
|
Y (?&__Number__)+
|
|
)?
|
|
(?&__DataTypeNotInArgs__)
|
|
)
|
|
(?<__DataTypeInArgs__>
|
|
(?:
|
|
(?&__DataTypeCommon__)
|
|
|
|
|
# template parameter
|
|
\? (?&__Number__)
|
|
)
|
|
)
|
|
(?<__DataTypeNotInArgs__>
|
|
(?:
|
|
(?&__DataTypeCommon__)
|
|
|
|
|
\? (?&__ModifiedType__)
|
|
)
|
|
)
|
|
)
|
|
|
|
# All mangled names start with a question mark
|
|
\?
|
|
(?:
|
|
# Ctors, dtors, operators etc have separate a priori defined
|
|
# special mangled names like the very simple ?0 for constructor
|
|
# and ?_R16789 for "RTTI Base Class Descriptor at (6,7,8,9)"
|
|
# whatever that might mean.
|
|
(
|
|
\?
|
|
([0-9A-Z]
|
|
|
|
|
_(?:
|
|
# C is for string literals, see below
|
|
# R is RTTI, see immediately below
|
|
[0-9ABD-QS-Z]
|
|
|
|
|
R0(?&__DataTypeNotInArgs__)
|
|
|
|
|
R1(?&__Number__){4}
|
|
|
|
|
R[234]
|
|
|
|
|
_(?:
|
|
E
|
|
)
|
|
)
|
|
)
|
|
)?
|
|
(?&__Class__)
|
|
|
|
(?:
|
|
# Static members and normal variables
|
|
[0-5]
|
|
(?&__DataTypeNotInArgs__)
|
|
[ABCD]
|
|
|
|
|
# Compiler-generated static
|
|
[67]
|
|
[ABCD]
|
|
(?:
|
|
@
|
|
|
|
|
(?&__Class__)
|
|
)
|
|
|
|
|
# Non-static Methods, implicit 'this'
|
|
[ABEFIJMNQRUV]
|
|
[AB]
|
|
(?{ push(@opstack, 'i '.(pos()-1).' E this'); })
|
|
(?&__CallingConvention__)
|
|
(?:
|
|
@
|
|
|
|
|
(?&__DataTypeNotInArgs__)
|
|
)
|
|
(?&__ArgsZTerminated__)
|
|
|
|
|
# Static methods
|
|
[CDKLST]
|
|
(?&__CallingConvention__)
|
|
(?:
|
|
@
|
|
|
|
|
(?&__DataTypeNotInArgs__)
|
|
)
|
|
(?&__ArgsZTerminated__)
|
|
|
|
|
# Thunks
|
|
[GHOPWX]
|
|
((?&__Number__))
|
|
(?{ double_thunk($^N, pos()); })
|
|
[AB]
|
|
(?{ push(@opstack, 'i '.(pos()-1).' E this'); })
|
|
(?&__CallingConvention__)
|
|
(?:
|
|
@
|
|
|
|
|
(?&__DataTypeNotInArgs__)
|
|
)
|
|
(?&__ArgsZTerminated__)
|
|
|
|
|
# Functions
|
|
[YZ]
|
|
(?&__CallingConvention__)
|
|
(?:
|
|
@
|
|
|
|
|
(?&__DataTypeNotInArgs__)
|
|
)
|
|
(?&__ArgsZTerminated__)
|
|
|
|
|
# Template
|
|
\$ (?&__Identifier__) (?&__ArgsNonZTerminated__)
|
|
)
|
|
|
|
|
# pooled string literals
|
|
\?_C\@_[01](?&__Number__)(?&__32BitChecksum__)(?&__StringLiteralText__)@
|
|
)
|
|
/x;
|
|
|
|
while (my $op = pop(@opstack))
|
|
{
|
|
# print STDERR "op=$op\n";
|
|
my @a = split (' ', $op);
|
|
if ($a[0] eq 'i') {
|
|
substr($_,$a[1],0) = $a[2];
|
|
} elsif ($a[0] eq 'r') {
|
|
substr($_,$a[1],$a[2]) = $a[3];
|
|
}
|
|
}
|
|
|
|
print;
|
|
}
|