3 // This file is part of class-dump, a utility for examining the Objective-C segment of Mach-O files.
4 // Copyright (C) 1997-2019 Steve Nygard.
6 #import "CDTypeParser.h"
8 #import "CDMethodType.h"
10 #import "CDTypeName.h"
11 #import "CDTypeLexer.h"
// Name of the NSException raised by the parser when the input does not match what it expects
// (see -match:enterState: and -error:).
13 NSString *CDExceptionName_SyntaxError = @"CDExceptionName_SyntaxError";
// Domain of the NSError objects reported by -parseMethodType: and -parseType:.
15 NSString *CDErrorDomain_TypeParser = @"CDErrorDomain_TypeParser";
// userInfo keys of those NSError objects:
// the complete string handed to the lexer,
17 NSString *CDErrorKey_Type = @"CDErrorKey_Type";
// the lexer's remaining string at the time of the failure,
18 NSString *CDErrorKey_RemainingString = @"CDErrorKey_RemainingString";
// @"method" or @"variable", depending on which entry point failed,
19 NSString *CDErrorKey_MethodOrVariable = @"CDErrorKey_MethodOrVariable";
// and a multi-line description combining the exception reason, the type and the remaining string.
20 NSString *CDErrorKey_LocalizedLongDescription = @"CDErrorKey_LocalizedLongDescription";
// When YES, -match:enterState: logs every token it matches.
22 static BOOL debug = NO;
/// Returns a short description of a lexer token for log and error messages.
///
/// Tokens that are printable ASCII characters are rendered as "<code>(<character>)",
/// e.g. 64 becomes "64(@)". Every other value (0, control characters, and the
/// multi-character token codes above the ASCII range) is rendered as the number alone,
/// so the result never embeds a NUL, a control character or a truncated character.
///
/// @param token The token code to describe.
/// @return A newly created description string.
static NSString *CDTokenDescription(int token)
{
    BOOL isPrintableASCII = (token >= 0x20 && token < 0x7f);

    if (isPrintableASCII)
        return [NSString stringWithFormat:@"%d(%c)", token, token];

    return [NSString stringWithFormat:@"%d", token];
}
// Class extension of CDTypeParser.
// NOTE(review): its declarations are not visible in this excerpt; the implementation reads
// self.lexer, so a lexer property is presumably declared here -- confirm.
32 @interface CDTypeParser ()
37 @implementation CDTypeParser
/// Creates a parser for the given encoded type string.
///
/// Before the string is handed to the lexer, every occurrence of "<unnamed>::" is
/// replaced with "unnamed::".
///
/// @param string The encoded type (or method type) to parse.
/// @return The initialized parser, or nil if the superclass initializer returned nil.
- (id)initWithString:(NSString *)string;
{
    self = [super init];
    if (self == nil)
        return nil;

    // Preprocessing: rewrite "<unnamed>::" as "unnamed::" on a private mutable copy.
    NSMutableString *normalized = [string mutableCopy];
    NSRange wholeString = NSMakeRange(0, [string length]);
    [normalized replaceOccurrencesOfString:@"<unnamed>::"
                                withString:@"unnamed::"
                                   options:(NSStringCompareOptions)0
                                     range:wholeString];

    _lexer = [[CDTypeLexer alloc] initWithString:normalized];

    return self;
}
/// Parses the receiver's string as a method type encoding.
///
/// Parsing problems are raised internally as exceptions; this entry point catches them
/// and converts them into an NSError.
///
/// @param error Optional. On failure, and only if non-NULL, receives an NSError in
///              CDErrorDomain_TypeParser. Its code is CDTypeParserCode_SyntaxError for
///              CDExceptionName_SyntaxError exceptions and CDTypeParserCode_Default otherwise;
///              its userInfo carries the CDErrorKey_* entries plus the exception reason.
/// @return The array produced by -_parseMethodType, or nil if parsing failed.
- (NSArray *)parseMethodType:(NSError *__autoreleasing *)error;
{
    NSArray *result = nil;

    @try {
        _lookahead = [self.lexer scanNextToken];
        result = [self _parseMethodType];
    }
    @catch (NSException *exception) {
        result = nil;

        // Callers may pass NULL when they are not interested in the failure details.
        if (error != NULL) {
            NSString *reason = [exception reason];
            NSString *localDesc = [NSString stringWithFormat:@"%@:\n\t type: %@\n\tremaining: %@", reason, self.lexer.string, self.lexer.remainingString];

            NSMutableDictionary *userInfo = [NSMutableDictionary dictionary];
            userInfo[CDErrorKey_Type] = self.lexer.string;
            userInfo[CDErrorKey_RemainingString] = self.lexer.remainingString;
            userInfo[CDErrorKey_MethodOrVariable] = @"method";
            userInfo[CDErrorKey_LocalizedLongDescription] = localDesc;
            userInfo[NSLocalizedFailureReasonErrorKey] = reason;

            // Compare by value: pointer equality only holds while both sides happen to be
            // the very same string object.
            NSInteger code = CDTypeParserCode_Default;
            if ([[exception name] isEqualToString:CDExceptionName_SyntaxError]) {
                code = CDTypeParserCode_SyntaxError;
                userInfo[NSLocalizedDescriptionKey] = @"Syntax Error";
            }

            *error = [NSError errorWithDomain:CDErrorDomain_TypeParser code:code userInfo:userInfo];
        }
    }

    return result;
}
/// Parses the receiver's string as a single (variable) type encoding.
///
/// Parsing problems are raised internally as exceptions; this entry point catches them
/// and converts them into an NSError.
///
/// @param error Optional. On failure, and only if non-NULL, receives an NSError in
///              CDErrorDomain_TypeParser. Its code is CDTypeParserCode_SyntaxError for
///              CDExceptionName_SyntaxError exceptions and CDTypeParserCode_Default otherwise;
///              its userInfo carries the CDErrorKey_* entries plus the exception reason.
/// @return The type produced by -_parseType, or nil if parsing failed.
- (CDType *)parseType:(NSError *__autoreleasing *)error;
{
    CDType *result = nil;

    @try {
        _lookahead = [self.lexer scanNextToken];
        result = [self _parseType];
    }
    @catch (NSException *exception) {
        result = nil;

        // Callers may pass NULL when they are not interested in the failure details.
        if (error != NULL) {
            NSString *reason = [exception reason];
            NSString *localDesc = [NSString stringWithFormat:@"%@:\n\t type: %@\n\tremaining: %@", reason, self.lexer.string, self.lexer.remainingString];

            NSMutableDictionary *userInfo = [NSMutableDictionary dictionary];
            userInfo[CDErrorKey_Type] = self.lexer.string;
            userInfo[CDErrorKey_RemainingString] = self.lexer.remainingString;
            userInfo[CDErrorKey_MethodOrVariable] = @"variable";
            userInfo[CDErrorKey_LocalizedLongDescription] = localDesc;
            userInfo[NSLocalizedFailureReasonErrorKey] = reason;

            // Compare by value: pointer equality only holds while both sides happen to be
            // the very same string object.
            NSInteger code = CDTypeParserCode_Default;
            if ([[exception name] isEqualToString:CDExceptionName_SyntaxError]) {
                code = CDTypeParserCode_SyntaxError;
                userInfo[NSLocalizedDescriptionKey] = @"Syntax Error";
            }

            *error = [NSError errorWithDomain:CDErrorDomain_TypeParser code:code userInfo:userInfo];
        }
    }

    return result;
}
131 #pragma mark - Private methods
/// Consumes the expected token while leaving the lexer in its current state.
///
/// Shorthand for -match:enterState: called with the lexer's present state.
///
/// @param token The token the lookahead must equal.
- (void)match:(int)token;
{
    CDTypeLexerState currentState = self.lexer.state;
    [self match:token enterState:currentState];
}
/// Consumes the expected token and advances the lookahead.
///
/// If the lookahead equals `token`, the lexer is switched to `newState` and the next
/// token is scanned. Otherwise a CDExceptionName_SyntaxError exception is raised.
///
/// @param token    The token the lookahead must equal.
/// @param newState The lexer state to set before the next token is scanned.
- (void)match:(int)token enterState:(CDTypeLexerState)newState;
{
    if (_lookahead != token) {
        [NSException raise:CDExceptionName_SyntaxError format:@"expected token %@, got %@",
         CDTokenDescription(token),
         CDTokenDescription(_lookahead)];
        return;
    }

    if (debug) NSLog(@"matched %@", CDTokenDescription(token));

    // The state is changed first, so the following scan already runs in the new state.
    self.lexer.state = newState;
    _lookahead = [self.lexer scanNextToken];
}
/// Raises a CDExceptionName_SyntaxError exception whose reason is the given message.
///
/// @param errorString The text to use as the exception reason.
- (void)error:(NSString *)errorString;
{
    // Pass the message as an argument, never as the format itself.
    [NSException raise:CDExceptionName_SyntaxError
                format:@"%@", errorString];
}
/// Parses a run of <type><number> pairs into CDMethodType objects.
///
/// At least one pair is always parsed (the return type); in practice a method type has
/// at least two more, for the object and the selector. Parsing continues for as long as
/// the lookahead can start another type.
///
/// @return An array of CDMethodType objects, one per pair, in encoding order.
- (NSArray *)_parseMethodType;
{
    NSMutableArray *entries = [NSMutableArray array];

    for (;;) {
        CDType *parsedType = [self _parseType];
        NSString *offset = [self parseNumber];

        [entries addObject:[[CDMethodType alloc] initWithType:parsedType offset:offset]];

        if (![self isTokenInTypeStartSet:_lookahead])
            break;
    }

    return entries;
}
175 // Plain object types can be:
177 // @"NSObject" - NSObject *
178 // @"<MyProtocol>" - id <MyProtocol>
179 // But these can also be part of a structure, with the field name in quotes before the type:
180 // "foo"i"bar"i - int foo, int bar
181 // "foo"@"bar"i - id foo, int bar
182 // "foo"@"Foo""bar"i - Foo *foo, int bar
183 // So this is where we need to be careful.
185 // I'm going to make a simplifying assumption: Either the structure/union has member names,
186 // or it doesn't; it can't have some names and be missing others.
187 // The two key tests are:
188 // {my_struct3="field1"@"field2"i}
189 // {my_struct4="field1"@"NSObject""field2"i}
/// Parses one type outside of any struct/union member context.
///
/// @return The result of -_parseTypeInStruct: called with NO.
- (CDType *)_parseType;
{
    CDType *parsed = [self _parseTypeInStruct:NO];
    return parsed;
}
// Parses a single type starting at the current lookahead token and builds the CDType for it.
// The lookahead selects the branch: modifier, pointer ('^'), bitfield ('b'), id ('@'),
// structure ('{'), union ('('), array ('[') or simple type. Anything else raises a
// CDExceptionName_SyntaxError exception.
// isInStruct is YES when the type is a struct/union member (see -parseMember); it only
// influences how a quoted string that follows '@' is interpreted, and it is passed along
// when a modifier or pointer wraps another type.
196 - (CDType *)_parseTypeInStruct:(BOOL)isInStruct;
200 if (_lookahead == 'j'
208 || _lookahead == 'A') { // modifiers
209 int modifier = _lookahead;
210 [self match:modifier];
// A modifier may stand alone: the modified type is only parsed when a type can start here.
212 CDType *unmodifiedType;
213 if ([self isTokenInTypeStartSet:_lookahead])
214 unmodifiedType = [self _parseTypeInStruct:isInStruct];
216 unmodifiedType = nil;
217 result = [[CDType alloc] initModifier:modifier type:unmodifiedType];
218 } else if (_lookahead == '^') { // pointer
// A '^' directly followed by a quoted string or by a closing '}' / ')' has no pointee
// encoded; it is treated as a pointer to the simple type 'v'.
222 if (_lookahead == TK_QUOTED_STRING || _lookahead == '}' || _lookahead == ')') {
223 type = [[CDType alloc] initSimpleType:'v'];
224 // Safari on 10.5 has: "m_function"{?="__pfn"^"__delta"i}
225 result = [[CDType alloc] initPointerType:type];
226 } else if (_lookahead == '?') {
228 result = [[CDType alloc] initFunctionPointerType];
// Otherwise the pointee is parsed recursively.
230 type = [self _parseTypeInStruct:isInStruct];
231 result = [[CDType alloc] initPointerType:type];
233 } else if (_lookahead == 'b') { // bitfield
235 NSString *number = [self parseNumber];
236 result = [[CDType alloc] initBitfieldType:number];
237 } else if (_lookahead == '@') { // id
// NOTE(review): the logging below refers to names (lookahead, lexer, shouldCheckFieldNames)
// that the rest of the visible code does not use -- presumably it is compiled out; confirm.
240 if (lookahead == TK_QUOTED_STRING) {
241 NSLog(@"%s, quoted string ahead, shouldCheckFieldNames: %d, end: %d",
242 __cmd, shouldCheckFieldNames, [lexer.scanner isAtEnd]);
243 if ([lexer.scanner isAtEnd] == NO)
244 NSLog(@"next character: %d (%c), isInTypeStartSet: %d", lexer.peekChar, lexer.peekChar, [self isTokenInTypeStartSet:lexer.peekChar]);
// A quoted string after '@' is taken as the object's class/protocol description unless we
// are inside a struct, its first letter is not uppercase, and the character after it can
// start a type -- in that case it is left alone (it is the next member's name).
247 if (_lookahead == TK_QUOTED_STRING && (isInStruct == NO || [self.lexer.lexText isFirstLetterUppercase] || [self isTokenInTypeStartSet:self.lexer.peekChar] == NO)) {
248 NSString *str = self.lexer.lexText;
// protocolOpenIdx: index just past the first '<'; protocolCloseIdx: index of the last '>'.
// When '<' is absent the range is {NSNotFound, 0}, so NSMaxRange also yields NSNotFound.
250 NSUInteger protocolOpenIdx = NSMaxRange([str rangeOfString:@"<"]);
251 NSUInteger protocolCloseIdx = [str rangeOfString:@">" options:NSBackwardsSearch].location;
252 if (protocolOpenIdx != NSNotFound && protocolCloseIdx != NSNotFound) {
// NOTE(review): assumes the last '>' comes after the first '<'; otherwise the length underflows.
253 NSRange protocolRange = NSMakeRange(protocolOpenIdx, protocolCloseIdx - protocolOpenIdx);
254 NSArray *protocols = [[str substringWithRange:protocolRange] componentsSeparatedByString:@","];
// The text before '<' is the type name; an empty name or "id" leaves typeName nil.
256 NSString *typeNameStr = [[str substringToIndex:(protocolOpenIdx - 1)] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]];
257 CDTypeName *typeName = nil;
258 if ([typeNameStr length] && ![typeNameStr isEqualToString:@"id"]) {
259 typeName = [[CDTypeName alloc] init];
260 typeName.name = typeNameStr;
263 result = [[CDType alloc] initIDType:typeName withProtocols:protocols];
265 CDTypeName *typeName = [[CDTypeName alloc] init];
267 result = [[CDType alloc] initIDType:typeName];
270 [self match:TK_QUOTED_STRING];
// '@' followed by '?' builds a block type; a '<' after it introduces the block's signature,
// parsed like a method type, of which only each entry's `type` is kept.
271 } else if (_lookahead == '?') {
273 NSArray *blockTypes = nil;
274 if (_lookahead == '<') {
276 blockTypes = [[self _parseMethodType] valueForKeyPath:@"type"];
279 result = [[CDType alloc] initBlockTypeWithTypes:blockTypes];
// Plain '@' with nothing recognizable after it: an id type without a name.
281 result = [[CDType alloc] initIDType:nil];
283 } else if (_lookahead == '{') { // structure
// The lexer state is saved here and restored when the closing '}' is matched; the
// structure name is scanned in the identifier state.
284 CDTypeLexerState savedState = self.lexer.state;
285 [self match:'{' enterState:CDTypeLexerState_Identifier];
286 CDTypeName *typeName = [self parseTypeName];
287 NSArray *optionalMembers = [self parseOptionalMembers];
288 [self match:'}' enterState:savedState];
290 result = [[CDType alloc] initStructType:typeName members:optionalMembers];
291 } else if (_lookahead == '(') { // union
292 CDTypeLexerState savedState = self.lexer.state;
293 [self match:'(' enterState:CDTypeLexerState_Identifier];
// A union that starts with an identifier is named and may carry named members; otherwise
// it is a bare list of member types with no union name.
294 if (_lookahead == TK_IDENTIFIER) {
295 CDTypeName *typeName = [self parseTypeName];
296 NSArray *optionalMembers = [self parseOptionalMembers];
297 [self match:')' enterState:savedState];
299 result = [[CDType alloc] initUnionType:typeName members:optionalMembers];
301 NSArray *unionTypes = [self parseUnionTypes];
302 [self match:')' enterState:savedState];
304 result = [[CDType alloc] initUnionType:nil members:unionTypes];
306 } else if (_lookahead == '[') { // array
// An array is encoded as its element count followed by its element type.
308 NSString *number = [self parseNumber];
309 CDType *type = [self _parseType];
312 result = [[CDType alloc] initArrayType:type count:number];
313 } else if ([self isTokenInSimpleTypeSet:_lookahead]) { // simple type
314 int simpleType = _lookahead;
315 [self match:simpleType];
316 result = [[CDType alloc] initSimpleType:simpleType];
// No branch applies: the lookahead cannot start a type.
319 [NSException raise:CDExceptionName_SyntaxError format:@"expected (many things), got %@", CDTokenDescription(_lookahead)];
325 // This seems to be used in method types -- no names
/// Parses consecutive unnamed member types of a union.
///
/// Types are collected for as long as the lookahead belongs to the type set.
///
/// @return An array of CDType objects, possibly empty.
- (NSArray *)parseUnionTypes;
{
    NSMutableArray *unionMembers = [NSMutableArray array];

    while ([self isTokenInTypeSet:_lookahead])
        [unionMembers addObject:[self _parseType]];

    return unionMembers;
}
// Parses the member list that may follow a structure or union name.
// -parseMemberList is only consulted when the lookahead token is '='.
// NOTE(review): the declaration of `result`, the path taken without '=' and the return
// statement are not visible in this excerpt.
339 - (NSArray *)parseOptionalMembers;
343 if (_lookahead == '=') {
345 result = [self parseMemberList];
/// Parses consecutive struct/union members.
///
/// A member starts either with a quoted string (its name) or with a token from the
/// type set; parsing stops at the first token that is neither.
///
/// @return An array of the CDType objects returned by -parseMember, possibly empty.
- (NSArray *)parseMemberList;
{
    NSMutableArray *members = [NSMutableArray array];

    for (;;) {
        BOOL startsMember = (_lookahead == TK_QUOTED_STRING) || [self isTokenInTypeSet:_lookahead];
        if (!startsMember)
            break;

        [members addObject:[self parseMember]];
    }

    return members;
}
/// Parses one struct/union member: an optional quoted name followed by its type.
///
/// Several quoted strings in a row are merged into a single name, joined with "__"
/// (TextMate 1.5.4 has structures like "storage""stack"{etc}). The type itself is
/// parsed in struct context.
///
/// @return The member's CDType; its variableName is set when a name was present.
- (CDType *)parseMember;
{
    // Unnamed member: just a type.
    if (_lookahead != TK_QUOTED_STRING)
        return [self _parseTypeInStruct:YES];

    NSString *memberName = nil;
    do {
        NSString *piece = self.lexer.lexText;
        if (memberName == nil)
            memberName = piece;
        else
            memberName = [NSString stringWithFormat:@"%@__%@", memberName, piece];

        [self match:TK_QUOTED_STRING];
    } while (_lookahead == TK_QUOTED_STRING);

    CDType *member = [self _parseTypeInStruct:YES];
    member.variableName = memberName;

    return member;
}
// Parses a type name: an identifier, optionally followed by a '<' ... '>' list of template
// arguments. Each argument is itself parsed with -parseTypeName and appended to
// typeName.templateTypes.
397 - (CDTypeName *)parseTypeName;
399 CDTypeName *typeName = [[CDTypeName alloc] init];
400 [typeName setName:[self parseIdentifier]];
402 if (_lookahead == '<') {
// Template arguments are scanned in the TemplateTypes lexer state; the state in effect
// before the '<' is restored when the closing '>' is matched.
403 CDTypeLexerState savedState = self.lexer.state;
404 [self match:'<' enterState:CDTypeLexerState_TemplateTypes];
405 [typeName.templateTypes addObject:[self parseTypeName]];
// Further arguments follow for as long as the lookahead is ','.
406 while (_lookahead == ',') {
408 [typeName.templateTypes addObject:[self parseTypeName]];
410 [self match:'>' enterState:savedState];
// If the restored state is still TemplateTypes (this name is nested inside another
// template list), an identifier right after the '>' is stored as the name's suffix.
412 if (self.lexer.state == CDTypeLexerState_TemplateTypes) {
413 if (_lookahead == TK_IDENTIFIER) {
414 NSString *suffix = self.lexer.lexText;
416 [self match:TK_IDENTIFIER];
417 [typeName setSuffix:suffix];
423 // This breaks a bunch of the unit tests... need to figure out what's up with that first.
424 // We'll treat "?" as no name, returning nil here instead of testing the type name for this later.
// NOTE(review): the statement guarded by this check is not visible in this excerpt, and the
// comment above suggests the check may be disabled -- confirm.
425 if ([[typeName name] isEqualToString:@"?"] && [typeName isTemplateType] == NO)
/// Consumes an identifier token, if one is next.
///
/// @return The identifier's text, or nil (consuming nothing) when the lookahead is not
///         TK_IDENTIFIER.
- (NSString *)parseIdentifier;
{
    if (_lookahead != TK_IDENTIFIER)
        return nil;

    // Capture the text before matching; matching scans the next token.
    NSString *identifier = self.lexer.lexText;
    [self match:TK_IDENTIFIER];

    return identifier;
}
/// Consumes a number token, if one is next.
///
/// @return The number's text, or nil (consuming nothing) when the lookahead is not
///         TK_NUMBER.
- (NSString *)parseNumber;
{
    if (_lookahead != TK_NUMBER)
        return nil;

    // Capture the text before matching; matching scans the next token.
    NSString *digits = self.lexer.lexText;
    [self match:TK_NUMBER];

    return digits;
}
// Token predicate consulted by -isTokenInTypeSet:.
// NOTE(review): the body is not visible in this excerpt; presumably it accepts the modifier
// characters handled by the first branch of -_parseTypeInStruct: -- confirm.
455 - (BOOL)isTokenInModifierSet:(int)token;
// Token predicate used by -_parseTypeInStruct: to recognize a simple type, and by
// -isTokenInTypeSet: and -isTokenInTypeStartSet:.
// NOTE(review): the body is not visible in this excerpt, so the accepted tokens cannot be
// listed here.
471 - (BOOL)isTokenInSimpleTypeSet:(int)token;
// Token predicate used by -parseUnionTypes and -parseMemberList to decide whether another
// type follows. A token qualifies if it is in the modifier set or in the simple type set.
// NOTE(review): further alternatives of this condition are not visible in this excerpt.
498 - (BOOL)isTokenInTypeSet:(int)token;
500 if ([self isTokenInModifierSet:token]
501 || [self isTokenInSimpleTypeSet:token]
513 - (BOOL)isTokenInTypeStartSet:(int)token;
529 || [self isTokenInSimpleTypeSet:token])