Fix crash when libxml sets an error inside @autoreleasepool

- libxml will return first parsing error instead of last one
- option to replace lower ascii chars with whitespace
This commit is contained in:
relikd
2019-03-06 02:05:09 +01:00
parent f9a3c1c831
commit d9b6641a99
8 changed files with 124 additions and 16 deletions

View File

@@ -61,6 +61,7 @@
@interface RSSAXParser : NSObject
@property (nonatomic, strong, readonly) NSError *parsingError;
@property (nonatomic, strong, readonly) NSData *currentCharacters;
@property (nonatomic, strong, readonly) NSString *currentString;
@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;

View File

@@ -27,6 +27,8 @@
#import <libxml/parser.h>
#import "RSSAXParser.h"
const NSErrorDomain kLIBXMLParserErrorDomain = @"LIBXMLParserErrorDomain";
@interface RSSAXParser ()
@property (nonatomic, weak) id<RSSAXParserDelegate> delegate;
@@ -97,6 +99,8 @@ static xmlSAXHandler saxHandlerStruct;
*/
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {
_parsingError = nil;
if (self.context == nil) {
if (self.isHTMLParser) {
xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes);
@@ -342,6 +346,11 @@ static xmlSAXHandler saxHandlerStruct;
}
}
/**
 Stores a parsing error reported for this parser.
 Only the first error is kept: if `parsingError` is already set, the new one is ignored.
 */
- (void)xmlParsingErrorOccured:(NSError*)error {
	if (self.parsingError) {
		return; // an earlier error is already recorded; keep that one
	}
	_parsingError = error;
}
@end
@@ -369,6 +378,20 @@ static void endElementSAX_HTML(void *context, const xmlChar *localname) {
[(__bridge RSSAXParser *)context xmlEndHTMLElement:localname];
}
/**
 SAX `error` callback installed in `saxHandlerStruct`.

 The printf-style `format` and its variadic arguments are intentionally ignored; the
 structured error is read from `xmlGetLastError()` instead. Only errors with level
 `XML_ERR_FATAL` are forwarded to the parser via `xmlParsingErrorOccured:`.
 The libxml "last error" is reset on every invocation, whether or not it was forwarded.

 @param context The `RSSAXParser` instance, passed through libxml as an unretained pointer.
 @param format  Unused.
 */
static void errorOccuredSAX(void *context, const char *format, ...) {
	xmlErrorPtr err = xmlGetLastError();
	if (err && err->level == XML_ERR_FATAL) {
		int errCode = err->code;
		const char *msg = err->message;

		// libxml2 messages are UTF-8. The `%s` format specifier decodes in the system
		// default encoding, which can mangle non-ASCII text, so decode explicitly.
		// `stringWithUTF8String:` must not be called with NULL, hence the guard.
		NSString *rawMsg = (msg != NULL) ? [NSString stringWithUTF8String:msg] : nil;
		if (rawMsg == nil) {
			// NULL message or invalid UTF-8: fall back to the previous formatting.
			rawMsg = [NSString stringWithFormat:@"%s", msg];
		}

		NSString *errMsg = [rawMsg stringByTrimmingCharactersInSet:
							[NSCharacterSet whitespaceAndNewlineCharacterSet]];
		NSError *error = [NSError errorWithDomain:kLIBXMLParserErrorDomain code:errCode
										 userInfo:@{ NSLocalizedDescriptionKey: errMsg }];
		[(__bridge RSSAXParser *)context xmlParsingErrorOccured:error];
	}
	xmlResetLastError();
}
static xmlSAXHandler saxHandlerStruct = {
nil, /* internalSubset */
@@ -393,7 +416,7 @@ static xmlSAXHandler saxHandlerStruct = {
nil, /* processingInstruction */
nil, /* comment */
nil, /* warning */
nil, /* error */
errorOccuredSAX, /* error */
nil, /* fatalError //: unused error() get all the errors */
nil, /* getParameterEntity */
nil, /* cdataBlock */

View File

@@ -23,7 +23,6 @@
#import "RSXMLError.h"
const NSErrorDomain kLIBXMLParserErrorDomain = @"LIBXMLParserErrorDomain";
const NSErrorDomain kRSXMLParserErrorDomain = @"RSXMLParserErrorDomain";
const char * parserDescriptionForError(RSXMLError code);

View File

@@ -58,6 +58,7 @@
@interface RSXMLParser<__covariant T> : NSObject <RSXMLParserDelegate, RSSAXParserDelegate>
@property (nonatomic, readonly, nonnull, copy) NSString *documentURI;
@property (nonatomic, assign) BOOL dontStopOnLowerAsciiBytes;
+ (instancetype)parserWithXMLData:(RSXMLData * _Nonnull)xmlData;

View File

@@ -21,8 +21,6 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#import <libxml/xmlerror.h>
#import "RSXMLParser.h"
#import "RSXMLData.h"
#import "RSXMLError.h"
@@ -71,6 +69,22 @@
return self;
}
/**
 Replace every control byte that XML does not allow with a space (0x20).

 XML allows only specific lower ascii characters (<0x20), namely 0x9, 0xA, and 0xD.
 See: https://www.w3.org/TR/xml/#charsets

 The bytes of the existing `_xmlData` object are never written to. If at least one
 offending byte is found, `_xmlData` is replaced by a sanitized mutable copy; otherwise
 it is left untouched and no copy is made.
 */
- (void)replaceLowerAsciiBytesWithSpace {
	const unsigned char *source = (const unsigned char *)_xmlData.bytes;
	NSUInteger length = _xmlData.length;

	NSMutableData *sanitized = nil; // created lazily on the first offending byte
	unsigned char *target = NULL;

	for (NSUInteger i = 0; i < length; i++) {
		unsigned char c = source[i];
		if (c < 0x20 && c != 0x9 && c != 0xA && c != 0xD) {
			if (sanitized == nil) {
				// Writing through NSData's const bytes is undefined behaviour (the
				// storage may be read-only or shared with the caller), so copy first.
				sanitized = [_xmlData mutableCopy];
				target = (unsigned char *)sanitized.mutableBytes;
			}
			target[i] = ' '; // replace lower ascii with blank
		}
	}

	if (sanitized != nil) {
		// `source` is not used past this point, so releasing the old data is safe.
		_xmlData = sanitized;
	}
}
/**
Parse the XML data on whatever thread this method is called.
@@ -82,24 +96,16 @@
if (error) *error = _xmlInputError;
return nil;
}
if (_dontStopOnLowerAsciiBytes) {
[self replaceLowerAsciiBytesWithSpace];
}
if ([self respondsToSelector:@selector(xmlParserWillStartParsing)] && ![self xmlParserWillStartParsing])
return nil;
@autoreleasepool {
xmlResetLastError();
[_parser parseBytes:_xmlData.bytes numberOfBytes:_xmlData.length];
if (error) {
xmlErrorPtr err = xmlGetLastError();
if (err && err->level == XML_ERR_FATAL) {
int errCode = err->code;
char * msg = err->message;
NSString *errMsg = [[NSString stringWithFormat:@"%s", msg] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
*error = [NSError errorWithDomain:kLIBXMLParserErrorDomain code:errCode userInfo:@{NSLocalizedDescriptionKey: errMsg}];
}
xmlResetLastError();
}
}
if (error) *error = _parser.parsingError;
return [self xmlParserWillReturnDocument];
}