Fix crash when libxml sets an error inside @autoreleasepool
- libxml now returns the first parsing error instead of the last one
- add an option to replace lower ASCII chars with whitespace
This commit is contained in:
@@ -61,6 +61,7 @@
|
|||||||
|
|
||||||
|
|
||||||
@interface RSSAXParser : NSObject
|
@interface RSSAXParser : NSObject
|
||||||
|
@property (nonatomic, strong, readonly) NSError *parsingError;
|
||||||
@property (nonatomic, strong, readonly) NSData *currentCharacters;
|
@property (nonatomic, strong, readonly) NSData *currentCharacters;
|
||||||
@property (nonatomic, strong, readonly) NSString *currentString;
|
@property (nonatomic, strong, readonly) NSString *currentString;
|
||||||
@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
|
@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
|
||||||
|
|||||||
@@ -27,6 +27,8 @@
|
|||||||
#import <libxml/parser.h>
|
#import <libxml/parser.h>
|
||||||
#import "RSSAXParser.h"
|
#import "RSSAXParser.h"
|
||||||
|
|
||||||
|
const NSErrorDomain kLIBXMLParserErrorDomain = @"LIBXMLParserErrorDomain";
|
||||||
|
|
||||||
|
|
||||||
@interface RSSAXParser ()
|
@interface RSSAXParser ()
|
||||||
@property (nonatomic, weak) id<RSSAXParserDelegate> delegate;
|
@property (nonatomic, weak) id<RSSAXParserDelegate> delegate;
|
||||||
@@ -97,6 +99,8 @@ static xmlSAXHandler saxHandlerStruct;
|
|||||||
*/
|
*/
|
||||||
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {
|
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {
|
||||||
|
|
||||||
|
_parsingError = nil;
|
||||||
|
|
||||||
if (self.context == nil) {
|
if (self.context == nil) {
|
||||||
if (self.isHTMLParser) {
|
if (self.isHTMLParser) {
|
||||||
xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes);
|
xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes);
|
||||||
@@ -342,6 +346,11 @@ static xmlSAXHandler saxHandlerStruct;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 Stores an error that was reported while parsing.

 Only the first reported error is kept: while `parsingError` is already set,
 any later error passed to this method is ignored.

 @param error The error to record.
 */
- (void)xmlParsingErrorOccured:(NSError*)error {
	if (self.parsingError) {
		return; // keep the first encountered error
	}
	_parsingError = error;
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
|
|
||||||
@@ -369,6 +378,20 @@ static void endElementSAX_HTML(void *context, const xmlChar *localname) {
|
|||||||
[(__bridge RSSAXParser *)context xmlEndHTMLElement:localname];
|
[(__bridge RSSAXParser *)context xmlEndHTMLElement:localname];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 libxml SAX `error` callback. Forwards fatal errors to the RSSAXParser passed as `context`.

 The printf-style `format` and its variadic arguments are ignored; the error details are
 read from xmlGetLastError() instead. Errors with a level other than XML_ERR_FATAL are
 dropped. The libxml last-error state is reset on every call.

 @param context The RSSAXParser instance (bridged, not retained) that receives the error.
 @param format  Unused.
 */
static void errorOccuredSAX(void *context, const char *format, ...) {
	// Pointer-to-const so the assignment is valid whether xmlGetLastError() returns
	// a mutable or a const error pointer.
	const xmlError *lastError = xmlGetLastError();
	if (lastError != NULL && lastError->level == XML_ERR_FATAL) {
		NSString *description = nil;
		if (lastError->message != NULL) {
			// Decode explicitly as UTF-8; `%s` would use the system default encoding.
			description = [NSString stringWithUTF8String:lastError->message];
			if (description == nil) {
				// Not valid UTF-8: fall back to the system default encoding.
				description = [NSString stringWithFormat:@"%s", lastError->message];
			}
		}
		if (description == nil) {
			// No message available; the userInfo literal below must not contain nil.
			description = @"Unknown XML parsing error";
		}
		description = [description stringByTrimmingCharactersInSet:
					   [NSCharacterSet whitespaceAndNewlineCharacterSet]];

		// The message has been copied into an NSString, so it stays valid after the reset below.
		NSError *error = [NSError errorWithDomain:kLIBXMLParserErrorDomain
											 code:lastError->code
										 userInfo:@{ NSLocalizedDescriptionKey: description }];
		[(__bridge RSSAXParser *)context xmlParsingErrorOccured:error];
	}
	xmlResetLastError();
}
|
||||||
|
|
||||||
|
|
||||||
static xmlSAXHandler saxHandlerStruct = {
|
static xmlSAXHandler saxHandlerStruct = {
|
||||||
nil, /* internalSubset */
|
nil, /* internalSubset */
|
||||||
@@ -393,7 +416,7 @@ static xmlSAXHandler saxHandlerStruct = {
|
|||||||
nil, /* processingInstruction */
|
nil, /* processingInstruction */
|
||||||
nil, /* comment */
|
nil, /* comment */
|
||||||
nil, /* warning */
|
nil, /* warning */
|
||||||
nil, /* error */
|
errorOccuredSAX, /* error */
|
||||||
nil, /* fatalError //: unused error() get all the errors */
|
nil, /* fatalError //: unused error() get all the errors */
|
||||||
nil, /* getParameterEntity */
|
nil, /* getParameterEntity */
|
||||||
nil, /* cdataBlock */
|
nil, /* cdataBlock */
|
||||||
|
|||||||
@@ -23,7 +23,6 @@
|
|||||||
|
|
||||||
#import "RSXMLError.h"
|
#import "RSXMLError.h"
|
||||||
|
|
||||||
const NSErrorDomain kLIBXMLParserErrorDomain = @"LIBXMLParserErrorDomain";
|
|
||||||
const NSErrorDomain kRSXMLParserErrorDomain = @"RSXMLParserErrorDomain";
|
const NSErrorDomain kRSXMLParserErrorDomain = @"RSXMLParserErrorDomain";
|
||||||
|
|
||||||
const char * parserDescriptionForError(RSXMLError code);
|
const char * parserDescriptionForError(RSXMLError code);
|
||||||
|
|||||||
@@ -58,6 +58,7 @@
|
|||||||
|
|
||||||
@interface RSXMLParser<__covariant T> : NSObject <RSXMLParserDelegate, RSSAXParserDelegate>
|
@interface RSXMLParser<__covariant T> : NSObject <RSXMLParserDelegate, RSSAXParserDelegate>
|
||||||
@property (nonatomic, readonly, nonnull, copy) NSString *documentURI;
|
@property (nonatomic, readonly, nonnull, copy) NSString *documentURI;
|
||||||
|
@property (nonatomic, assign) BOOL dontStopOnLowerAsciiBytes;
|
||||||
|
|
||||||
+ (instancetype)parserWithXMLData:(RSXMLData * _Nonnull)xmlData;
|
+ (instancetype)parserWithXMLData:(RSXMLData * _Nonnull)xmlData;
|
||||||
|
|
||||||
|
|||||||
@@ -21,8 +21,6 @@
|
|||||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
// SOFTWARE.
|
// SOFTWARE.
|
||||||
|
|
||||||
#import <libxml/xmlerror.h>
|
|
||||||
|
|
||||||
#import "RSXMLParser.h"
|
#import "RSXMLParser.h"
|
||||||
#import "RSXMLData.h"
|
#import "RSXMLData.h"
|
||||||
#import "RSXMLError.h"
|
#import "RSXMLError.h"
|
||||||
@@ -71,6 +69,22 @@
|
|||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 Replaces every byte that XML does not allow below 0x20 with a space character.

 XML allows only specific lower ascii characters (<0x20), namely 0x9, 0xA, and 0xD.
 See: https://www.w3.org/TR/xml/#charsets

 The replacement is done in a mutable copy which then replaces `_xmlData`; the buffer
 returned by the existing data object is only read, never written. The copy is created
 lazily, so data without any offending byte is left untouched and not copied.
 */
- (void)replaceLowerAsciiBytesWithSpace {
	const unsigned char *source = _xmlData.bytes;
	NSUInteger length = _xmlData.length;

	NSMutableData *sanitized = nil;
	unsigned char *target = NULL;

	for (NSUInteger i = 0; i < length; i++) {
		unsigned char c = source[i];
		if (c < 0x20 && c != 0x9 && c != 0xA && c != 0xD) {
			if (sanitized == nil) {
				// First offending byte: switch to a writable copy of the data.
				// `source` stays valid because `_xmlData` is not replaced until the loop is done.
				sanitized = [_xmlData mutableCopy];
				target = sanitized.mutableBytes;
			}
			target[i] = ' '; // replace lower ascii with blank
		}
	}

	if (sanitized != nil) {
		_xmlData = sanitized;
	}
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Parse the XML data on whatever thread this method is called.
|
Parse the XML data on whatever thread this method is called.
|
||||||
|
|
||||||
@@ -82,24 +96,16 @@
|
|||||||
if (error) *error = _xmlInputError;
|
if (error) *error = _xmlInputError;
|
||||||
return nil;
|
return nil;
|
||||||
}
|
}
|
||||||
|
if (_dontStopOnLowerAsciiBytes) {
|
||||||
|
[self replaceLowerAsciiBytesWithSpace];
|
||||||
|
}
|
||||||
if ([self respondsToSelector:@selector(xmlParserWillStartParsing)] && ![self xmlParserWillStartParsing])
|
if ([self respondsToSelector:@selector(xmlParserWillStartParsing)] && ![self xmlParserWillStartParsing])
|
||||||
return nil;
|
return nil;
|
||||||
|
|
||||||
@autoreleasepool {
|
@autoreleasepool {
|
||||||
xmlResetLastError();
|
|
||||||
[_parser parseBytes:_xmlData.bytes numberOfBytes:_xmlData.length];
|
[_parser parseBytes:_xmlData.bytes numberOfBytes:_xmlData.length];
|
||||||
if (error) {
|
|
||||||
xmlErrorPtr err = xmlGetLastError();
|
|
||||||
if (err && err->level == XML_ERR_FATAL) {
|
|
||||||
int errCode = err->code;
|
|
||||||
char * msg = err->message;
|
|
||||||
NSString *errMsg = [[NSString stringWithFormat:@"%s", msg] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
|
|
||||||
*error = [NSError errorWithDomain:kLIBXMLParserErrorDomain code:errCode userInfo:@{NSLocalizedDescriptionKey: errMsg}];
|
|
||||||
}
|
|
||||||
xmlResetLastError();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (error) *error = _parser.parsingError;
|
||||||
return [self xmlParserWillReturnDocument];
|
return [self xmlParserWillReturnDocument];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -265,6 +265,28 @@
|
|||||||
#pragma clang diagnostic pop
|
#pragma clang diagnostic pop
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 Parses the `lower-ascii.rss` fixture twice with the same parser.

 First pass (default settings): expects an error and 2 parsed articles.
 Second pass (`dontStopOnLowerAsciiBytes` enabled): expects no error and 5 parsed articles.
 */
- (void)testLowerAsciiCharacters {
	RSXMLData *feedData = [self xmlFile:@"lower-ascii" extension:@"rss"];
	RSXMLParser *feedParser = [feedData getParser];
	NSError *parseError = nil;

	// Default settings: an error is expected and only part of the feed is returned.
	RSParsedFeed *feed = [feedParser parseSync:&parseError];
	XCTAssertNotNil(parseError);
	XCTAssertEqual(feed.articles.count, 2);

	// With the option enabled the same parser is expected to deliver the full feed.
	feedParser.dontStopOnLowerAsciiBytes = YES;
	feed = [feedParser parseSync:&parseError];
	XCTAssertNil(parseError);
	XCTAssertEqual(feed.articles.count, 5);
}
|
||||||
|
|
||||||
|
/**
 Parses the `broken.rss` fixture and checks the reported error: it must be non-nil,
 carry code 76, and have the expected tag-mismatch description.
 */
- (void)testBrokenXML {
	RSXMLData *brokenData = [self xmlFile:@"broken" extension:@"rss"];
	RSXMLParser *brokenParser = [brokenData getParser];

	NSError *parseError = nil;
	[brokenParser parseSync:&parseError]; // the returned document is not of interest here

	XCTAssertNotNil(parseError);
	// NOTE(review): 76 presumably corresponds to libxml's XML_ERR_TAG_NAME_MISMATCH — confirm.
	XCTAssertEqual(parseError.code, 76);
	XCTAssertEqualObjects(parseError.localizedDescription, @"Opening and ending tag mismatch: channel line 0 and rss");
}
|
||||||
|
|
||||||
- (void)testDownloadedFeeds {
|
- (void)testDownloadedFeeds {
|
||||||
NSError *error = nil;
|
NSError *error = nil;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|||||||
26
RSXMLTests/Resources/broken.rss
Normal file
26
RSXMLTests/Resources/broken.rss
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
|
||||||
|
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:atom="http://www.w3.org/2005/Atom"
|
||||||
|
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
|
||||||
|
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
|
||||||
|
>
|
||||||
|
|
||||||
|
<channel>
|
||||||
|
<title>Manton Reece</title>
|
||||||
|
<atom:link href="http://www.manton.org/feed" rel="self" type="application/rss+xml" />
|
||||||
|
<link>http://www.manton.org</link>
|
||||||
|
<description></description>
|
||||||
|
<lastBuildDate>Fri, 25 Sep 2015 14:26:40 +0000</lastBuildDate>
|
||||||
|
<language>en-US</language>
|
||||||
|
<sy:updatePeriod>hourly</sy:updatePeriod>
|
||||||
|
<sy:updateFrequency>1</sy:updateFrequency>
|
||||||
|
<generator>http://wordpress.org/?v=4.2.5</generator>
|
||||||
|
<item>
|
||||||
|
<title></title>
|
||||||
|
<link>http://www.manton.org/2015/09/3071.html</link>
|
||||||
|
<comments>http://www.manton.org/2015/09/3071.html#comments</comments>
|
||||||
|
<pubDate>Fri,</pubDate>
|
||||||
|
</item>
|
||||||
|
</rss>
|
||||||
30
RSXMLTests/Resources/lower-ascii.rss
Normal file
30
RSXMLTests/Resources/lower-ascii.rss
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?><rss version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>Feed Title</title>
|
||||||
|
<item>
|
||||||
|
<title>1</title>
|
||||||
|
<link>http://someurl.com/1/</link>
|
||||||
|
<description><![CDATA[Description of first]]></description>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<title>2</title>
|
||||||
|
<link>http://someurl.com/2/</link>
|
||||||
|
<description><![CDATA[Description with NULL values]]></description>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<title>3</title>
|
||||||
|
<link>http://someurl.com/3/</link>
|
||||||
|
<description><![CDATA[Description of third]]></description>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<title>4</title>
|
||||||
|
<link>http://someurl.com/4/</link>
|
||||||
|
<description><![CDATA[Description of fourth]]></description>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<title>5</title>
|
||||||
|
<link>http://someurl.com/5/</link>
|
||||||
|
<description><![CDATA[Description of fifth]]></description>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
Reference in New Issue
Block a user