From 10d37ec3f45920cbbbadca27e93e0bdbf929055c Mon Sep 17 00:00:00 2001 From: relikd Date: Mon, 10 Dec 2018 17:59:01 +0100 Subject: [PATCH] Added support for error messages & fixed bugs: - parserClassForXMLData: did return instance instead of class - DateParser: array index typo --- RSXML/FeedParser.h | 2 +- RSXML/RSAtomParser.m | 2 +- RSXML/RSDateParser.m | 2 +- RSXML/RSFeedParser.h | 3 ++ RSXML/RSFeedParser.m | 78 +++++++++++++++++++++++++++++++++++++------- RSXML/RSRSSParser.m | 2 +- 6 files changed, 73 insertions(+), 16 deletions(-) diff --git a/RSXML/FeedParser.h b/RSXML/FeedParser.h index 0f8df6b..cda6714 100644 --- a/RSXML/FeedParser.h +++ b/RSXML/FeedParser.h @@ -18,7 +18,7 @@ - (nonnull instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData; -- (nullable RSParsedFeed *)parseFeed:(NSError * _Nullable * _Nullable)error; +- (nullable RSParsedFeed *)parseFeed; @end diff --git a/RSXML/RSAtomParser.m b/RSXML/RSAtomParser.m index 911d20c..6f9428c 100755 --- a/RSXML/RSAtomParser.m +++ b/RSXML/RSAtomParser.m @@ -106,7 +106,7 @@ #pragma mark - API -- (RSParsedFeed *)parseFeed:(NSError **)error { +- (RSParsedFeed *)parseFeed { [self parse]; diff --git a/RSXML/RSDateParser.m b/RSXML/RSDateParser.m index e08b3bf..7b3947c 100644 --- a/RSXML/RSDateParser.m +++ b/RSXML/RSDateParser.m @@ -108,7 +108,7 @@ static NSInteger nextMonthValue(const char *bytes, NSUInteger numberOfBytes, NSU return NSNotFound; if (monthCharacters[0] == 'J' || monthCharacters[0] == 'j') { //Jan, Jun, Jul - if (monthCharacters[1] == 'a' || monthCharacters[i] == 'A') + if (monthCharacters[1] == 'a' || monthCharacters[1] == 'A') return RSJanuary; if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') { if (monthCharacters[2] == 'n' || monthCharacters[2] == 'N') diff --git a/RSXML/RSFeedParser.h b/RSXML/RSFeedParser.h index 40b2fef..2b9d1e2 100644 --- a/RSXML/RSFeedParser.h +++ b/RSXML/RSFeedParser.h @@ -16,6 +16,9 @@ NS_ASSUME_NONNULL_BEGIN +static NSString *kLIBXMLParserErrorDomain = @"LIBXMLParserErrorDomain"; +static NSString *kRSXMLParserErrorDomain = @"RSXMLParserErrorDomain"; + BOOL RSCanParseFeed(RSXMLData *xmlData); diff --git a/RSXML/RSFeedParser.m b/RSXML/RSFeedParser.m index d4fee6c..6e4f36d 100644 --- a/RSXML/RSFeedParser.m +++ b/RSXML/RSFeedParser.m @@ -6,6 +6,7 @@ // Copyright (c) 2015 Ranchero Software LLC. All rights reserved. // +#import #import "RSFeedParser.h" #import "FeedParser.h" #import "RSXMLData.h" @@ -48,9 +49,43 @@ static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes); static const NSUInteger maxNumberOfBytesToSearch = 4096; static const NSUInteger minNumberOfBytesToSearch = 20; -static Class parserClassForXMLData(RSXMLData *xmlData) { +typedef enum { + RSXMLErrorNoData = 100, + RSXMLErrorMissingLeftCaret, + RSXMLErrorProbablyHTML, + RSXMLErrorContainsXMLErrorsTag, + RSXMLErrorNoSuitableParser +} RSXMLError; + +static void setError(NSError **error, RSXMLError code) { + if (!error) { + return; + } + NSString *msg = @""; + switch (code) { // switch statement will warn if an enum value is missing + case RSXMLErrorNoData: + msg = @"Couldn't parse feed. No data available."; + break; + case RSXMLErrorMissingLeftCaret: + msg = @"Couldn't parse feed. Missing left caret character ('<')."; + break; + case RSXMLErrorProbablyHTML: + msg = @"Couldn't parse feed. Expecting XML data but found html data."; + break; + case RSXMLErrorContainsXMLErrorsTag: + msg = @"Couldn't parse feed. XML contains 'errors' tag."; + break; + case RSXMLErrorNoSuitableParser: + msg = @"Couldn't parse feed. No suitable parser found. XML document not well-formed."; + break; + } + *error = [NSError errorWithDomain:kRSXMLParserErrorDomain code:code userInfo:@{NSLocalizedDescriptionKey: msg}]; +} + +static Class parserClassForXMLData(RSXMLData *xmlData, NSError **error) { if (!feedMayBeParseable(xmlData)) { + setError(error, RSXMLErrorNoData); return nil; } @@ -66,40 +101,42 @@ static Class parserClassForXMLData(RSXMLData *xmlData) { } if (!dataHasLeftCaret(bytes, numberOfBytes)) { + setError(error, RSXMLErrorMissingLeftCaret); return nil; } - if (optimisticCanParseRSSData(bytes, numberOfBytes)) { return [RSRSSParser class]; } if (optimisticCanParseAtomData(bytes, numberOfBytes)) { return [RSAtomParser class]; } - if (optimisticCanParseRDF(bytes, numberOfBytes)) { - return nil; //TODO: parse RDF feeds + return [RSRSSParser class]; //TODO: parse RDF feeds, using RSS parser so far ... } - if (dataIsProbablyHTML(bytes, numberOfBytes)) { + setError(error, RSXMLErrorProbablyHTML); return nil; } if (dataIsSomeWeirdException(bytes, numberOfBytes)) { + setError(error, RSXMLErrorContainsXMLErrorsTag); return nil; } } for (Class parserClass in parserClasses()) { if ([parserClass canParseFeed:xmlData]) { - return [[parserClass alloc] initWithXMLData:xmlData]; + return parserClass; + //return [[parserClass alloc] initWithXMLData:xmlData]; // does not make sense to return instance } } - + // Try RSS anyway? libxml would return a parsing error + setError(error, RSXMLErrorNoSuitableParser); return nil; } -static id parserForXMLData(RSXMLData *xmlData) { +static id parserForXMLData(RSXMLData *xmlData, NSError **error) { - Class parserClass = parserClassForXMLData(xmlData); + Class parserClass = parserClassForXMLData(xmlData, error); if (!parserClass) { return nil; } @@ -108,7 +145,7 @@ static id parserForXMLData(RSXMLData *xmlData) { static BOOL canParseXMLData(RSXMLData *xmlData) { - return parserClassForXMLData(xmlData) != nil; + return parserClassForXMLData(xmlData, nil) != nil; } static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) { @@ -210,7 +247,24 @@ void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback) { RSParsedFeed *RSParseFeedSync(RSXMLData *xmlData, NSError **error) { - id parser = parserForXMLData(xmlData); - return [parser parseFeed:error]; + xmlResetLastError(); + id parser = parserForXMLData(xmlData, error); + if (error && *error) { + //printf("ERROR in parserForXMLData(): %s\n", [[*error localizedDescription] UTF8String]); + return nil; + } + RSParsedFeed *parsedResult = [parser parseFeed]; + + xmlErrorPtr err = xmlGetLastError(); + if (err && error) { + int errCode = err->code; + char * msg = err->message; + //if (err->level == XML_ERR_FATAL) + NSString *errMsg = [[NSString stringWithFormat:@"%s", msg] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; + *error = [NSError errorWithDomain:kLIBXMLParserErrorDomain code:errCode userInfo:@{NSLocalizedDescriptionKey: errMsg}]; + //printf("ERROR in [parseFeed] (%d): %s\n", err->level, [[*error localizedDescription] UTF8String]); + xmlResetLastError(); + } + return parsedResult; } diff --git a/RSXML/RSRSSParser.m b/RSXML/RSRSSParser.m index a37318a..218311c 100755 --- a/RSXML/RSRSSParser.m +++ b/RSXML/RSRSSParser.m @@ -101,7 +101,7 @@ #pragma mark - API -- (RSParsedFeed *)parseFeed:(NSError **)error { +- (RSParsedFeed *)parseFeed { [self parse];