//
//  FeedParser.m
//  RSXML
//
//  Created by Brent Simmons on 1/4/15.
//  Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//

#import "RSXMLError.h"
#import "RSFeedParser.h"
#import "FeedParser.h"
#import "RSXMLData.h"
#import "RSRSSParser.h"
#import "RSAtomParser.h"


/// Returns the list of parser classes that are asked, in order, whether they
/// can parse a feed. The array is built once and reused on later calls.
static NSArray *parserClasses(void) {

    static NSArray *gParserClasses = nil;

    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        gParserClasses = @[[RSRSSParser class], [RSAtomParser class]];
    });

    return gParserClasses;
}

/// Cheap sanity check run before any sniffing is attempted.
///
/// @param xmlData The feed data to inspect. May be nil.
/// @return NO when there are no bytes to look at, YES otherwise.
static BOOL feedMayBeParseable(RSXMLData *xmlData) {

    /*Sanity checks.*/

    // Messaging nil yields 0, so this single test rejects a nil xmlData, a nil
    // data object, and a zero-length data object. Empty data is "no data":
    // there is nothing for any parser to work with.
    if (xmlData.data.length == 0) {
        return NO;
    }

    /*TODO: check size, type, etc.*/

    return YES;
}

// Byte-sniffing helpers. Each one looks only at the first numberOfBytes bytes.
static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes);
static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes);
static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes);

// Upper bound on how many leading bytes the sniffing helpers inspect.
static const NSUInteger maxNumberOfBytesToSearch = 4096;
// Data must be strictly longer than this for the sniffing helpers to run.
static const NSUInteger minNumberOfBytesToSearch = 20;

/// Picks the parser class to use for the given feed data.
///
/// Data longer than minNumberOfBytesToSearch is first sniffed (at most the
/// first maxNumberOfBytesToSearch bytes); if sniffing is skipped or is
/// inconclusive, every class from parserClasses() is asked via +canParseFeed:.
///
/// @param xmlData The feed data to inspect.
/// @param error   Passed through to RSXMLSetError on failure. Callers in this
///                file pass nil when they do not want an error.
/// @return The parser class, or nil when no parser applies. On nil,
///         RSXMLSetError has been called with one of: RSXMLErrorNoData,
///         RSXMLErrorMissingLeftCaret, RSXMLErrorProbablyHTML,
///         RSXMLErrorContainsXMLErrorsTag, RSXMLErrorNoSuitableParser.
static Class parserClassForXMLData(RSXMLData *xmlData, NSError **error) {

    if (!feedMayBeParseable(xmlData)) {
        RSXMLSetError(error, RSXMLErrorNoData, nil);
        return nil;
    }

    // TODO: check for things like images and movies and return nil.

    const char *bytes = xmlData.data.bytes;
    NSUInteger numberOfBytes = xmlData.data.length;

    if (numberOfBytes > minNumberOfBytesToSearch) {

        // Only the head of the document is sniffed.
        if (numberOfBytes > maxNumberOfBytesToSearch) {
            numberOfBytes = maxNumberOfBytesToSearch;
        }

        if (!dataHasLeftCaret(bytes, numberOfBytes)) {
            RSXMLSetError(error, RSXMLErrorMissingLeftCaret, nil);
            return nil;
        }
        if (optimisticCanParseRSSData(bytes, numberOfBytes)) {
            return [RSRSSParser class];
        }
        if (optimisticCanParseAtomData(bytes, numberOfBytes)) {
            return [RSAtomParser class];
        }
        if (optimisticCanParseRDF(bytes, numberOfBytes)) {
            return [RSRSSParser class]; //TODO: parse RDF feeds, using RSS parser so far ...
        }
        if (dataIsProbablyHTML(bytes, numberOfBytes)) {
            RSXMLSetError(error, RSXMLErrorProbablyHTML, nil);
            return nil;
        }
        if (dataIsSomeWeirdException(bytes, numberOfBytes)) {
            RSXMLSetError(error, RSXMLErrorContainsXMLErrorsTag, nil);
            return nil;
        }
    }

    // Sniffing was skipped or inconclusive: let each parser class decide.
    for (Class parserClass in parserClasses()) {
        if ([parserClass canParseFeed:xmlData]) {
            return parserClass;
            //return [[parserClass alloc] initWithXMLData:xmlData]; // does not make sense to return instance
        }
    }

    // Try RSS anyway? libxml would return a parsing error
    RSXMLSetError(error, RSXMLErrorNoSuitableParser, nil);
    return nil;
}

/// Creates a parser instance for the given feed data.
///
/// @param xmlData The feed data to parse.
/// @param error   Forwarded to parserClassForXMLData.
/// @return A new instance of the selected parser class, initialised with
///         xmlData, or nil when no parser class was found.
static id parserForXMLData(RSXMLData *xmlData, NSError **error) {

    Class parserClass = parserClassForXMLData(xmlData, error);
    if (!parserClass) {
        return nil;
    }
    return [[parserClass alloc] initWithXMLData:xmlData];
}

/// Returns YES when parserClassForXMLData finds a parser class for xmlData.
/// No error information is requested.
static BOOL canParseXMLData(RSXMLData *xmlData) {

    return parserClassForXMLData(xmlData, nil) != nil;
}

/// Returns YES when the C string `string` occurs within the first
/// numberOfBytes bytes of `bytes`.
///
/// NOTE(review): strnstr does not search past a NUL byte, so data containing
/// NULs (e.g. UTF-16 text) is only scanned up to the first NUL.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {

    char *foundString = strnstr(bytes, string, numberOfBytes);
    return foundString != NULL;
}

/// Returns YES when a "<" occurs within the first numberOfBytes bytes.
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes) {

    return didFindString("<", bytes, numberOfBytes);
}

static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {

    // Won’t catch every single case, which is fine.

    if (didFindString("