//
//  FeedParser.m
//  RSXML
//
//  Created by Brent Simmons on 1/4/15.
//  Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//

#import "RSFeedParser.h"
#import "FeedParser.h"
#import "RSXMLData.h"
#import "RSRSSParser.h"
#import "RSAtomParser.h"


/// Returns the parser classes that parserClassForXMLData falls back to, in the
/// order they are tried. The array is built once and reused on later calls.
static NSArray *parserClasses(void) {

	static NSArray *gParserClasses = nil;

	static dispatch_once_t onceToken;
	dispatch_once(&onceToken, ^{
		gParserClasses = @[[RSRSSParser class], [RSAtomParser class]];
	});

	return gParserClasses;
}

/// Cheap sanity check made before any sniffing.
/// @return NO when xmlData carries no data; YES otherwise.
static BOOL feedMayBeParseable(RSXMLData *xmlData) {

	if (!xmlData.data) {
		return NO;
	}

	/*TODO: check size, type, etc.*/

	return YES;
}

// Forward declarations for the content-sniffing helpers used by parserClassForXMLData.
static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes);
static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes);
static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes);

// Sniffing never looks past this many leading bytes.
static const NSUInteger maxNumberOfBytesToSearch = 4096;
// Sniffing is skipped unless the data is longer than this many bytes.
static const NSUInteger minNumberOfBytesToSearch = 20;

/// Picks the parser class to use for xmlData.
///
/// When the data is longer than minNumberOfBytesToSearch, its leading bytes
/// (capped at maxNumberOfBytesToSearch) are sniffed first: data without a "<"
/// is rejected, RSS and Atom matches select their parser class, and RDF, HTML
/// and "weird exception" matches are rejected. If sniffing is skipped or
/// inconclusive, each class from parserClasses() is asked via canParseFeed:.
///
/// @param xmlData The feed data to inspect.
/// @return A Class object (never a parser instance), or nil when no parser applies.
static Class parserClassForXMLData(RSXMLData *xmlData) {

	if (!feedMayBeParseable(xmlData)) {
		return nil;
	}

	// TODO: check for things like images and movies and return nil.

	const char *bytes = xmlData.data.bytes;
	NSUInteger numberOfBytes = xmlData.data.length;

	if (numberOfBytes > minNumberOfBytesToSearch) {

		if (numberOfBytes > maxNumberOfBytesToSearch) {
			numberOfBytes = maxNumberOfBytesToSearch;
		}

		if (!dataHasLeftCaret(bytes, numberOfBytes)) {
			return nil;
		}
		if (optimisticCanParseRSSData(bytes, numberOfBytes)) {
			return [RSRSSParser class];
		}
		if (optimisticCanParseAtomData(bytes, numberOfBytes)) {
			return [RSAtomParser class];
		}
		if (optimisticCanParseRDF(bytes, numberOfBytes)) {
			return nil; //TODO: parse RDF feeds
		}
		if (dataIsProbablyHTML(bytes, numberOfBytes)) {
			return nil;
		}
		if (dataIsSomeWeirdException(bytes, numberOfBytes)) {
			return nil;
		}
	}

	for (Class parserClass in parserClasses()) {
		if ([parserClass canParseFeed:xmlData]) {
			// Hand back the class itself: parserForXMLData sends alloc to the
			// result, which only a Class object can answer.
			return parserClass;
		}
	}

	return nil;
}

/// Creates a parser for xmlData.
/// @return A new instance of the class chosen by parserClassForXMLData,
///         initialized with xmlData, or nil when no class was chosen.
static id parserForXMLData(RSXMLData *xmlData) {

	Class parserClass = parserClassForXMLData(xmlData);
	if (!parserClass) {
		return nil;
	}

	return [[parserClass alloc] initWithXMLData:xmlData];
}

/// Returns YES when parserClassForXMLData finds a parser class for xmlData.
static BOOL canParseXMLData(RSXMLData *xmlData) {

	return parserClassForXMLData(xmlData) != nil;
}

/// Returns YES when the C string `string` occurs within the first numberOfBytes
/// bytes of `bytes`. The search uses strnstr, so it also ends at the first NUL byte.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {

	return strnstr(bytes, string, numberOfBytes) != NULL;
}

/// Returns YES when a "<" occurs within the first numberOfBytes bytes of `bytes`.
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes) {

	return didFindString("<", bytes, numberOfBytes);
}

static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {
	// Won’t catch every single case, which is fine.
	if (didFindString("