Refactoring to v.2.0
This commit is contained in:
@@ -1,24 +0,0 @@
|
||||
//
|
||||
// FeedParser.h
|
||||
// RSXML
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
@import Foundation;
|
||||
|
||||
@class RSParsedFeed;
|
||||
@class RSXMLData;
|
||||
|
||||
|
||||
/// Interface adopted by feed parser classes (RSAtomParser declares conformance).
@protocol FeedParser <NSObject>

/// Asks the parser class whether it can handle the given document.
/// @param xmlData The feed document to inspect. Must not be nil.
/// @return YES if the class claims the document, NO otherwise.
+ (BOOL)canParseFeed:(nonnull RSXMLData *)xmlData;

/// Creates a parser for the given document.
/// @param xmlData The feed document to parse. Must not be nil.
- (nonnull instancetype)initWithXMLData:(nonnull RSXMLData *)xmlData;

/// Produces the parsed feed.
/// @return The parsed feed. The return type is nullable, so callers must be
///         prepared for nil.
- (nullable RSParsedFeed *)parseFeed;

@end
|
||||
28
RSXML/NSDictionary+RSXML.h
Normal file
28
RSXML/NSDictionary+RSXML.h
Normal file
@@ -0,0 +1,28 @@
|
||||
//
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
/// Case-insensitive key lookup for dictionaries that use string keys.
@interface NSDictionary (RSXML)

/// Looks up the value for @c key, ignoring letter case.
///
/// An exact match is tried first. Failing that, the string keys of the
/// dictionary are compared case-insensitively and the value of the first
/// matching key is returned; non-string keys are skipped. If several keys
/// differ only by case, which one wins is unspecified.
///
/// @param key The key to search for. Must not be nil.
/// @return The matching value, or nil when no key matches.
- (nullable id)rsxml_objectForCaseInsensitiveKey:(nonnull NSString *)key;

@end
|
||||
41
RSXML/NSDictionary+RSXML.m
Normal file
41
RSXML/NSDictionary+RSXML.m
Normal file
@@ -0,0 +1,41 @@
|
||||
//
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "NSDictionary+RSXML.h"
|
||||
|
||||
@implementation NSDictionary (RSXML)

/// Looks up the value for @c key, ignoring letter case.
///
/// An exact match is tried first. Failing that, the string keys of the
/// dictionary are compared case-insensitively and the value of the first
/// matching key is returned; non-string keys are skipped. If several keys
/// differ only by case, which one wins is unspecified (dictionary order).
///
/// @param key The key to search for. A nil key yields nil.
/// @return The matching value, or nil when no key matches.
- (nullable id)rsxml_objectForCaseInsensitiveKey:(NSString *)key {
	// Messaging nil returns 0, and 0 == NSOrderedSame. Without this guard a
	// nil key would "match" the first string key in the loop below.
	if (key == nil) {
		return nil;
	}

	id exactMatch = self[key];
	if (exactMatch) {
		return exactMatch;
	}

	// Fast enumeration over a dictionary yields its keys; this avoids the
	// temporary array that -allKeys would allocate.
	for (id candidate in self) {
		if (![candidate isKindOfClass:[NSString class]]) {
			continue;
		}
		if ([key caseInsensitiveCompare:candidate] == NSOrderedSame) {
			return self[candidate];
		}
	}
	return nil;
}

@end
|
||||
@@ -1,16 +1,34 @@
|
||||
//
|
||||
// NSString+RSXML.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 9/25/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
|
||||
/// String helpers for RSXML.
@interface NSString (RSXML)

/// Decodes HTML entities in the receiver (per its name; the implementation is
/// not fully visible from this header).
/// NOTE(review): this is the only declaration here without a nullability
/// annotation. Confirm whether it can return nil and annotate accordingly.
- (NSString *)rs_stringByDecodingHTMLEntities;

/// Returns the MD5 digest of the receiver's UTF-8 bytes, formatted as
/// 32 lowercase hexadecimal characters.
- (nonnull NSString *)rsxml_md5HashString;

/// Returns the receiver as an absolute URL string.
///
/// A receiver that already carries an HTTP(S) scheme is returned unchanged;
/// anything else is resolved relative to @c baseURL. If resolution fails, the
/// receiver is returned as-is.
///
/// @param baseURL The URL to resolve relative references against.
- (nullable NSString *)absoluteURLWithBase:(nonnull NSURL *)baseURL;

@end
|
||||
|
||||
|
||||
@@ -1,23 +1,66 @@
|
||||
//
|
||||
// NSString+RSXML.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 9/25/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "NSString+RSXML.h"
|
||||
|
||||
#import <CommonCrypto/CommonDigest.h>
|
||||
|
||||
/// File-private NSScanner addition declared ahead of its use in this file.
@interface NSScanner (RSXML)

/// Tries to read an entity starting at the scanner's current scan location.
///
/// @param decodedEntity Optional out-parameter; presumably receives the decoded
///        text on success (the assigning code is not visible here; confirm).
/// @return NO when no entity could be scanned. The YES path is not visible in
///         this view, so treat its exact meaning as unconfirmed.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity;

@end
|
||||
|
||||
|
||||
#pragma mark - NSString
|
||||
|
||||
|
||||
@implementation NSString (RSXML)
|
||||
|
||||
/// Computes the MD5 digest of the receiver's UTF-8 representation.
/// @return An NSData holding the CC_MD5_DIGEST_LENGTH raw digest bytes.
- (NSData *)rsxml_md5Hash {
	NSData *utf8Bytes = [self dataUsingEncoding:NSUTF8StringEncoding];

	unsigned char digest[CC_MD5_DIGEST_LENGTH];
	CC_MD5(utf8Bytes.bytes, (CC_LONG)utf8Bytes.length, digest);

	// The bytes are copied out of the stack buffer, so the result outlives this call.
	return [NSData dataWithBytes:digest length:sizeof(digest)];
}
|
||||
|
||||
/// Returns the receiver's MD5 digest as text.
/// @return 32 lowercase hexadecimal characters, two per digest byte.
- (NSString *)rsxml_md5HashString {
	const Byte *octets = [self rsxml_md5Hash].bytes;

	NSMutableString *hex = [NSMutableString stringWithCapacity:CC_MD5_DIGEST_LENGTH * 2];
	for (NSUInteger index = 0; index < CC_MD5_DIGEST_LENGTH; index++) {
		[hex appendFormat:@"%02x", octets[index]];
	}

	// Return an immutable string, as the format-based version did.
	return [hex copy];
}
|
||||
|
||||
/// Returns the receiver as an absolute URL string.
///
/// A receiver that already starts with "http://" or "https://" (in any letter
/// case) is returned unchanged. Anything else is resolved against @c baseURL.
/// If there is no base URL, or resolution fails, the receiver is returned as-is.
///
/// @param baseURL The URL to resolve relative references against.
/// @return The absolute URL string, or the receiver when nothing could be resolved.
- (NSString *)absoluteURLWithBase:(NSURL *)baseURL {
	if (!baseURL) {
		return self;
	}

	// Match the full scheme separator, not just the letters "http": relative
	// references such as "httpd/feed.xml" or "https.html" must still be resolved.
	// An anchored, case-insensitive search also avoids building a lowercased copy.
	NSStringCompareOptions prefixOptions = NSCaseInsensitiveSearch | NSAnchoredSearch;
	if ([self rangeOfString:@"http://" options:prefixOptions].location != NSNotFound ||
		[self rangeOfString:@"https://" options:prefixOptions].location != NSNotFound) {
		return self;
	}

	NSString *resolved = [NSURL URLWithString:self relativeToURL:baseURL].absoluteString;
	return resolved ? resolved : self;
}
|
||||
|
||||
- (NSString *)rs_stringByDecodingHTMLEntities {
|
||||
|
||||
@autoreleasepool {
|
||||
@@ -106,16 +149,18 @@ static NSString *RSXMLStringWithValue(unichar value);
|
||||
|
||||
@end
|
||||
|
||||
|
||||
#pragma mark - NSScanner
|
||||
|
||||
|
||||
@implementation NSScanner (RSXML)
|
||||
|
||||
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {
|
||||
|
||||
NSString *s = self.string;
|
||||
NSUInteger initialScanLocation = self.scanLocation;
|
||||
static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity.
|
||||
|
||||
while (true) {
|
||||
|
||||
unichar ch = [s characterAtIndex:self.scanLocation];
|
||||
if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) {
|
||||
break;
|
||||
@@ -138,12 +183,15 @@ static NSString *RSXMLStringWithValue(unichar value);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return NO;
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
|
||||
#pragma mark - C Functions
|
||||
|
||||
|
||||
static NSString *RSXMLStringWithValue(unichar value) {
|
||||
|
||||
return [[NSString alloc] initWithFormat:@"%C", value];
|
||||
|
||||
@@ -1,13 +1,32 @@
|
||||
//
|
||||
// RSAtomParser.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 1/15/15.
|
||||
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "FeedParser.h"
|
||||
#import "RSFeedParser.h"
|
||||
|
||||
@interface RSAtomParser : NSObject <FeedParser>
|
||||
// <feed> <entry>
|
||||
// https://validator.w3.org/feed/docs/rfc4287.html
|
||||
|
||||
@interface RSAtomParser : RSFeedParser
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,601 +1,253 @@
|
||||
//
|
||||
// RSAtomParser.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 1/15/15.
|
||||
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import <libxml/xmlstring.h>
|
||||
#import "RSAtomParser.h"
|
||||
#import "RSSAXParser.h"
|
||||
#import "FeedParser.h"
|
||||
#import "RSParsedFeed.h"
|
||||
#import "RSParsedArticle.h"
|
||||
#import "RSXMLData.h"
|
||||
#import "NSString+RSXML.h"
|
||||
#import "RSDateParser.h"
|
||||
|
||||
static NSString *kAlternateValue = @"alternate";
|
||||
static NSString *kRelatedValue = @"related";
|
||||
|
||||
@interface RSAtomParser () <RSSAXParserDelegate>
|
||||
|
||||
@property (nonatomic) NSData *feedData;
|
||||
@property (nonatomic) NSString *urlString;
|
||||
@property (nonatomic) BOOL endFeedFound;
|
||||
@property (nonatomic) BOOL parsingXHTML;
|
||||
@property (nonatomic) BOOL parsingSource;
|
||||
@property (nonatomic) BOOL parsingArticle;
|
||||
@property (nonatomic) BOOL parsingAuthor;
|
||||
@property (nonatomic) NSMutableArray *attributesStack;
|
||||
@property (nonatomic, readonly) NSDictionary *currentAttributes;
|
||||
@property (nonatomic, assign) BOOL endFeedFound;
|
||||
@property (nonatomic, assign) BOOL parsingXHTML;
|
||||
@property (nonatomic, assign) BOOL parsingSource;
|
||||
@property (nonatomic, assign) BOOL parsingArticle;
|
||||
@property (nonatomic, assign) BOOL parsingAuthor;
|
||||
@property (nonatomic) NSMutableString *xhtmlString;
|
||||
@property (nonatomic) NSString *feedLink;
|
||||
@property (nonatomic) NSString *feedTitle;
|
||||
@property (nonatomic) NSString *feedSubtitle;
|
||||
@property (nonatomic) NSMutableArray *articles;
|
||||
@property (nonatomic) NSDate *dateParsed;
|
||||
@property (nonatomic) RSSAXParser *parser;
|
||||
@property (nonatomic, readonly) RSParsedArticle *currentArticle;
|
||||
@property (nonatomic, readonly) NSDate *currentDate;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSAtomParser
|
||||
|
||||
#pragma mark - Class Methods
|
||||
#pragma mark - RSXMLParserDelegate
|
||||
|
||||
+ (BOOL)canParseFeed:(RSXMLData *)xmlData {
|
||||
|
||||
// Checking for '<feed' and '<entry' within first n characters should do it.
|
||||
|
||||
@autoreleasepool {
|
||||
|
||||
NSData *feedData = xmlData.data;
|
||||
|
||||
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
|
||||
if (!s) {
|
||||
s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding];
|
||||
}
|
||||
if (!s) {
|
||||
s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding];
|
||||
}
|
||||
if (!s) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
static const NSInteger numberOfCharactersToSearch = 4096;
|
||||
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
|
||||
if (s.length < numberOfCharactersToSearch) {
|
||||
rangeToSearch.length = s.length;
|
||||
}
|
||||
|
||||
NSRange feedRange = [s rangeOfString:@"<feed" options:NSLiteralSearch range:rangeToSearch];
|
||||
NSRange entryRange = [s rangeOfString:@"<entry" options:NSLiteralSearch range:rangeToSearch];
|
||||
if (feedRange.length < 1 || entryRange.length < 1) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
if (feedRange.location > entryRange.location) {
|
||||
return NO; // Wrong order.
|
||||
}
|
||||
}
|
||||
|
||||
return YES;
|
||||
+ (NSArray<const NSString *> *)parserRequireOrderedTags {
|
||||
return @[@"<feed", @"<entry"];
|
||||
}
|
||||
|
||||
#pragma mark - Helper
|
||||
|
||||
#pragma mark - Init
|
||||
- (void)setFeedOrArticleLink:(NSDictionary*)attribs {
|
||||
|
||||
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
|
||||
|
||||
self = [super init];
|
||||
if (!self) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
_feedData = xmlData.data;
|
||||
_urlString = xmlData.urlString;
|
||||
_parser = [[RSSAXParser alloc] initWithDelegate:self];
|
||||
_attributesStack = [NSMutableArray new];
|
||||
_articles = [NSMutableArray new];
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - API
|
||||
|
||||
- (RSParsedFeed *)parseFeed {
|
||||
|
||||
[self parse];
|
||||
|
||||
RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.feedTitle link:self.feedLink articles:self.articles];
|
||||
parsedFeed.subtitle = self.feedSubtitle;
|
||||
|
||||
return parsedFeed;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Constants
|
||||
|
||||
static NSString *kTypeKey = @"type";
|
||||
static NSString *kXHTMLType = @"xhtml";
|
||||
static NSString *kRelKey = @"rel";
|
||||
static NSString *kAlternateValue = @"alternate";
|
||||
static NSString *kHrefKey = @"href";
|
||||
static NSString *kXMLKey = @"xml";
|
||||
static NSString *kBaseKey = @"base";
|
||||
static NSString *kLangKey = @"lang";
|
||||
static NSString *kXMLBaseKey = @"xml:base";
|
||||
static NSString *kXMLLangKey = @"xml:lang";
|
||||
static NSString *kTextHTMLValue = @"text/html";
|
||||
static NSString *kRelatedValue = @"related";
|
||||
static NSString *kShortURLValue = @"shorturl";
|
||||
static NSString *kHTMLValue = @"html";
|
||||
static NSString *kEnValue = @"en";
|
||||
static NSString *kTextValue = @"text";
|
||||
static NSString *kSelfValue = @"self";
|
||||
|
||||
static const char *kID = "id";
|
||||
static const NSInteger kIDLength = 3;
|
||||
|
||||
static const char *kTitle = "title";
|
||||
static const NSInteger kTitleLength = 6;
|
||||
|
||||
static const char *kSubtitle = "subtitle";
|
||||
static const NSInteger kSubtitleLength = 9;
|
||||
|
||||
static const char *kContent = "content";
|
||||
static const NSInteger kContentLength = 8;
|
||||
|
||||
static const char *kSummary = "summary";
|
||||
static const NSInteger kSummaryLength = 8;
|
||||
|
||||
static const char *kLink = "link";
|
||||
static const NSInteger kLinkLength = 5;
|
||||
|
||||
static const char *kPublished = "published";
|
||||
static const NSInteger kPublishedLength = 10;
|
||||
|
||||
static const char *kUpdated = "updated";
|
||||
static const NSInteger kUpdatedLength = 8;
|
||||
|
||||
static const char *kAuthor = "author";
|
||||
static const NSInteger kAuthorLength = 7;
|
||||
|
||||
static const char *kEntry = "entry";
|
||||
static const NSInteger kEntryLength = 6;
|
||||
|
||||
static const char *kSource = "source";
|
||||
static const NSInteger kSourceLength = 7;
|
||||
|
||||
static const char *kFeed = "feed";
|
||||
static const NSInteger kFeedLength = 5;
|
||||
|
||||
static const char *kType = "type";
|
||||
static const NSInteger kTypeLength = 5;
|
||||
|
||||
static const char *kRel = "rel";
|
||||
static const NSInteger kRelLength = 4;
|
||||
|
||||
static const char *kAlternate = "alternate";
|
||||
static const NSInteger kAlternateLength = 10;
|
||||
|
||||
static const char *kHref = "href";
|
||||
static const NSInteger kHrefLength = 5;
|
||||
|
||||
static const char *kXML = "xml";
|
||||
static const NSInteger kXMLLength = 4;
|
||||
|
||||
static const char *kBase = "base";
|
||||
static const NSInteger kBaseLength = 5;
|
||||
|
||||
static const char *kLang = "lang";
|
||||
static const NSInteger kLangLength = 5;
|
||||
|
||||
static const char *kTextHTML = "text/html";
|
||||
static const NSInteger kTextHTMLLength = 10;
|
||||
|
||||
static const char *kRelated = "related";
|
||||
static const NSInteger kRelatedLength = 8;
|
||||
|
||||
static const char *kShortURL = "shorturl";
|
||||
static const NSInteger kShortURLLength = 9;
|
||||
|
||||
static const char *kHTML = "html";
|
||||
static const NSInteger kHTMLLength = 5;
|
||||
|
||||
static const char *kEn = "en";
|
||||
static const NSInteger kEnLength = 3;
|
||||
|
||||
static const char *kText = "text";
|
||||
static const NSInteger kTextLength = 5;
|
||||
|
||||
static const char *kSelf = "self";
|
||||
static const NSInteger kSelfLength = 5;
|
||||
|
||||
|
||||
#pragma mark - Parsing
|
||||
|
||||
- (void)parse {
|
||||
|
||||
self.dateParsed = [NSDate date];
|
||||
|
||||
@autoreleasepool {
|
||||
[self.parser parseData:self.feedData];
|
||||
[self.parser finishParsing];
|
||||
}
|
||||
|
||||
// Optimization: make articles do calculations on this background thread.
|
||||
[self.articles makeObjectsPerformSelector:@selector(calculateArticleID)];
|
||||
}
|
||||
|
||||
|
||||
- (void)addArticle {
|
||||
|
||||
RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString];
|
||||
article.dateParsed = self.dateParsed;
|
||||
|
||||
[self.articles addObject:article];
|
||||
}
|
||||
|
||||
|
||||
- (RSParsedArticle *)currentArticle {
|
||||
|
||||
return self.articles.lastObject;
|
||||
}
|
||||
|
||||
|
||||
- (NSDictionary *)currentAttributes {
|
||||
|
||||
return self.attributesStack.lastObject;
|
||||
}
|
||||
|
||||
|
||||
- (NSDate *)currentDate {
|
||||
|
||||
return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length);
|
||||
}
|
||||
|
||||
|
||||
- (void)addFeedLink {
|
||||
|
||||
if (self.feedLink && self.feedLink.length > 0) {
|
||||
NSString *urlString = attribs[@"href"];
|
||||
if (urlString.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *related = self.currentAttributes[kRelKey];
|
||||
if (related == kAlternateValue) {
|
||||
self.feedLink = self.currentAttributes[kHrefKey];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)addFeedTitle {
|
||||
|
||||
if (self.feedTitle.length < 1) {
|
||||
self.feedTitle = self.parser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
}
|
||||
|
||||
- (void)addFeedSubtitle {
|
||||
|
||||
if (self.feedSubtitle.length < 1) {
|
||||
self.feedSubtitle = self.parser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
}
|
||||
|
||||
- (void)addLink {
|
||||
|
||||
NSString *urlString = self.currentAttributes[kHrefKey];
|
||||
if (urlString.length < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *rel = self.currentAttributes[kRelKey];
|
||||
if (rel.length < 1) {
|
||||
NSString *rel = attribs[@"rel"];
|
||||
if (rel.length == 0) {
|
||||
rel = kAlternateValue;
|
||||
}
|
||||
|
||||
if (rel == kAlternateValue) {
|
||||
if (!self.currentArticle.link) {
|
||||
self.currentArticle.link = urlString;
|
||||
if (!self.parsingArticle) { // Feed
|
||||
if (!self.parsedFeed.link && rel == kAlternateValue) {
|
||||
self.parsedFeed.link = urlString;
|
||||
}
|
||||
}
|
||||
else if (rel == kRelatedValue) {
|
||||
if (!self.currentArticle.permalink) {
|
||||
else if (!self.parsingSource) { // Article
|
||||
if (!self.currentArticle.link && rel == kAlternateValue) {
|
||||
self.currentArticle.link = urlString;
|
||||
}
|
||||
else if (!self.currentArticle.permalink && rel == kRelatedValue) {
|
||||
self.currentArticle.permalink = urlString;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)addContent {
|
||||
|
||||
self.currentArticle.body = [self currentStringWithHTMLEntitiesDecoded];
|
||||
}
|
||||
#pragma mark - Parse XHTML
|
||||
|
||||
|
||||
- (void)addSummary {
|
||||
|
||||
self.currentArticle.abstract = [self currentStringWithHTMLEntitiesDecoded];
|
||||
}
|
||||
|
||||
|
||||
- (NSString *)currentStringWithHTMLEntitiesDecoded {
|
||||
|
||||
return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities];
|
||||
}
|
||||
|
||||
|
||||
- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {
|
||||
|
||||
if (prefix) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(localName, kID, kIDLength)) {
|
||||
self.currentArticle.guid = self.parser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
|
||||
self.currentArticle.title = [self currentStringWithHTMLEntitiesDecoded];
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kContent, kContentLength)) {
|
||||
[self addContent];
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kSummary, kSummaryLength)) {
|
||||
[self addSummary];
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
|
||||
[self addLink];
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kPublished, kPublishedLength)) {
|
||||
self.currentArticle.datePublished = self.currentDate;
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kUpdated, kUpdatedLength)) {
|
||||
self.currentArticle.dateModified = self.currentDate;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)addXHTMLTag:(const xmlChar *)localName {
|
||||
- (void)addXHTMLTag:(const xmlChar *)localName attributes:(NSDictionary*)attribs {
|
||||
|
||||
if (!localName) {
|
||||
return;
|
||||
}
|
||||
|
||||
[self.xhtmlString appendString:@"<"];
|
||||
[self.xhtmlString appendString:[NSString stringWithUTF8String:(const char *)localName]];
|
||||
[self.xhtmlString appendFormat:@"<%s", localName];
|
||||
|
||||
if (self.currentAttributes.count < 1) {
|
||||
[self.xhtmlString appendString:@">"];
|
||||
return;
|
||||
}
|
||||
|
||||
for (NSString *oneKey in self.currentAttributes) {
|
||||
|
||||
[self.xhtmlString appendString:@" "];
|
||||
|
||||
NSString *oneValue = self.currentAttributes[oneKey];
|
||||
[self.xhtmlString appendString:oneKey];
|
||||
|
||||
[self.xhtmlString appendString:@"=\""];
|
||||
|
||||
oneValue = [oneValue stringByReplacingOccurrencesOfString:@"\"" withString:@"""];
|
||||
[self.xhtmlString appendString:oneValue];
|
||||
|
||||
[self.xhtmlString appendString:@"\""];
|
||||
for (NSString *key in attribs) {
|
||||
NSString *val = [attribs[key] stringByReplacingOccurrencesOfString:@"\"" withString:@"""];
|
||||
[self.xhtmlString appendFormat:@" %@=\"%@\"", key, val];
|
||||
}
|
||||
|
||||
[self.xhtmlString appendString:@">"];
|
||||
}
|
||||
|
||||
/// Handles a closing tag seen while XHTML content is being accumulated.
///
/// A closing "content" or "summary" tag ends XHTML mode; when an article is
/// being parsed, the markup accumulated so far is copied into the article's
/// body or abstract respectively. In every case the closing tag is then
/// appended to the XHTML buffer.
///
/// @param localName Element name as delivered by the SAX parser (UTF-8 bytes).
/// @param len Byte length of @c localName; the visible caller passes xmlStrlen(localName).
- (void)parseXHTMLEndElement:(const xmlChar *)localName length:(int)len {
	// Nothing to close without a name; the start-tag path bails out the same way.
	if (!localName) {
		return;
	}

	if (len == 7) {
		if (EqualBytes(localName, "content", 7)) {
			if (self.parsingArticle) {
				self.currentArticle.body = [self.xhtmlString copy];
			}
			self.parsingXHTML = NO;
		}
		else if (EqualBytes(localName, "summary", 7)) {
			if (self.parsingArticle) {
				self.currentArticle.abstract = [self.xhtmlString copy];
			}
			self.parsingXHTML = NO;
		}
	}

	// "%s" in an NSString format decodes bytes with the system encoding, which
	// garbles non-ASCII names. libxml2 delivers UTF-8, so decode it explicitly.
	NSString *tagName = [[NSString alloc] initWithBytes:localName length:(NSUInteger)len encoding:NSUTF8StringEncoding];
	if (tagName) {
		[self.xhtmlString appendFormat:@"</%@>", tagName];
	}
}
|
||||
|
||||
|
||||
#pragma mark - RSSAXParserDelegate
|
||||
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
|
||||
|
||||
if (self.endFeedFound) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSDictionary *xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
if (!xmlAttributes) {
|
||||
xmlAttributes = [NSDictionary dictionary];
|
||||
}
|
||||
[self.attributesStack addObject:xmlAttributes];
|
||||
|
||||
if (self.parsingXHTML) {
|
||||
[self addXHTMLTag:localName];
|
||||
NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
[self addXHTMLTag:localName attributes:attribs];
|
||||
return;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(localName, kEntry, kEntryLength)) {
|
||||
self.parsingArticle = YES;
|
||||
[self addArticle];
|
||||
return;
|
||||
|
||||
int len = xmlStrlen(localName);
|
||||
switch (len) {
|
||||
case 4:
|
||||
if (EqualBytes(localName, "link", 4)) {
|
||||
NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
[self setFeedOrArticleLink:attribs];
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
if (EqualBytes(localName, "entry", 5)) {
|
||||
self.parsingArticle = YES;
|
||||
self.currentArticle = [self.parsedFeed appendNewArticle];
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
if (EqualBytes(localName, "author", 6)) {
|
||||
self.parsingAuthor = YES;
|
||||
return;
|
||||
} else if (EqualBytes(localName, "source", 6)) {
|
||||
self.parsingSource = YES;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case 7: // uses attrib
|
||||
if (self.parsingArticle) {
|
||||
break;
|
||||
}
|
||||
if (!EqualBytes(localName, "content", 7) && !EqualBytes(localName, "summary", 7)) {
|
||||
break;
|
||||
}
|
||||
NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
if ([attribs[@"type"] isEqualToString:@"xhtml"]) {
|
||||
self.parsingXHTML = YES;
|
||||
self.xhtmlString = [NSMutableString stringWithString:@""];
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
|
||||
self.parsingAuthor = YES;
|
||||
return;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(localName, kSource, kSourceLength)) {
|
||||
self.parsingSource = YES;
|
||||
return;
|
||||
}
|
||||
|
||||
BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength);
|
||||
BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength);
|
||||
if (self.parsingArticle && (isContentTag || isSummaryTag)) {
|
||||
|
||||
NSString *contentType = xmlAttributes[kTypeKey];
|
||||
if ([contentType isEqualToString:kXHTMLType]) {
|
||||
self.parsingXHTML = YES;
|
||||
self.xhtmlString = [NSMutableString stringWithString:@""];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!self.parsingArticle && RSSAXEqualTags(localName, kLink, kLinkLength)) {
|
||||
[self addFeedLink];
|
||||
return;
|
||||
}
|
||||
|
||||
[self.parser beginStoringCharacters];
|
||||
[SAXParser beginStoringCharacters];
|
||||
}
|
||||
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
|
||||
|
||||
if (RSSAXEqualTags(localName, kFeed, kFeedLength)) {
|
||||
self.endFeedFound = YES;
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.endFeedFound) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.parsingXHTML) {
|
||||
|
||||
BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength);
|
||||
BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength);
|
||||
|
||||
if (self.parsingArticle) {
|
||||
if (isContentTag) {
|
||||
self.currentArticle.body = [self.xhtmlString copy];
|
||||
}
|
||||
else if (isSummaryTag) {
|
||||
self.currentArticle.abstract = [self.xhtmlString copy];
|
||||
}
|
||||
}
|
||||
|
||||
if (isContentTag || isSummaryTag) {
|
||||
self.parsingXHTML = NO;
|
||||
}
|
||||
|
||||
[self.xhtmlString appendString:@"</"];
|
||||
[self.xhtmlString appendString:[NSString stringWithUTF8String:(const char *)localName]];
|
||||
[self.xhtmlString appendString:@">"];
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
|
||||
self.parsingAuthor = NO;
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kEntry, kEntryLength)) {
|
||||
self.parsingArticle = NO;
|
||||
}
|
||||
|
||||
else if (self.parsingArticle && !self.parsingSource) {
|
||||
[self addArticleElement:localName prefix:prefix];
|
||||
}
|
||||
int len = xmlStrlen(localName);
|
||||
|
||||
else if (RSSAXEqualTags(localName, kSource, kSourceLength)) {
|
||||
self.parsingSource = NO;
|
||||
if (len == 4 && EqualBytes(localName, "feed", 4)) {
|
||||
self.endFeedFound = YES;
|
||||
return;
|
||||
}
|
||||
|
||||
else if (!self.parsingArticle && !self.parsingSource) {
|
||||
if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
|
||||
[self addFeedTitle];
|
||||
}
|
||||
else if (RSSAXEqualTags(localName, kSubtitle, kSubtitleLength)) {
|
||||
[self addFeedSubtitle];
|
||||
}
|
||||
}
|
||||
[self.attributesStack removeLastObject];
|
||||
}
|
||||
|
||||
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
|
||||
|
||||
if (prefix && RSSAXEqualTags(prefix, kXML, kXMLLength)) {
|
||||
|
||||
if (RSSAXEqualTags(name, kBase, kBaseLength)) {
|
||||
return kXMLBaseKey;
|
||||
}
|
||||
if (RSSAXEqualTags(name, kLang, kLangLength)) {
|
||||
return kXMLLangKey;
|
||||
}
|
||||
if (self.parsingXHTML) {
|
||||
[self parseXHTMLEndElement:localName length:len];
|
||||
return;
|
||||
}
|
||||
|
||||
if (prefix) {
|
||||
return nil;
|
||||
BOOL isArticle = (self.parsingArticle && !self.parsingSource && !prefix);
|
||||
|
||||
switch (len) {
|
||||
case 2:
|
||||
if (isArticle && EqualBytes(localName, "id", 2)) {
|
||||
self.currentArticle.guid = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
return;
|
||||
case 5:
|
||||
if (EqualBytes(localName, "entry", 5)) {
|
||||
self.parsingArticle = NO;
|
||||
}
|
||||
else if (isArticle && EqualBytes(localName, "title", 5)) {
|
||||
self.currentArticle.title = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace];
|
||||
}
|
||||
else if (!self.parsingArticle && !self.parsingSource && self.parsedFeed.title.length == 0) {
|
||||
if (EqualBytes(localName, "title", 5)) {
|
||||
self.parsedFeed.title = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
}
|
||||
return;
|
||||
case 6:
|
||||
if (EqualBytes(localName, "author", 6)) {
|
||||
self.parsingAuthor = NO;
|
||||
}
|
||||
else if (EqualBytes(localName, "source", 6)) {
|
||||
self.parsingSource = NO;
|
||||
}
|
||||
return;
|
||||
case 8:
|
||||
if (!self.parsingArticle && !self.parsingSource && self.parsedFeed.subtitle.length == 0) {
|
||||
if (EqualBytes(localName, "subtitle", 8)) {
|
||||
self.parsedFeed.subtitle = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
}
|
||||
return;
|
||||
case 7:
|
||||
if (isArticle) {
|
||||
if (EqualBytes(localName, "content", 7)) {
|
||||
self.currentArticle.body = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace];
|
||||
}
|
||||
else if (EqualBytes(localName, "summary", 7)) {
|
||||
self.currentArticle.abstract = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace];
|
||||
}
|
||||
else if (EqualBytes(localName, "updated", 7)) {
|
||||
self.currentArticle.dateModified = [self dateFromCharacters:SAXParser.currentCharacters];
|
||||
}
|
||||
}
|
||||
return;
|
||||
case 9:
|
||||
if (isArticle && EqualBytes(localName, "published", 9)) {
|
||||
self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(name, kRel, kRelLength)) {
|
||||
return kRelKey;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(name, kType, kTypeLength)) {
|
||||
return kTypeKey;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(name, kHref, kHrefLength)) {
|
||||
return kHrefKey;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(name, kAlternate, kAlternateLength)) {
|
||||
return kAlternateValue;
|
||||
}
|
||||
|
||||
return nil;
|
||||
}
|
||||
|
||||
|
||||
/// Maps a recognised attribute value to its shared constant string.
/// @param bytes  Raw bytes of the attribute value.
/// @param length Number of bytes in `bytes`.
/// @return The matching k…Value constant, or nil when the value is not recognised.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {

	// Number of bytes to compare for each candidate: one less than its k…Length constant.
	static const NSUInteger enCount = kEnLength - 1;
	static const NSUInteger htmlCount = kHTMLLength - 1;
	static const NSUInteger textCount = kTextLength - 1;
	static const NSUInteger selfCount = kSelfLength - 1;
	static const NSUInteger relatedCount = kRelatedLength - 1;
	static const NSUInteger shortURLCount = kShortURLLength - 1;
	static const NSUInteger alternateCount = kAlternateLength - 1;
	static const NSUInteger textHTMLCount = kTextHTMLLength - 1;

	NSString *interned = nil;

	// The candidates are distinct strings, so at most one branch can match.
	if (length == alternateCount && RSSAXEqualBytes(bytes, kAlternate, alternateCount)) {
		interned = kAlternateValue;
	}
	else if (length == textHTMLCount && RSSAXEqualBytes(bytes, kTextHTML, textHTMLCount)) {
		interned = kTextHTMLValue;
	}
	else if (length == relatedCount && RSSAXEqualBytes(bytes, kRelated, relatedCount)) {
		interned = kRelatedValue;
	}
	else if (length == shortURLCount && RSSAXEqualBytes(bytes, kShortURL, shortURLCount)) {
		interned = kShortURLValue;
	}
	else if (length == htmlCount && RSSAXEqualBytes(bytes, kHTML, htmlCount)) {
		interned = kHTMLValue;
	}
	else if (length == enCount && RSSAXEqualBytes(bytes, kEn, enCount)) {
		interned = kEnValue;
	}
	else if (length == textCount && RSSAXEqualBytes(bytes, kText, textCount)) {
		interned = kTextValue;
	}
	else if (length == selfCount && RSSAXEqualBytes(bytes, kSelf, selfCount)) {
		interned = kSelfValue;
	}

	return interned;
}
|
||||
|
||||
|
||||
@@ -606,4 +258,60 @@ static const NSInteger kSelfLength = 5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Maps well-known attribute names to constant strings.
/// @param name   NUL-terminated local name of the attribute.
/// @param prefix NUL-terminated namespace prefix, or NULL when the attribute has none.
/// @return @"xml:base" / @"xml:lang" for those two prefixed names, a constant for
///         "rel", "type", "href" and "alternate", and nil for everything else.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {

	int len = xmlStrlen(name);

	if (prefix) {
		// Only the exact prefix "xml" qualifies. Checking the prefix length first
		// keeps a prefix that merely starts with "xml" (e.g. "xmlfoo:base") from
		// being reported as "xml:base".
		// len == 4 covers both "base" and "lang".
		if (len == 4 && xmlStrlen(prefix) == 3 && EqualBytes(prefix, "xml", 3)) {
			if (EqualBytes(name, "base", 4)) { return @"xml:base"; }
			if (EqualBytes(name, "lang", 4)) { return @"xml:lang"; }
		}
		// No other prefixed attribute name is interned.
		return nil;
	}

	// Unprefixed names: dispatch on length so each name needs at most two comparisons.
	switch (len) {
		case 3:
			if (EqualBytes(name, "rel", 3)) { return @"rel"; }
			break;
		case 4:
			if (EqualBytes(name, "type", 4)) { return @"type"; }
			if (EqualBytes(name, "href", 4)) { return @"href"; }
			break;
		case 9:
			if (EqualBytes(name, "alternate", 9)) { return kAlternateValue; }
			break;
	}

	return nil;
}
|
||||
|
||||
|
||||
/// Maps a recognised attribute value to a constant string.
/// @param bytes  Raw bytes of the attribute value.
/// @param length Number of bytes in `bytes`.
/// @return The constant for the value, or nil when the value is not recognised.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {

	// Candidates are grouped by byte length, so only same-sized strings are compared.
	if (length == 2) {
		if (EqualBytes(bytes, "en", 2)) { return @"en"; }
	}
	else if (length == 4) {
		if (EqualBytes(bytes, "html", 4)) { return @"html"; }
		if (EqualBytes(bytes, "text", 4)) { return @"text"; }
		if (EqualBytes(bytes, "self", 4)) { return @"self"; }
	}
	else if (length == 7) {
		if (EqualBytes(bytes, "related", 7)) { return kRelatedValue; }
	}
	else if (length == 8) {
		if (EqualBytes(bytes, "shorturl", 8)) { return @"shorturl"; }
	}
	else if (length == 9) {
		if (EqualBytes(bytes, "alternate", 9)) { return kAlternateValue; }
		if (EqualBytes(bytes, "text/html", 9)) { return @"text/html"; }
	}

	return nil;
}
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,10 +1,25 @@
|
||||
//
|
||||
// RSDateParser.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 3/25/15.
|
||||
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
|
||||
|
||||
@@ -1,10 +1,25 @@
|
||||
//
|
||||
// RSDateParser.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 3/25/15.
|
||||
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import <time.h>
|
||||
#import "RSDateParser.h"
|
||||
|
||||
@@ -1,28 +1,35 @@
|
||||
//
|
||||
// RSFeedParser.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 1/4/15.
|
||||
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "FeedParser.h"
|
||||
#import "RSXMLParser.h"
|
||||
|
||||
// If you have a feed and don’t know or care what it is (RSS or Atom),
|
||||
// then call RSParseFeed or RSParseFeedSync.
|
||||
@class RSParsedFeed, RSParsedArticle;
|
||||
|
||||
@class RSXMLData;
|
||||
@class RSParsedFeed;
|
||||
@interface RSFeedParser : RSXMLParser<RSParsedFeed*>
|
||||
@property (nonatomic, readonly) RSParsedFeed *parsedFeed;
|
||||
@property (nonatomic, weak) RSParsedArticle *currentArticle;
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
BOOL RSCanParseFeed(RSXMLData *xmlData);
|
||||
|
||||
|
||||
typedef void (^RSParsedFeedBlock)(RSParsedFeed * _Nullable parsedFeed, NSError * _Nullable error);
|
||||
|
||||
// callback is called on main queue.
|
||||
void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback);
|
||||
RSParsedFeed * _Nullable RSParseFeedSync(RSXMLData *xmlData, NSError * _Nullable * _Nullable error);
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
- (NSDate *)dateFromCharacters:(NSData *)data;
|
||||
- (NSString *)decodeHTMLEntities:(NSString *)str;
|
||||
@end
|
||||
|
||||
@@ -1,229 +1,58 @@
|
||||
//
|
||||
// FeedParser.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 1/4/15.
|
||||
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSXMLError.h"
|
||||
#import "RSFeedParser.h"
|
||||
#import "FeedParser.h"
|
||||
#import "RSXMLData.h"
|
||||
#import "RSRSSParser.h"
|
||||
#import "RSAtomParser.h"
|
||||
#import "RSParsedFeed.h"
|
||||
#import "RSParsedArticle.h"
|
||||
#import "RSDateParser.h"
|
||||
#import "NSString+RSXML.h"
|
||||
|
||||
/// Returns the list of feed parser classes (RSS first, then Atom).
/// The array is created once on first use and reused afterwards.
static NSArray *parserClasses(void) {

	static NSArray *sharedParserClasses = nil;
	static dispatch_once_t once;

	dispatch_once(&once, ^{
		sharedParserClasses = @[RSRSSParser.class, RSAtomParser.class];
	});

	return sharedParserClasses;
}
|
||||
@implementation RSFeedParser
|
||||
|
||||
static BOOL feedMayBeParseable(RSXMLData *xmlData) {
|
||||
|
||||
/*Sanity checks.*/
|
||||
|
||||
if (!xmlData.data) {
|
||||
return NO;
|
||||
}
|
||||
#pragma mark - RSXMLParserDelegate
|
||||
|
||||
/*TODO: check size, type, etc.*/
|
||||
|
||||
+ (BOOL)isFeedParser { return YES; }
|
||||
|
||||
- (BOOL)xmlParserWillStartParsing {
|
||||
_parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.documentURI];
|
||||
return YES;
|
||||
}
|
||||
|
||||
static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes);
|
||||
static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes);
|
||||
static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes);
|
||||
static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
|
||||
static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes);
|
||||
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes);
|
||||
|
||||
static const NSUInteger maxNumberOfBytesToSearch = 4096;
|
||||
static const NSUInteger minNumberOfBytesToSearch = 20;
|
||||
|
||||
static Class parserClassForXMLData(RSXMLData *xmlData, NSError **error) {
|
||||
|
||||
if (!feedMayBeParseable(xmlData)) {
|
||||
RSXMLSetError(error, RSXMLErrorNoData, nil);
|
||||
return nil;
|
||||
}
|
||||
|
||||
// TODO: check for things like images and movies and return nil.
|
||||
|
||||
const char *bytes = xmlData.data.bytes;
|
||||
NSUInteger numberOfBytes = xmlData.data.length;
|
||||
|
||||
if (numberOfBytes > minNumberOfBytesToSearch) {
|
||||
|
||||
if (numberOfBytes > maxNumberOfBytesToSearch) {
|
||||
numberOfBytes = maxNumberOfBytesToSearch;
|
||||
}
|
||||
|
||||
if (!dataHasLeftCaret(bytes, numberOfBytes)) {
|
||||
RSXMLSetError(error, RSXMLErrorMissingLeftCaret, nil);
|
||||
return nil;
|
||||
}
|
||||
if (optimisticCanParseRSSData(bytes, numberOfBytes)) {
|
||||
return [RSRSSParser class];
|
||||
}
|
||||
if (optimisticCanParseAtomData(bytes, numberOfBytes)) {
|
||||
return [RSAtomParser class];
|
||||
}
|
||||
if (optimisticCanParseRDF(bytes, numberOfBytes)) {
|
||||
return [RSRSSParser class]; //TODO: parse RDF feeds, using RSS parser so far ...
|
||||
}
|
||||
if (dataIsProbablyHTML(bytes, numberOfBytes)) {
|
||||
RSXMLSetError(error, RSXMLErrorProbablyHTML, nil);
|
||||
return nil;
|
||||
}
|
||||
if (dataIsSomeWeirdException(bytes, numberOfBytes)) {
|
||||
RSXMLSetError(error, RSXMLErrorContainsXMLErrorsTag, nil);
|
||||
return nil;
|
||||
}
|
||||
}
|
||||
|
||||
for (Class parserClass in parserClasses()) {
|
||||
if ([parserClass canParseFeed:xmlData]) {
|
||||
return parserClass;
|
||||
//return [[parserClass alloc] initWithXMLData:xmlData]; // does not make sense to return instance
|
||||
}
|
||||
}
|
||||
// Try RSS anyway? libxml would return a parsing error
|
||||
RSXMLSetError(error, RSXMLErrorNoSuitableParser, nil);
|
||||
return nil;
|
||||
- (id)xmlParserWillReturnDocument {
|
||||
// Optimization: make articles do calculations on this background thread.
|
||||
[_parsedFeed.articles makeObjectsPerformSelector:@selector(calculateArticleID)];
|
||||
return _parsedFeed;
|
||||
}
|
||||
|
||||
static id<FeedParser> parserForXMLData(RSXMLData *xmlData, NSError **error) {
|
||||
|
||||
Class parserClass = parserClassForXMLData(xmlData, error);
|
||||
if (!parserClass) {
|
||||
return nil;
|
||||
}
|
||||
return [[parserClass alloc] initWithXMLData:xmlData];
|
||||
/// @return @c NSDate by parsing RFC 822 and 8601 date strings.
|
||||
- (NSDate *)dateFromCharacters:(NSData *)data {
|
||||
return RSDateWithBytes(data.bytes, data.length);
|
||||
}
|
||||
|
||||
/// YES when a parser class can be determined for xmlData.
/// No error pointer is passed, so the reason for a failure is not reported.
static BOOL canParseXMLData(RSXMLData *xmlData) {

	Class matchingParser = parserClassForXMLData(xmlData, nil);
	return matchingParser != nil;
}
|
||||
|
||||
/// Reports whether the C string `string` occurs in `bytes`.
/// @param string        NUL-terminated needle.
/// @param bytes         Haystack to search.
/// @param numberOfBytes Maximum number of haystack bytes to examine.
/// @return YES if strnstr() finds the needle, NO otherwise.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {

	// strnstr() looks at no more than numberOfBytes characters of the haystack.
	return strnstr(bytes, string, numberOfBytes) != NULL;
}
|
||||
|
||||
/// YES when the searched bytes contain at least one '<' character.
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes) {

	BOOL foundCaret = didFindString("<", bytes, numberOfBytes);
	return foundCaret;
}
|
||||
|
||||
/// Heuristic: YES when the searched bytes contain one of a few typical HTML markers.
/// It is deliberately incomplete; missing some HTML documents is acceptable.
static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {

	// Markers are probed in this order; the first hit ends the search.
	static const char * const htmlMarkers[] = {
		"<html",
		"<body",
		"doctype html",
		"DOCTYPE html",
		"DOCTYPE HTML",
		"<meta",
		"<HTML"
	};
	static const size_t markerCount = sizeof(htmlMarkers) / sizeof(htmlMarkers[0]);

	for (size_t i = 0; i < markerCount; i++) {
		if (didFindString(htmlMarkers[i], bytes, numberOfBytes)) {
			return YES;
		}
	}

	return NO;
}
|
||||
|
||||
/// YES when the searched bytes contain an <errors> element whose xmlns starts
/// with 'http://schemas.google'.
static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes) {

	return didFindString("<errors xmlns='http://schemas.google", bytes, numberOfBytes);
}
|
||||
|
||||
/// YES when the searched bytes contain the opening of an <rdf:RDF> element.
static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes) {

	BOOL hasRDFRoot = didFindString("<rdf:RDF", bytes, numberOfBytes);
	return hasRDFRoot;
}
|
||||
|
||||
/// YES when the searched bytes contain both "<rss" and "<channel".
static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes) {

	// "<channel" is only searched for once "<rss" has been found.
	return didFindString("<rss", bytes, numberOfBytes)
		&& didFindString("<channel", bytes, numberOfBytes);
}
|
||||
|
||||
/// YES when the searched bytes contain "<feed".
static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes) {

	BOOL hasFeedTag = didFindString("<feed", bytes, numberOfBytes);
	return hasFeedTag;
}
|
||||
|
||||
/// Invokes `callback` asynchronously on the main queue with the given result.
/// A nil callback is tolerated: the dispatched block then does nothing.
static void callCallback(RSParsedFeedBlock callback, RSParsedFeed *parsedFeed, NSError *error) {

	dispatch_async(dispatch_get_main_queue(), ^{
		@autoreleasepool {
			if (!callback) {
				return;
			}
			callback(parsedFeed, error);
		}
	});
}
|
||||
|
||||
|
||||
#pragma mark - API
|
||||
|
||||
/// Public entry point: YES when a feed parser can be chosen for xmlData.
BOOL RSCanParseFeed(RSXMLData *xmlData) {

	BOOL parseable = canParseXMLData(xmlData);
	return parseable;
}
|
||||
|
||||
/// Parses xmlData on a global utility-QoS queue and reports the outcome
/// through `callback`, which callCallback() delivers on the main queue.
void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback) {

	dispatch_queue_t utilityQueue = dispatch_get_global_queue(QOS_CLASS_UTILITY, 0);

	dispatch_async(utilityQueue, ^{
		NSError *parseError = nil;
		RSParsedFeed *feed = RSParseFeedSync(xmlData, &parseError);
		callCallback(callback, feed, parseError);
	});
}
|
||||
|
||||
RSParsedFeed *RSParseFeedSync(RSXMLData *xmlData, NSError **error) {
|
||||
|
||||
xmlResetLastError();
|
||||
id<FeedParser> parser = parserForXMLData(xmlData, error);
|
||||
if (error && *error) {
|
||||
return nil;
|
||||
}
|
||||
RSParsedFeed *parsedResult = [parser parseFeed];
|
||||
if (error) {
|
||||
*error = RSXMLMakeErrorFromLIBXMLError(xmlGetLastError());
|
||||
xmlResetLastError();
|
||||
}
|
||||
return parsedResult;
|
||||
/// @return currentString by removing HTML encoded entities.
|
||||
- (NSString *)decodeHTMLEntities:(NSString *)str {
|
||||
return [str rs_stringByDecodingHTMLEntities];
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,31 +1,31 @@
|
||||
//
|
||||
// RSHTMLLinkParser.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 8/7/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
#import "RSXMLParser.h"
|
||||
|
||||
/*Returns all <a href="some_url">some_text</a> as RSHTMLLink object array.*/
|
||||
|
||||
@class RSXMLData;
|
||||
@class RSHTMLLink;
|
||||
|
||||
@interface RSHTMLLinkParser : NSObject
|
||||
|
||||
+ (NSArray <RSHTMLLink *> *)htmlLinksWithData:(RSXMLData *)xmlData;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@interface RSHTMLLink : NSObject
|
||||
|
||||
// Any of these, even urlString, may be nil, because HTML can be bad.
|
||||
|
||||
@property (nonatomic, readonly) NSString *urlString; //absolute
|
||||
@property (nonatomic, readonly) NSString *text;
|
||||
@property (nonatomic, readonly) NSString *title; //title attribute inside anchor tag
|
||||
@class RSHTMLMetadataAnchor;
|
||||
|
||||
@interface RSHTMLLinkParser : RSXMLParser<NSArray<RSHTMLMetadataAnchor*>*>
|
||||
@end
|
||||
|
||||
@@ -1,151 +1,91 @@
|
||||
//
|
||||
// RSHTMLLinkParser.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 8/7/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import <libxml/xmlstring.h>
|
||||
#import "RSHTMLLinkParser.h"
|
||||
#import "RSSAXHTMLParser.h"
|
||||
#import "RSSAXParser.h"
|
||||
#import "RSXMLData.h"
|
||||
#import "RSXMLInternal.h"
|
||||
#import "RSHTMLMetadata.h"
|
||||
#import "NSDictionary+RSXML.h"
|
||||
|
||||
|
||||
@interface RSHTMLLinkParser() <RSSAXHTMLParserDelegate>
|
||||
|
||||
@property (nonatomic, readonly) NSMutableArray *links;
|
||||
@property (nonatomic, readonly) RSXMLData *xmlData;
|
||||
@property (nonatomic, readonly) NSMutableArray *dictionaries;
|
||||
@interface RSHTMLLinkParser()
|
||||
@property (nonatomic, readonly) NSURL *baseURL;
|
||||
|
||||
@property (nonatomic) NSMutableArray<RSHTMLMetadataAnchor*> *mutableLinksList;
|
||||
@property (nonatomic) NSMutableString *currentText;
|
||||
@end
|
||||
|
||||
|
||||
@interface RSHTMLLink()
|
||||
|
||||
@property (nonatomic, readwrite) NSString *urlString; //absolute
|
||||
@property (nonatomic, readwrite) NSString *text;
|
||||
@property (nonatomic, readwrite) NSString *title; //title attribute inside anchor tag
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSHTMLLinkParser
|
||||
|
||||
#pragma mark - RSXMLParserDelegate
|
||||
|
||||
#pragma mark - Class Methods
|
||||
+ (BOOL)isHTMLParser { return YES; }
|
||||
|
||||
+ (NSArray *)htmlLinksWithData:(RSXMLData *)xmlData {
|
||||
- (BOOL)xmlParserWillStartParsing {
|
||||
_baseURL = [NSURL URLWithString:self.documentURI];
|
||||
_mutableLinksList = [NSMutableArray new];
|
||||
return YES;
|
||||
}
|
||||
|
||||
RSHTMLLinkParser *parser = [[self alloc] initWithXMLData:xmlData];
|
||||
return parser.links;
|
||||
- (id)xmlParserWillReturnDocument {
|
||||
return [_mutableLinksList copy];
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Init
|
||||
#pragma mark - RSSAXParserDelegate
|
||||
|
||||
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
|
||||
|
||||
NSParameterAssert(xmlData.data);
|
||||
NSParameterAssert(xmlData.urlString);
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
|
||||
|
||||
self = [super init];
|
||||
if (!self) {
|
||||
return nil;
|
||||
if (EqualBytes(localName, "a", 2)) { // 2 because length is not checked
|
||||
NSDictionary *attribs = [SAXParser attributesDictionaryHTML:attributes];
|
||||
if (!attribs || attribs.count == 0) {
|
||||
return;
|
||||
}
|
||||
NSString *href = [attribs rsxml_objectForCaseInsensitiveKey:@"href"];
|
||||
if (!href) {
|
||||
return;
|
||||
}
|
||||
RSHTMLMetadataAnchor *obj = [RSHTMLMetadataAnchor new];
|
||||
[self.mutableLinksList addObject:obj];
|
||||
// set link properties
|
||||
obj.tooltip = [attribs rsxml_objectForCaseInsensitiveKey:@"title"];
|
||||
obj.link = [[NSURL URLWithString:href relativeToURL:self.baseURL] absoluteString];
|
||||
// begin storing data for link description
|
||||
[SAXParser beginStoringCharacters];
|
||||
self.currentText = [NSMutableString new];
|
||||
}
|
||||
|
||||
_links = [NSMutableArray new];
|
||||
_xmlData = xmlData;
|
||||
_dictionaries = [NSMutableArray new];
|
||||
_baseURL = [NSURL URLWithString:xmlData.urlString];
|
||||
|
||||
[self parse];
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Parse
|
||||
|
||||
/// Runs an RSSAXHTMLParser over the stored data, with this object as its delegate.
- (void)parse {

	RSSAXHTMLParser *htmlParser = [[RSSAXHTMLParser alloc] initWithDelegate:self];

	[htmlParser parseData:self.xmlData.data];
	[htmlParser finishParsing];
}
|
||||
|
||||
|
||||
/// The most recently added link, or nil when no link has been collected yet.
- (RSHTMLLink *)currentLink {

	return [self.links lastObject];
}
|
||||
|
||||
|
||||
static NSString *kHrefKey = @"href";
|
||||
|
||||
/// Resolves the href attribute in `d` against the parser's base URL.
/// @param d Attribute dictionary of an anchor tag; the key lookup is case-insensitive.
/// @return The absolute URL string, or nil when there is no href attribute.
- (NSString *)urlStringFromDictionary:(NSDictionary *)d {

	NSString *hrefValue = [d rsxml_objectForCaseInsensitiveKey:kHrefKey];

	if (hrefValue) {
		return [[NSURL URLWithString:hrefValue relativeToURL:self.baseURL] absoluteString];
	}

	return nil;
}
|
||||
|
||||
|
||||
static NSString *kTitleKey = @"title";
|
||||
|
||||
/// Returns the title attribute in `d` (case-insensitive key lookup), or nil if absent.
- (NSString *)titleFromDictionary:(NSDictionary *)d {

	NSString *titleValue = [d rsxml_objectForCaseInsensitiveKey:kTitleKey];
	return titleValue;
}
|
||||
|
||||
|
||||
/// Copies the URL and title found in the attribute dictionary `d` onto the current link.
- (void)handleLinkAttributes:(NSDictionary *)d {

	RSHTMLLink *activeLink = [self currentLink];

	activeLink.urlString = [self urlStringFromDictionary:d];
	activeLink.title = [self titleFromDictionary:d];
}
|
||||
|
||||
|
||||
static const char *kAnchor = "a";
|
||||
static const NSInteger kAnchorLength = 2;
|
||||
|
||||
/// Start-tag callback. For each anchor tag a new RSHTMLLink is appended to
/// `links`, filled from the tag's attributes when there are any, and character
/// storage is switched on. Any other tag is ignored.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {

	if (RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {

		// The link is added before its attributes are read; handleLinkAttributes:
		// fills in whichever link is last in `links`.
		[self.links addObject:[RSHTMLLink new]];

		NSDictionary *attributesByName = [SAXParser attributesDictionary:attributes];
		if (!RSXMLIsEmpty(attributesByName)) {
			[self handleLinkAttributes:attributesByName];
		}

		[SAXParser beginStoringCharacters];
	}
}
|
||||
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName {
|
||||
|
||||
if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
|
||||
return;
|
||||
if (self.currentText != nil) {
|
||||
NSString *str = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
if (str) {
|
||||
[self.currentText appendString:str];
|
||||
}
|
||||
if (EqualBytes(localName, "a", 2)) { // 2 because length is not checked
|
||||
self.mutableLinksList.lastObject.title = self.currentText;
|
||||
self.currentText = nil;
|
||||
}
|
||||
}
|
||||
|
||||
self.currentLink.text = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
@implementation RSHTMLLink
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,45 +1,64 @@
|
||||
//
|
||||
// RSHTMLMetadata.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 3/6/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
|
||||
@class RSHTMLMetadataFeedLink;
|
||||
@class RSHTMLMetadataAppleTouchIcon;
|
||||
/// Kind of syndication feed advertised by a link's `type` attribute.
/// Declared with NS_ENUM so the underlying type is explicit and switch
/// statements over it get exhaustiveness warnings.
typedef NS_ENUM(NSInteger, RSFeedType) {
    /// The type attribute is missing or not a recognized feed type.
    RSFeedTypeNone,
    /// The type attribute ends with "/rss+xml".
    RSFeedTypeRSS,
    /// The type attribute ends with "/atom+xml".
    RSFeedTypeAtom
};
|
||||
|
||||
RSFeedType RSFeedTypeFromLinkTypeAttribute(NSString * typeStr);
|
||||
|
||||
|
||||
@class RSHTMLMetadataIconLink, RSHTMLMetadataFeedLink;
|
||||
|
||||
@interface RSHTMLMetadata : NSObject
|
||||
|
||||
- (instancetype)initWithURLString:(NSString *)urlString dictionaries:(NSArray <NSDictionary *> *)dictionaries;
|
||||
|
||||
@property (nonatomic, readonly) NSString *baseURLString;
|
||||
@property (nonatomic, readonly) NSArray <NSDictionary *> *dictionaries;
|
||||
|
||||
@property (nonatomic, readonly) NSString *faviconLink;
|
||||
@property (nonatomic, readonly) NSArray <RSHTMLMetadataAppleTouchIcon *> *appleTouchIcons;
|
||||
@property (nonatomic, readonly) NSArray <RSHTMLMetadataFeedLink *> *feedLinks;
|
||||
|
||||
@property (nonatomic, copy, nullable) NSString *faviconLink;
|
||||
@property (nonatomic, nonnull) NSArray <RSHTMLMetadataIconLink *> *iconLinks;
|
||||
@property (nonatomic, nonnull) NSArray <RSHTMLMetadataFeedLink *> *feedLinks;
|
||||
@end
|
||||
|
||||
|
||||
@interface RSHTMLMetadataAppleTouchIcon : NSObject
|
||||
|
||||
@property (nonatomic, readonly) NSString *rel;
|
||||
@property (nonatomic, readonly) NSString *sizes;
|
||||
@property (nonatomic, readonly) NSString *urlString; // Absolute.
|
||||
|
||||
// Common base for link-like metadata items: an absolute URL string plus an optional title.
// Superclass of RSHTMLMetadataIconLink, RSHTMLMetadataFeedLink and RSHTMLMetadataAnchor.
@interface RSHTMLMetadataLink : NSObject
|
||||
@property (nonatomic, copy, nonnull) NSString *link; // absolute
|
||||
@property (nonatomic, copy, nullable) NSString *title; // meaning depends on the subclass
|
||||
@end
|
||||
|
||||
|
||||
@interface RSHTMLMetadataFeedLink : NSObject
|
||||
|
||||
@property (nonatomic, readonly) NSString *title;
|
||||
@property (nonatomic, readonly) NSString *type;
|
||||
@property (nonatomic, readonly) NSString *urlString; // Absolute.
|
||||
|
||||
// Icon reference found in a <link> tag. title: lowercased rel value, 'icon' or 'apple-touch-icon*'
@interface RSHTMLMetadataIconLink : RSHTMLMetadataLink
|
||||
@property (nonatomic, copy, nullable) NSString *sizes; // raw value of the 'sizes' attribute
|
||||
// Parses `sizes` as two integers separated by a single "x"; returns CGSizeZero otherwise.
- (CGSize)getSize;
|
||||
@end
|
||||
|
||||
|
||||
@interface RSHTMLMetadataFeedLink : RSHTMLMetadataLink // title: value of the link tag's 'title' attribute
|
||||
@property (nonatomic, assign) RSFeedType type; // feed format derived from the link tag's 'type' attribute
|
||||
@end
|
||||
|
||||
|
||||
@interface RSHTMLMetadataAnchor : RSHTMLMetadataLink // title: anchor text-value
|
||||
// Optional extra text; -description prints it in brackets when set.
// NOTE(review): presumably the anchor's 'title' attribute — confirm against the parser that fills it.
@property (nonatomic, copy, nullable) NSString *tooltip;
|
||||
@end
|
||||
|
||||
@@ -1,245 +1,98 @@
|
||||
//
|
||||
// RSHTMLMetadata.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 3/6/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSHTMLMetadata.h"
|
||||
#import "RSXMLInternal.h"
|
||||
|
||||
static NSString *urlStringFromDictionary(NSDictionary *d);
|
||||
static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString);
|
||||
static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString);
|
||||
static NSArray *objectsOfClassWithDictionaries(Class class, NSArray *dictionaries, NSString *baseURLString);
|
||||
static NSString *relValue(NSDictionary *d);
|
||||
static BOOL typeIsFeedType(NSString *type);
|
||||
// Maps the value of a link's `type` attribute to a feed type.
// The comparison is case-insensitive and only looks at the suffix, so any
// value ending in "/rss+xml" is RSS and any value ending in "/atom+xml" is Atom.
// Returns RSFeedTypeNone for nil, empty, or unrecognized values.
RSFeedType RSFeedTypeFromLinkTypeAttribute(NSString * typeStr) {
    // The previous guard read `typeStr || typeStr.length > 0`, which uses the
    // wrong operator and only behaved because messaging nil yields 0.
    // A single length check covers both nil and the empty string.
    if (typeStr.length > 0) {
        NSString *lowercased = typeStr.lowercaseString;
        if ([lowercased hasSuffix:@"/rss+xml"]) {
            return RSFeedTypeRSS;
        }
        if ([lowercased hasSuffix:@"/atom+xml"]) {
            return RSFeedTypeAtom;
        }
    }
    return RSFeedTypeNone;
}
|
||||
|
||||
static NSString *kShortcutIconRelValue = @"shortcut icon";
|
||||
static NSString *kHrefKey = @"href";
|
||||
static NSString *kSrcKey = @"src";
|
||||
static NSString *kAppleTouchIconValue = @"apple-touch-icon";
|
||||
static NSString *kAppleTouchIconPrecomposedValue = @"apple-touch-icon-precomposed";
|
||||
static NSString *kSizesKey = @"sizes";
|
||||
static NSString *kTitleKey = @"title";
|
||||
static NSString *kRelKey = @"rel";
|
||||
static NSString *kAlternateKey = @"alternate";
|
||||
static NSString *kRSSSuffix = @"/rss+xml";
|
||||
static NSString *kAtomSuffix = @"/atom+xml";
|
||||
static NSString *kTypeKey = @"type";
|
||||
|
||||
@interface RSHTMLMetadataAppleTouchIcon ()
|
||||
|
||||
- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString;
|
||||
|
||||
@implementation RSHTMLMetadataLink

// A link describes itself by its URL string.
- (NSString*)description {
    NSString *text = self.link;
    return text;
}

@end
|
||||
|
||||
|
||||
@interface RSHTMLMetadataFeedLink ()
|
||||
@implementation RSHTMLMetadataIconLink
|
||||
|
||||
- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSHTMLMetadata
|
||||
|
||||
|
||||
#pragma mark - Init
|
||||
|
||||
// Builds the metadata from the attribute dictionaries collected from a page.
// urlString is kept as the base URL string and is passed on when the icon
// and feed-link objects are created. The favicon, the apple-touch icons and
// the feed links are all derived here, once.
- (instancetype)initWithURLString:(NSString *)urlString dictionaries:(NSArray <NSDictionary *> *)dictionaries {
    self = [super init];
    if (self) {
        _baseURLString = urlString;
        _dictionaries = dictionaries;
        _faviconLink = [self resolvedLinkFromFirstDictionaryWithMatchingRel:kShortcutIconRelValue];

        NSArray *touchIconAttributes = [self appleTouchIconDictionaries];
        _appleTouchIcons = objectsOfClassWithDictionaries([RSHTMLMetadataAppleTouchIcon class], touchIconAttributes, urlString);

        NSArray *feedAttributes = [self feedLinkDictionaries];
        _feedLinks = objectsOfClassWithDictionaries([RSHTMLMetadataFeedLink class], feedAttributes, urlString);
    }
    return self;
}
|
||||
|
||||
|
||||
#pragma mark - Private
|
||||
|
||||
- (NSDictionary *)firstDictionaryWithMatchingRel:(NSString *)valueToMatch {
|
||||
|
||||
// Case-insensitive.
|
||||
|
||||
for (NSDictionary *oneDictionary in self.dictionaries) {
|
||||
|
||||
NSString *oneRelValue = relValue(oneDictionary);
|
||||
if (oneRelValue && [oneRelValue compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame) {
|
||||
return oneDictionary;
|
||||
- (CGSize)getSize {
|
||||
if (self.sizes && self.sizes.length > 0) {
|
||||
NSArray<NSString*> *parts = [self.sizes componentsSeparatedByString:@"x"];
|
||||
if (parts.count == 2) {
|
||||
return CGSizeMake([parts.firstObject intValue], [parts.lastObject intValue]);
|
||||
}
|
||||
}
|
||||
|
||||
return nil;
|
||||
return CGSizeZero;
|
||||
}
|
||||
|
||||
|
||||
- (NSArray *)appleTouchIconDictionaries {
|
||||
|
||||
NSMutableArray *dictionaries = [NSMutableArray new];
|
||||
|
||||
for (NSDictionary *oneDictionary in self.dictionaries) {
|
||||
|
||||
NSString *oneRelValue = relValue(oneDictionary).lowercaseString;
|
||||
if ([oneRelValue isEqualToString:kAppleTouchIconValue] || [oneRelValue isEqualToString:kAppleTouchIconPrecomposedValue]) {
|
||||
[dictionaries addObject:oneDictionary];
|
||||
}
|
||||
}
|
||||
|
||||
return dictionaries;
|
||||
- (NSString*)description {
|
||||
return [NSString stringWithFormat:@"%@ [%@] (%@)", self.title, self.sizes, self.link];
|
||||
}
|
||||
|
||||
|
||||
// Collects the attribute dictionaries that describe a feed: rel must be
// "alternate" (case-insensitive), the type must be a feed type, and a
// non-empty URL string must be present. Checks run in that order.
- (NSArray *)feedLinkDictionaries {
    NSMutableArray *matches = [NSMutableArray new];

    for (NSDictionary *attributes in self.dictionaries) {
        NSString *rel = relValue(attributes).lowercaseString;
        BOOL isFeedLink = [rel isEqualToString:kAlternateKey]
            && typeIsFeedType([attributes rsxml_objectForCaseInsensitiveKey:kTypeKey])
            && !RSXMLStringIsEmpty(urlStringFromDictionary(attributes));
        if (isFeedLink) {
            [matches addObject:attributes];
        }
    }

    return matches;
}
|
||||
|
||||
|
||||
// Finds the first attribute dictionary whose rel matches and returns its
// URL resolved against the base URL string.
- (NSString *)resolvedLinkFromFirstDictionaryWithMatchingRel:(NSString *)relValue {
    NSDictionary *match = [self firstDictionaryWithMatchingRel:relValue];
    NSString *base = self.baseURLString;
    return absoluteURLStringWithDictionary(match, base);
}
|
||||
|
||||
|
||||
@end
|
||||
|
||||
|
||||
// Value of the `rel` attribute, looked up without regard to key case.
static NSString *relValue(NSDictionary *d) {
    NSString *rel = [d rsxml_objectForCaseInsensitiveKey:kRelKey];
    return rel;
}
|
||||
|
||||
|
||||
// Returns the `href` attribute when present, otherwise falls back to `src`.
// Both keys are matched case-insensitively.
static NSString *urlStringFromDictionary(NSDictionary *d) {
    NSString *href = [d rsxml_objectForCaseInsensitiveKey:kHrefKey];
    if (!href) {
        return [d rsxml_objectForCaseInsensitiveKey:kSrcKey];
    }
    return href;
}
|
||||
|
||||
|
||||
// Resolves relativeURLString against baseURLString and returns the absolute
// URL as a string. Returns nil when the base string cannot be turned into a URL.
static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString) {
    NSURL *base = [NSURL URLWithString:baseURLString];
    if (base != nil) {
        NSURL *resolved = [NSURL URLWithString:relativeURLString relativeToURL:base];
        return resolved.absoluteString;
    }
    return nil;
}
|
||||
|
||||
|
||||
// Takes the URL string (href, else src) from the attribute dictionary and
// resolves it against baseURLString. Returns nil when no URL string is present.
static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString) {
    NSString *candidate = urlStringFromDictionary(d);
    BOOL hasCandidate = !RSXMLStringIsEmpty(candidate);
    return hasCandidate ? absoluteURLStringWithRelativeURLString(candidate, baseURLString) : nil;
}
|
||||
|
||||
|
||||
// Creates one instance of `class` per attribute dictionary through
// -initWithDictionary:baseURLString:, dropping any that come back nil.
// Returns an immutable copy of the resulting list.
static NSArray *objectsOfClassWithDictionaries(Class class, NSArray *dictionaries, NSString *baseURLString) {
    NSMutableArray *created = [NSMutableArray new];

    for (NSDictionary *attributes in dictionaries) {
        id instance = [[class alloc] initWithDictionary:attributes baseURLString:baseURLString];
        if (!instance) {
            continue;
        }
        [created addObject:instance];
    }

    return [created copy];
}
|
||||
|
||||
|
||||
// YES when the lowercased type string ends with the RSS or the Atom suffix.
static BOOL typeIsFeedType(NSString *type) {
    NSString *lowercased = type.lowercaseString;
    if ([lowercased hasSuffix:kRSSSuffix]) {
        return YES;
    }
    return [lowercased hasSuffix:kAtomSuffix];
}
|
||||
|
||||
|
||||
@implementation RSHTMLMetadataAppleTouchIcon

// Fills the icon from a link tag's attributes: the URL resolved against
// baseURLString, plus the raw `sizes` and `rel` values.
- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString {
    self = [super init];
    if (self) {
        _urlString = absoluteURLStringWithDictionary(d, baseURLString);
        _sizes = [d rsxml_objectForCaseInsensitiveKey:kSizesKey];
        _rel = [d rsxml_objectForCaseInsensitiveKey:kRelKey];
    }
    return self;
}

@end
|
||||
|
||||
|
||||
@implementation RSHTMLMetadataFeedLink
|
||||
|
||||
|
||||
- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString {
|
||||
|
||||
self = [super init];
|
||||
if (!self) {
|
||||
return nil;
|
||||
- (NSString*)description {
|
||||
NSString *prefix;
|
||||
switch (_type) {
|
||||
case RSFeedTypeNone: prefix = @"None"; break;
|
||||
case RSFeedTypeRSS: prefix = @"RSS"; break;
|
||||
case RSFeedTypeAtom: prefix = @"Atom"; break;
|
||||
}
|
||||
|
||||
_urlString = absoluteURLStringWithDictionary(d, baseURLString);
|
||||
_title = [d rsxml_objectForCaseInsensitiveKey:kTitleKey];
|
||||
_type = [d rsxml_objectForCaseInsensitiveKey:kTypeKey];
|
||||
|
||||
return self;
|
||||
return [NSString stringWithFormat:@"[%@] %@ (%@)", prefix, self.title, self.link];
|
||||
}
|
||||
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSHTMLMetadataAnchor

// "title (link)" without a tooltip, "title [tooltip] (link)" with one.
- (NSString*)description {
    if (_tooltip) {
        return [NSString stringWithFormat:@"%@ [%@] (%@)", self.title, self.tooltip, self.link];
    }
    return [NSString stringWithFormat:@"%@ (%@)", self.title, self.link];
}

@end
|
||||
|
||||
|
||||
@implementation RSHTMLMetadata

// Multi-line summary listing the favicon, the feed links and the icons.
- (NSString*)description {
    NSString *favicon = self.faviconLink;
    NSArray *feeds = self.feedLinks;
    NSArray *icons = self.iconLinks;
    return [NSString stringWithFormat:@"favicon: %@\nFeed links: %@\nIcons: %@\n", favicon, feeds, icons];
}

@end
|
||||
|
||||
@@ -1,28 +1,32 @@
|
||||
//
|
||||
// RSHTMLMetadataParser.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 3/6/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
|
||||
#import "RSXMLParser.h"
|
||||
|
||||
@class RSHTMLMetadata;
|
||||
@class RSXMLData;
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
@interface RSHTMLMetadataParser : NSObject
|
||||
|
||||
+ (RSHTMLMetadata *)HTMLMetadataWithXMLData:(RSXMLData *)xmlData;
|
||||
|
||||
- (instancetype)initWithXMLData:(RSXMLData *)xmlData;
|
||||
|
||||
@property (nonatomic, readonly) RSHTMLMetadata *metadata;
|
||||
|
||||
@interface RSHTMLMetadataParser : RSXMLParser<RSHTMLMetadata*>
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
|
||||
@@ -1,128 +1,111 @@
|
||||
//
|
||||
// RSHTMLMetadataParser.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 3/6/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import <libxml/xmlstring.h>
|
||||
#import "RSHTMLMetadataParser.h"
|
||||
#import "RSXMLData.h"
|
||||
#import "RSHTMLMetadata.h"
|
||||
#import "RSSAXHTMLParser.h"
|
||||
#import "RSSAXParser.h"
|
||||
#import "RSXMLInternal.h"
|
||||
|
||||
|
||||
@interface RSHTMLMetadataParser () <RSSAXHTMLParserDelegate>
|
||||
|
||||
@property (nonatomic, readonly) RSXMLData *xmlData;
|
||||
@property (nonatomic, readwrite) RSHTMLMetadata *metadata;
|
||||
@property (nonatomic) NSMutableArray *dictionaries;
|
||||
@property (nonatomic) BOOL didFinishParsing;
|
||||
#import "NSString+RSXML.h"
|
||||
#import "NSDictionary+RSXML.h"
|
||||
|
||||
@interface RSHTMLMetadataParser()
|
||||
@property (nonatomic, readonly) NSURL *baseURL;
|
||||
@property (nonatomic) NSString *faviconLink;
|
||||
@property (nonatomic) NSMutableArray<RSHTMLMetadataIconLink*> *iconLinks;
|
||||
@property (nonatomic) NSMutableArray<RSHTMLMetadataFeedLink*> *feedLinks;
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSHTMLMetadataParser
|
||||
|
||||
#pragma mark - RSXMLParserDelegate
|
||||
|
||||
#pragma mark - Class Methods
|
||||
+ (BOOL)isHTMLParser { return YES; }
|
||||
|
||||
+ (RSHTMLMetadata *)HTMLMetadataWithXMLData:(RSXMLData *)xmlData {
|
||||
// Prepares per-parse state: the base URL built from the document URI and
// empty collections for icon and feed links. Always returns YES.
- (BOOL)xmlParserWillStartParsing {
    NSString *uri = self.documentURI;
    _baseURL = [NSURL URLWithString:uri];
    _iconLinks = [[NSMutableArray alloc] init];
    _feedLinks = [[NSMutableArray alloc] init];
    return YES;
}
|
||||
|
||||
RSHTMLMetadataParser *parser = [[self alloc] initWithXMLData:xmlData];
|
||||
return parser.metadata;
|
||||
// Packages what was collected while parsing into a fresh RSHTMLMetadata.
// The link collections are handed over as copies.
- (id)xmlParserWillReturnDocument {
    RSHTMLMetadata *result = [RSHTMLMetadata new];
    result.faviconLink = self.faviconLink;
    result.feedLinks = [self.feedLinks copy];
    result.iconLinks = [self.iconLinks copy];
    return result;
}
|
||||
|
||||
|
||||
#pragma mark - Init
|
||||
|
||||
// Stores the XML data, prepares the dictionary collection and parses
// immediately. Asserts that the data carries both a payload and a URL string.
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
    NSParameterAssert(xmlData.data);
    NSParameterAssert(xmlData.urlString);

    self = [super init];
    if (self) {
        _xmlData = xmlData;
        _dictionaries = [NSMutableArray new];
        [self parse];
    }
    return self;
}
|
||||
#pragma mark - RSSAXParserDelegate
|
||||
|
||||
|
||||
#pragma mark - Parse
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
|
||||
|
||||
- (void)parse {
|
||||
|
||||
RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
|
||||
[parser parseData:self.xmlData.data];
|
||||
[parser finishParsing];
|
||||
|
||||
self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.xmlData.urlString dictionaries:[self.dictionaries copy]];
|
||||
}
|
||||
|
||||
|
||||
static NSString *kHrefKey = @"href";
|
||||
static NSString *kSrcKey = @"src";
|
||||
static NSString *kRelKey = @"rel";
|
||||
|
||||
// Returns the `href` attribute when present, otherwise the `src` attribute.
// Keys are matched case-insensitively.
- (NSString *)linkForDictionary:(NSDictionary *)d {
    NSString *href = [d rsxml_objectForCaseInsensitiveKey:kHrefKey];
    if (!href) {
        return [d rsxml_objectForCaseInsensitiveKey:kSrcKey];
    }
    return href;
}
|
||||
|
||||
|
||||
- (void)handleLinkAttributes:(NSDictionary *)d {
|
||||
|
||||
if (RSXMLStringIsEmpty([d rsxml_objectForCaseInsensitiveKey:kRelKey])) {
|
||||
if (xmlStrlen(localName) != 4) {
|
||||
return;
|
||||
}
|
||||
if (RSXMLStringIsEmpty([self linkForDictionary:d])) {
|
||||
return;
|
||||
else if (EqualBytes(localName, "body", 4)) {
|
||||
[SAXParser cancel]; // we're only interested in head
|
||||
}
|
||||
else if (EqualBytes(localName, "link", 4)) {
|
||||
[self parseLinkItemWithAttributes:[SAXParser attributesDictionaryHTML:attributes]];
|
||||
}
|
||||
|
||||
[self.dictionaries addObject:d];
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - RSSAXHTMLParserDelegate
|
||||
|
||||
static const char *kBody = "body";
|
||||
static const NSInteger kBodyLength = 5;
|
||||
static const char *kLink = "link";
|
||||
static const NSInteger kLinkLength = 5;
|
||||
|
||||
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
|
||||
|
||||
if (self.didFinishParsing) {
|
||||
- (void)parseLinkItemWithAttributes:(NSDictionary*)attribs {
|
||||
if (!attribs || attribs.count == 0)
|
||||
return;
|
||||
NSString *rel = [attribs rsxml_objectForCaseInsensitiveKey:@"rel"];
|
||||
if (!rel || rel.length == 0)
|
||||
return;
|
||||
NSString *link = [attribs rsxml_objectForCaseInsensitiveKey:@"href"];
|
||||
if (!link) {
|
||||
link = [attribs rsxml_objectForCaseInsensitiveKey:@"src"];
|
||||
if (!link)
|
||||
return;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(localName, kBody, kBodyLength)) {
|
||||
self.didFinishParsing = YES;
|
||||
return;
|
||||
rel = [rel lowercaseString];
|
||||
|
||||
if ([rel isEqualToString:@"shortcut icon"]) {
|
||||
self.faviconLink = [link absoluteURLWithBase:self.baseURL];
|
||||
}
|
||||
|
||||
if (!RSSAXEqualTags(localName, kLink, kLinkLength)) {
|
||||
return;
|
||||
else if ([rel isEqualToString:@"icon"] || [rel hasPrefix:@"apple-touch-icon"]) { // also matching "apple-touch-icon-precomposed"
|
||||
RSHTMLMetadataIconLink *icon = [RSHTMLMetadataIconLink new];
|
||||
icon.link = [link absoluteURLWithBase:self.baseURL];
|
||||
icon.title = rel;
|
||||
icon.sizes = [attribs rsxml_objectForCaseInsensitiveKey:@"sizes"];
|
||||
[self.iconLinks addObject:icon];
|
||||
}
|
||||
|
||||
NSDictionary *d = [SAXParser attributesDictionary:attributes];
|
||||
if (!RSXMLIsEmpty(d)) {
|
||||
[self handleLinkAttributes:d];
|
||||
else if ([rel isEqualToString:@"alternate"]) {
|
||||
RSFeedType type = RSFeedTypeFromLinkTypeAttribute([attribs rsxml_objectForCaseInsensitiveKey:@"type"]);
|
||||
if (type != RSFeedTypeNone) {
|
||||
RSHTMLMetadataFeedLink *feedLink = [RSHTMLMetadataFeedLink new];
|
||||
feedLink.link = [link absoluteURLWithBase:self.baseURL];
|
||||
feedLink.title = [attribs rsxml_objectForCaseInsensitiveKey:@"title"];
|
||||
feedLink.type = type;
|
||||
[self.feedLinks addObject:feedLink];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
//
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
|
||||
|
||||
@@ -1,6 +1,28 @@
|
||||
//
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSOPMLItem.h"
|
||||
#import "RSXMLInternal.h"
|
||||
#import "NSDictionary+RSXML.h"
|
||||
|
||||
|
||||
NSString *OPMLTextKey = @"text";
|
||||
@@ -63,8 +85,8 @@ NSString *OPMLXMLURLKey = @"xmlUrl";
|
||||
}
|
||||
|
||||
- (id)attributeForKey:(NSString *)key {
|
||||
if (self.attributes.count > 0 && !RSXMLStringIsEmpty(key)) {
|
||||
return [self.attributes rsxml_objectForCaseInsensitiveKey:key];
|
||||
if (self.mutableAttributes.count > 0 && key && key.length > 0) {
|
||||
return [self.mutableAttributes rsxml_objectForCaseInsensitiveKey:key];
|
||||
}
|
||||
return nil;
|
||||
}
|
||||
|
||||
@@ -1,29 +1,35 @@
|
||||
//
|
||||
// RSOPMLParser.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
#import "RSXMLParser.h"
|
||||
|
||||
// <opml> <outline>
|
||||
// http://dev.opml.org/spec2.html#subscriptionLists
|
||||
|
||||
@class RSXMLData;
|
||||
@class RSOPMLItem;
|
||||
|
||||
|
||||
typedef void (^RSParsedOPMLBlock)(RSOPMLItem *opmlDocument, NSError *error);
|
||||
|
||||
void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback); //async; calls back on main thread.
|
||||
|
||||
|
||||
@interface RSOPMLParser: NSObject
|
||||
|
||||
- (instancetype)initWithXMLData:(RSXMLData *)xmlData;
|
||||
|
||||
@property (nonatomic, readonly) RSOPMLItem *opmlDocument;
|
||||
@property (nonatomic, readonly) NSError *error;
|
||||
@interface RSOPMLParser: RSXMLParser<RSOPMLItem*>
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@@ -1,172 +1,76 @@
|
||||
//
|
||||
// RSOPMLParser.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSOPMLParser.h"
|
||||
#import <libxml/xmlstring.h>
|
||||
#import "RSXMLData.h"
|
||||
#import "RSSAXParser.h"
|
||||
#import "RSOPMLItem.h"
|
||||
#import "RSXMLError.h"
|
||||
|
||||
|
||||
// Asynchronous OPML parsing entry point.
// Asserts that both xmlData and callback are non-nil, then moves to a global
// default-QoS queue, creates an RSOPMLParser from xmlData and reads its
// opmlDocument and error properties. The callback is finally invoked on the
// main queue with that document/error pair.
void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback) {
|
||||
|
||||
NSCParameterAssert(xmlData);
|
||||
NSCParameterAssert(callback);
|
||||
|
||||
dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{
|
||||
|
||||
@autoreleasepool {
|
||||
|
||||
RSOPMLParser *parser = [[RSOPMLParser alloc] initWithXMLData:xmlData];
|
||||
|
||||
RSOPMLItem *document = parser.opmlDocument;
|
||||
NSError *error = parser.error;
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
|
||||
callback(document, error);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@interface RSOPMLParser () <RSSAXParserDelegate>
|
||||
|
||||
@property (nonatomic, readwrite) RSOPMLItem *opmlDocument;
|
||||
@property (nonatomic, readwrite) NSError *error;
|
||||
// Private parsing state of RSOPMLParser.
@interface RSOPMLParser()
|
||||
// YES between the start and the end tag of a "head" element.
@property (nonatomic, assign) BOOL parsingHead;
|
||||
// Root item of the document; created in -xmlParserWillStartParsing and
// handed out by -xmlParserWillReturnDocument.
@property (nonatomic) RSOPMLItem *opmlDocument;
|
||||
// Stack of currently open items. The last object is the parent that receives
// newly parsed "outline" children.
@property (nonatomic) NSMutableArray<RSOPMLItem*> *itemStack;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSOPMLParser
|
||||
|
||||
#pragma mark - RSXMLParserDelegate
|
||||
|
||||
#pragma mark - Init
|
||||
// Always YES for this class.
// NOTE(review): presumably overrides a class method declared by RSXMLParser (not visible here) — confirm.
+ (BOOL)isOPMLParser { return YES; }
|
||||
|
||||
- (instancetype)initWithXMLData:(RSXMLData *)XMLData {
|
||||
|
||||
self = [super init];
|
||||
if (!self) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
[self parse:XMLData];
|
||||
|
||||
return self;
|
||||
// Returns the two markers "<opml" and "<outline", in that order.
// NOTE(review): presumably the superclass uses them to decide whether the data
// looks like OPML (both present, in this order) — confirm against RSXMLParser.
+ (NSArray<const NSString*>*)parserRequireOrderedTags {
|
||||
return @[@"<opml", @"<outline"];
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Private
|
||||
|
||||
// Parses XMLData when -canParseData: accepts its bytes: seeds itemStack with a
// fresh root opmlDocument and feeds the data to an RSSAXParser whose delegate
// is self. Otherwise self.error is set to an RSXMLErrorFileNotOPML error that
// names the file (last path component for file URLs, the raw urlString
// otherwise).
- (void)parse:(RSXMLData *)XMLData {
|
||||
|
||||
@autoreleasepool {
|
||||
|
||||
if ([self canParseData:XMLData.data]) {
|
||||
RSSAXParser *parser = [[RSSAXParser alloc] initWithDelegate:self];
|
||||
|
||||
self.itemStack = [NSMutableArray new];
|
||||
self.opmlDocument = [RSOPMLItem new];
|
||||
[self.itemStack addObject:self.opmlDocument];
|
||||
|
||||
[parser parseData:XMLData.data];
|
||||
[parser finishParsing];
|
||||
|
||||
} else {
|
||||
|
||||
NSString *filename = nil;
|
||||
NSURL *url = [NSURL URLWithString:XMLData.urlString];
|
||||
if (url && url.isFileURL) {
|
||||
filename = url.path.lastPathComponent;
|
||||
}
|
||||
if (!filename) {
|
||||
filename = XMLData.urlString;
|
||||
}
|
||||
self.error = RSXMLMakeError(RSXMLErrorFileNotOPML, filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- (BOOL)canParseData:(NSData *)d {
|
||||
|
||||
// Check for <opml and <outline near the top.
|
||||
|
||||
@autoreleasepool {
|
||||
|
||||
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)d.bytes length:d.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
|
||||
if (!s) {
|
||||
NSDictionary *options = @{NSStringEncodingDetectionSuggestedEncodingsKey : @[@(NSUTF8StringEncoding)]};
|
||||
(void)[NSString stringEncodingForData:d encodingOptions:options convertedString:&s usedLossyConversion:nil];
|
||||
}
|
||||
if (!s) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
static const NSInteger numberOfCharactersToSearch = 4096;
|
||||
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
|
||||
if (s.length < numberOfCharactersToSearch) {
|
||||
rangeToSearch.length = s.length;
|
||||
}
|
||||
|
||||
NSRange opmlRange = [s rangeOfString:@"<opml" options:NSCaseInsensitiveSearch range:rangeToSearch];
|
||||
if (opmlRange.location == NSNotFound) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
NSRange outlineRange = [s rangeOfString:@"<outline" options:NSLiteralSearch range:rangeToSearch];
|
||||
if (outlineRange.location == NSNotFound) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
if (outlineRange.location < opmlRange.location) {
|
||||
return NO;
|
||||
}
|
||||
}
|
||||
|
||||
// Resets the parse state: creates a fresh root item and an item stack that
// contains only that root. Always returns YES.
// NOTE(review): presumably called by the RSXMLParser superclass before parsing
// begins, with the return value deciding whether to proceed — confirm.
- (BOOL)xmlParserWillStartParsing {
|
||||
self.opmlDocument = [RSOPMLItem new];
|
||||
self.itemStack = [NSMutableArray arrayWithObject:self.opmlDocument];
|
||||
return YES;
|
||||
}
|
||||
|
||||
|
||||
- (void)popItem {
|
||||
|
||||
NSAssert(self.itemStack.count > 0, nil);
|
||||
|
||||
/*If itemStack is empty, bad things are happening.
|
||||
But we still shouldn't crash in production.*/
|
||||
|
||||
if (self.itemStack.count > 0) {
|
||||
[self.itemStack removeLastObject];
|
||||
}
|
||||
// Returns the root OPML item that was created in -xmlParserWillStartParsing.
- (id)xmlParserWillReturnDocument {
|
||||
return self.opmlDocument;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - RSSAXParserDelegate
|
||||
|
||||
// Tag names matched via RSSAXEqualTags. Each length is one more than the
// visible character count ("outline" has 7 characters, "head" has 4).
// NOTE(review): presumably the extra byte is the terminating NUL — confirm
// against RSSAXEqualTags.
static const char *kOutline = "outline";
|
||||
static const char kOutlineLength = 8;
|
||||
static const char *kHead = "head";
|
||||
static const char kHeadLength = 5;
|
||||
// File-scope flag, i.e. a single value shared by every parser instance.
static BOOL isHead = NO;
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
|
||||
|
||||
if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
|
||||
int len = xmlStrlen(localName);
|
||||
|
||||
if (len == 7 && EqualBytes(localName, "outline", 7)) {
|
||||
RSOPMLItem *item = [RSOPMLItem new];
|
||||
item.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
|
||||
[self.itemStack.lastObject addChild:item];
|
||||
[self.itemStack addObject:item];
|
||||
} else if (RSSAXEqualTags(localName, kHead, kHeadLength)) {
|
||||
isHead = YES;
|
||||
} else if (isHead) {
|
||||
}
|
||||
else if (len == 4 && EqualBytes(localName, "head", 4)) {
|
||||
self.parsingHead = YES;
|
||||
}
|
||||
else if (self.parsingHead) {
|
||||
[SAXParser beginStoringCharacters];
|
||||
}
|
||||
}
|
||||
@@ -174,13 +78,17 @@ static BOOL isHead = NO;
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
|
||||
|
||||
if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
|
||||
[self popItem];
|
||||
} else if (RSSAXEqualTags(localName, kHead, kHeadLength)) {
|
||||
isHead = NO;
|
||||
} else if (isHead) {
|
||||
int len = xmlStrlen(localName);
|
||||
|
||||
if (len == 7 && EqualBytes(localName, "outline", 7)) {
|
||||
[self.itemStack removeLastObject]; // safe to be called on empty array
|
||||
}
|
||||
else if (len == 4 && EqualBytes(localName, "head", 4)) {
|
||||
self.parsingHead = NO;
|
||||
}
|
||||
else if (self.parsingHead) { // handle xml tags in head as if they were attributes
|
||||
NSString *key = [NSString stringWithFormat:@"%s", localName];
|
||||
[self.itemStack.lastObject setAttribute:[SAXParser currentString] forKey:key];
|
||||
[self.itemStack.lastObject setAttribute:SAXParser.currentStringWithTrimmedWhitespace forKey:key];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,24 +99,24 @@ static BOOL isHead = NO;
|
||||
return nil;
|
||||
}
|
||||
|
||||
size_t nameLength = strlen((const char *)name);
|
||||
switch (nameLength) {
|
||||
int len = xmlStrlen(name);
|
||||
switch (len) {
|
||||
case 4:
|
||||
if (RSSAXEqualTags(name, "text", 5)) return OPMLTextKey;
|
||||
if (RSSAXEqualTags(name, "type", 5)) return OPMLTypeKey;
|
||||
if (EqualBytes(name, "text", 4)) return OPMLTextKey;
|
||||
if (EqualBytes(name, "type", 4)) return OPMLTypeKey;
|
||||
break;
|
||||
case 5:
|
||||
if (RSSAXEqualTags(name, "title", 6)) return OPMLTitleKey;
|
||||
if (EqualBytes(name, "title", 5)) return OPMLTitleKey;
|
||||
break;
|
||||
case 6:
|
||||
if (RSSAXEqualTags(name, "xmlUrl", 7)) return OPMLXMLURLKey;
|
||||
if (EqualBytes(name, "xmlUrl", 6)) return OPMLXMLURLKey;
|
||||
break;
|
||||
case 7:
|
||||
if (RSSAXEqualTags(name, "version", 8)) return OPMLVersionKey;
|
||||
if (RSSAXEqualTags(name, "htmlUrl", 8)) return OPMLHMTLURLKey;
|
||||
if (EqualBytes(name, "version", 7)) return OPMLVersionKey;
|
||||
if (EqualBytes(name, "htmlUrl", 7)) return OPMLHMTLURLKey;
|
||||
break;
|
||||
case 11:
|
||||
if (RSSAXEqualTags(name, "description", 12)) return OPMLDescriptionKey;
|
||||
if (EqualBytes(name, "description", 11)) return OPMLDescriptionKey;
|
||||
break;
|
||||
}
|
||||
return nil;
|
||||
@@ -220,11 +128,10 @@ static BOOL isHead = NO;
|
||||
if (length < 1) {
|
||||
return @"";
|
||||
} else if (length == 3) {
|
||||
if (RSSAXEqualBytes(bytes, "RSS", 3)) return @"RSS";
|
||||
if (RSSAXEqualBytes(bytes, "rss", 3)) return @"rss";
|
||||
if (EqualBytes(bytes, "RSS", 3)) return @"RSS";
|
||||
if (EqualBytes(bytes, "rss", 3)) return @"rss";
|
||||
}
|
||||
return nil;
|
||||
}
|
||||
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,20 +1,37 @@
|
||||
//
|
||||
// RSParsedArticle.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 12/6/14.
|
||||
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
|
||||
|
||||
@interface RSParsedArticle : NSObject
|
||||
|
||||
- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL;
|
||||
- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL dateParsed:(NSDate*)parsed;
|
||||
|
||||
@property (nonatomic, readonly, nonnull) NSString *feedURL;
|
||||
@property (nonatomic, nonnull) NSString *articleID; //Calculated. Don't get until other properties have been set.
|
||||
@property (nonatomic, readonly, nonnull) NSDate *dateParsed;
|
||||
@property (nonatomic, readonly, nonnull) NSString *articleID; //Calculated. Don't get until other properties have been set.
|
||||
|
||||
@property (nonatomic, nullable) NSString *guid;
|
||||
@property (nonatomic, nullable) NSString *title;
|
||||
@@ -25,7 +42,6 @@
|
||||
@property (nonatomic, nullable) NSString *author;
|
||||
@property (nonatomic, nullable) NSDate *datePublished;
|
||||
@property (nonatomic, nullable) NSDate *dateModified;
|
||||
@property (nonatomic, nonnull) NSDate *dateParsed;
|
||||
|
||||
- (void)calculateArticleID; // Optimization. Call after all properties have been set. Call on a background thread.
|
||||
|
||||
|
||||
@@ -1,101 +1,104 @@
|
||||
//
|
||||
// RSParsedArticle.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 12/6/14.
|
||||
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSParsedArticle.h"
|
||||
#import "RSXMLInternal.h"
|
||||
#import "NSString+RSXML.h"
|
||||
|
||||
// Private storage of RSParsedArticle.
@interface RSParsedArticle()
|
||||
// Cache behind the public articleID getter; stays nil until the ID is first requested.
@property (nonatomic, copy) NSString *internalArticleID;
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSParsedArticle
|
||||
|
||||
|
||||
#pragma mark - Init
|
||||
|
||||
- (instancetype)initWithFeedURL:(NSString *)feedURL {
|
||||
- (instancetype)initWithFeedURL:(NSString *)feedURL dateParsed:(NSDate*)parsed {
|
||||
|
||||
NSParameterAssert(feedURL != nil);
|
||||
|
||||
self = [super init];
|
||||
if (!self) {
|
||||
return nil;
|
||||
if (self) {
|
||||
_feedURL = feedURL;
|
||||
_dateParsed = parsed;
|
||||
}
|
||||
|
||||
_feedURL = feedURL;
|
||||
_dateParsed = [NSDate date];
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
#pragma mark - Unique Article ID
|
||||
|
||||
#pragma mark - Accessors
|
||||
|
||||
/**
|
||||
Article ID will be generated on the first access.
|
||||
*/
|
||||
- (NSString *)articleID {
|
||||
|
||||
if (!_articleID) {
|
||||
_articleID = self.calculatedUniqueID;
|
||||
if (!_internalArticleID) {
|
||||
_internalArticleID = self.calculatedUniqueID;
|
||||
}
|
||||
|
||||
return _articleID;
|
||||
return _internalArticleID;
|
||||
}
|
||||
|
||||
/**
|
||||
Initiate calculation of article id.
|
||||
*/
|
||||
- (void)calculateArticleID {
|
||||
// Reading the property is sufficient: the articleID getter computes the ID on
// first access and keeps it. The value itself is deliberately discarded.
(void)self.articleID;
|
||||
}
|
||||
|
||||
/**
|
||||
@return MD5 hash of @c feedURL @c + @c guid. Or a combination of properties when guid is not set.
|
||||
@note
|
||||
In general, feeds should have guids. When they don't, re-runs are very likely,
|
||||
because there's no other 100% reliable way to determine identity.
|
||||
*/
|
||||
- (NSString *)calculatedUniqueID {
|
||||
|
||||
/*guid+feedID, or a combination of properties when no guid. Then hash the result.
|
||||
In general, feeds should have guids. When they don't, re-runs are very likely,
|
||||
because there's no other 100% reliable way to determine identity.*/
|
||||
|
||||
NSMutableString *s = [NSMutableString stringWithString:@""];
|
||||
NSAssert(self.feedURL != nil, @"Feed URL should always be set!");
|
||||
NSMutableString *s = [NSMutableString stringWithString:self.feedURL];
|
||||
|
||||
NSString *datePublishedTimeStampString = nil;
|
||||
if (self.datePublished) {
|
||||
datePublishedTimeStampString = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970];
|
||||
}
|
||||
|
||||
if (!RSXMLStringIsEmpty(self.guid)) {
|
||||
if (self.guid.length > 0) {
|
||||
[s appendString:self.guid];
|
||||
}
|
||||
|
||||
else if (!RSXMLStringIsEmpty(self.link) && self.datePublished != nil) {
|
||||
[s appendString:self.link];
|
||||
[s appendString:datePublishedTimeStampString];
|
||||
}
|
||||
|
||||
else if (!RSXMLStringIsEmpty(self.title) && self.datePublished != nil) {
|
||||
[s appendString:self.title];
|
||||
[s appendString:datePublishedTimeStampString];
|
||||
}
|
||||
|
||||
else if (self.datePublished != nil) {
|
||||
[s appendString:datePublishedTimeStampString];
|
||||
|
||||
if (self.link.length > 0) {
|
||||
[s appendString:self.link];
|
||||
} else if (self.title.length > 0) {
|
||||
[s appendString:self.title];
|
||||
}
|
||||
[s appendString:[NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970]];
|
||||
}
|
||||
|
||||
else if (!RSXMLStringIsEmpty(self.link)) {
|
||||
else if (self.link.length > 0) {
|
||||
[s appendString:self.link];
|
||||
}
|
||||
|
||||
else if (!RSXMLStringIsEmpty(self.title)) {
|
||||
else if (self.title.length > 0) {
|
||||
[s appendString:self.title];
|
||||
}
|
||||
|
||||
else if (!RSXMLStringIsEmpty(self.body)) {
|
||||
else if (self.body.length > 0) {
|
||||
[s appendString:self.body];
|
||||
}
|
||||
|
||||
NSAssert(!RSXMLStringIsEmpty(self.feedURL), nil);
|
||||
[s appendString:self.feedURL];
|
||||
|
||||
return [s rsxml_md5HashString];
|
||||
}
|
||||
|
||||
// Triggers calculation of the article ID ahead of time.
- (void)calculateArticleID {
|
||||
|
||||
// Reading the property is sufficient: the articleID getter computes the ID on
// first access and keeps it. The value itself is deliberately discarded.
(void)self.articleID;
|
||||
}
|
||||
#pragma mark - Printing
|
||||
|
||||
- (NSString*)description {
|
||||
return [NSString stringWithFormat:@"{%@ '%@', guid: %@}", [self class], self.title, self.guid];
|
||||
|
||||
@@ -1,23 +1,41 @@
|
||||
//
|
||||
// RSParsedFeed.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
|
||||
@class RSParsedArticle;
|
||||
|
||||
@interface RSParsedFeed : NSObject
|
||||
|
||||
- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString title:(NSString * _Nullable)title link:(NSString * _Nullable)link articles:(NSArray <RSParsedArticle *>* _Nonnull)articles;
|
||||
|
||||
@property (nonatomic, readonly, nonnull) NSString *urlString;
|
||||
@property (nonatomic, readonly, nullable) NSString *title;
|
||||
@property (nonatomic, readonly, nullable) NSString *link;
|
||||
@property (nonatomic, readonly, nonnull) NSDate *dateParsed;
|
||||
@property (nonatomic, readonly, nonnull) NSArray <RSParsedArticle *> *articles;
|
||||
|
||||
@property (nonatomic, nullable) NSString *title;
|
||||
@property (nonatomic, nullable) NSString *link;
|
||||
@property (nonatomic, nullable) NSString *subtitle;
|
||||
@property (nonatomic, readonly, nonnull) NSArray <RSParsedArticle *>*articles;
|
||||
|
||||
- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString;
|
||||
- (RSParsedArticle *)appendNewArticle;
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,33 +1,65 @@
|
||||
//
|
||||
// RSParsedFeed.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSParsedFeed.h"
|
||||
#import "RSParsedArticle.h"
|
||||
|
||||
// Private storage of RSParsedFeed.
@interface RSParsedFeed()
|
||||
// Backing store for the public articles getter; -appendNewArticle adds to it.
@property (nonatomic) NSMutableArray <RSParsedArticle *> *mutableArticles;
|
||||
@end
|
||||
|
||||
@implementation RSParsedFeed
|
||||
|
||||
- (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link articles:(NSArray *)articles {
|
||||
- (instancetype)initWithURLString:(NSString *)urlString {
|
||||
|
||||
self = [super init];
|
||||
if (!self) {
|
||||
return nil;
|
||||
if (self) {
|
||||
_urlString = urlString;
|
||||
_mutableArticles = [NSMutableArray new];
|
||||
_dateParsed = [NSDate date];
|
||||
}
|
||||
|
||||
_urlString = urlString;
|
||||
_title = title;
|
||||
_link = link;
|
||||
_articles = articles;
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
// Returns the internal mutable array itself (no copy is made), so a caller
// holding the result also sees articles appended afterwards.
- (NSArray<RSParsedArticle *> *)articles {
|
||||
return _mutableArticles;
|
||||
}
|
||||
|
||||
/**
|
||||
Append new @c RSParsedArticle object to @c .articles and return newly inserted instance.
|
||||
*/
|
||||
- (RSParsedArticle *)appendNewArticle {
|
||||
// Every article gets the feed's URL and the feed's own parse timestamp, so all
// articles of one feed share the same dateParsed value.
RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString dateParsed:_dateParsed];
|
||||
[_mutableArticles addObject:article];
|
||||
return article;
|
||||
}
|
||||
|
||||
#pragma mark - Printing
|
||||
|
||||
- (NSString*)description {
|
||||
return [NSString stringWithFormat:@"{%@ (%@), title: '%@', subtitle: '%@', entries: %@}",
|
||||
[self class], _link, _title, _subtitle, _articles];
|
||||
[self class], _link, _title, _subtitle, _mutableArticles];
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,13 +1,32 @@
|
||||
//
|
||||
// RSRSSParser.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 1/6/15.
|
||||
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "FeedParser.h"
|
||||
#import "RSFeedParser.h"
|
||||
|
||||
@interface RSRSSParser : NSObject <FeedParser>
|
||||
// <channel> <item>
|
||||
// https://cyber.harvard.edu/rss/rss.html
|
||||
|
||||
@interface RSRSSParser : RSFeedParser
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,351 +1,52 @@
|
||||
//
|
||||
// RSRSSParser.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 1/6/15.
|
||||
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import <libxml/xmlstring.h>
|
||||
#import "RSRSSParser.h"
|
||||
#import "RSSAXParser.h"
|
||||
#import "RSParsedFeed.h"
|
||||
#import "RSParsedArticle.h"
|
||||
#import "RSXMLData.h"
|
||||
#import "RSXMLInternal.h"
|
||||
#import "NSString+RSXML.h"
|
||||
#import "RSDateParser.h"
|
||||
#import "NSDictionary+RSXML.h"
|
||||
|
||||
|
||||
// Private state of RSRSSParser; also declares conformance to RSSAXParserDelegate.
@interface RSRSSParser () <RSSAXParserDelegate>
|
||||
|
||||
// Raw feed bytes and source URL, both taken from the RSXMLData passed to the initializer.
@property (nonatomic) NSData *feedData;
|
||||
@property (nonatomic) NSString *urlString;
|
||||
@property (nonatomic) NSDictionary *currentAttributes;
|
||||
// SAX parser created in the initializer with self as its delegate.
@property (nonatomic) RSSAXParser *parser;
|
||||
// Articles collected so far; -addArticle appends to it.
@property (nonatomic) NSMutableArray *articles;
|
||||
@property (nonatomic) BOOL parsingArticle;
|
||||
// Last object of articles (see -currentArticle).
@property (nonatomic, readonly) RSParsedArticle *currentArticle;
|
||||
@property (nonatomic) BOOL parsingChannelImage;
|
||||
@property (nonatomic, readonly) NSDate *currentDate;
|
||||
@property (nonatomic) BOOL endRSSFound;
|
||||
// Feed-level values that -parseFeed passes on to the resulting RSParsedFeed.
@property (nonatomic) NSString *feedLink;
|
||||
@property (nonatomic) NSString *feedTitle;
|
||||
@property (nonatomic) NSString *feedSubtitle;
|
||||
// Timestamp taken once in -parse and copied onto every article by -addArticle.
@property (nonatomic) NSDate *dateParsed;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSRSSParser
|
||||
|
||||
#pragma mark - Class Methods
|
||||
|
||||
// Cheap sniff test for RSS data: returns YES only if both "<rss" and
// "<channel>" occur (literal, case-sensitive search) within the first 4096
// characters and "<rss" does not come after "<channel>".
// Returns NO when the bytes cannot be decoded as a string at all.
+ (BOOL)canParseFeed:(RSXMLData *)xmlData {
|
||||
|
||||
// Checking for '<rss' and '<channel>' within first n characters should do it.
|
||||
// TODO: handle RSS 1.0
|
||||
|
||||
@autoreleasepool {
|
||||
|
||||
NSData *feedData = xmlData.data;
|
||||
// First attempt wraps the existing bytes without copying them (UTF-8).
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
|
||||
if (!s) {
|
||||
s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding];
|
||||
}
|
||||
if (!s) {
|
||||
s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding];
|
||||
}
|
||||
if (!s) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
// Limit the search to the head of the document; clamp for short strings.
static const NSInteger numberOfCharactersToSearch = 4096;
|
||||
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
|
||||
if (s.length < numberOfCharactersToSearch) {
|
||||
rangeToSearch.length = s.length;
|
||||
}
|
||||
|
||||
NSRange rssRange = [s rangeOfString:@"<rss" options:NSLiteralSearch range:rangeToSearch];
|
||||
NSRange channelRange = [s rangeOfString:@"<channel>" options:NSLiteralSearch range:rangeToSearch];
|
||||
// A zero-length range means the marker was not found.
if (rssRange.length < 1 || channelRange.length < 1) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
if (rssRange.location > channelRange.location) {
|
||||
return NO; // Wrong order.
|
||||
}
|
||||
}
|
||||
|
||||
return YES;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Init
|
||||
|
||||
// Stores the data and URL string of xmlData, creates the SAX parser with self
// as delegate and an empty article list. No parsing happens here; that is
// started by -parseFeed.
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
|
||||
|
||||
self = [super init];
|
||||
if (!self) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
_feedData = xmlData.data;
|
||||
_urlString = xmlData.urlString;
|
||||
_parser = [[RSSAXParser alloc] initWithDelegate:self];
|
||||
_articles = [NSMutableArray new];
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - API
|
||||
|
||||
// Runs -parse, then wraps the collected URL string, title, link and articles
// in a new RSParsedFeed. The subtitle is set afterwards through its property
// because the RSParsedFeed initializer used here has no subtitle parameter.
- (RSParsedFeed *)parseFeed {
|
||||
|
||||
[self parse];
|
||||
|
||||
RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.feedTitle link:self.feedLink articles:self.articles];
|
||||
parsedFeed.subtitle = self.feedSubtitle;
|
||||
|
||||
return parsedFeed;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Constants
|
||||
|
||||
static NSString *kIsPermaLinkKey = @"isPermaLink";
|
||||
static NSString *kURLKey = @"url";
|
||||
static NSString *kLengthKey = @"length";
|
||||
static NSString *kTypeKey = @"type";
|
||||
static NSString *kFalseValue = @"false";
|
||||
static NSString *kTrueValue = @"true";
|
||||
static NSString *kContentEncodedKey = @"content:encoded";
|
||||
static NSString *kDCDateKey = @"dc:date";
|
||||
static NSString *kDCCreatorKey = @"dc:creator";
|
||||
static NSString *kRDFAboutKey = @"rdf:about";
|
||||
|
||||
static const char *kItem = "item";
|
||||
static const NSInteger kItemLength = 5;
|
||||
// Private state of RSRSSParser; also declares conformance to RSSAXParserDelegate.
// NOTE(review): the code that reads and writes these flags is not visible in
// this excerpt — their meaning is inferred from the names only; confirm in the
// implementation.
@interface RSRSSParser () <RSSAXParserDelegate>
|
||||
@property (nonatomic) BOOL parsingArticle;
|
||||
@property (nonatomic) BOOL parsingChannelImage;
|
||||
@property (nonatomic) BOOL guidIsPermalink;
|
||||
@property (nonatomic) BOOL endRSSFound;
|
||||
@property (nonatomic) NSURL *baseURL;
|
||||
@end
|
||||
|
||||
static const char *kImage = "image";
|
||||
static const NSInteger kImageLength = 6;
|
||||
// TODO: handle RSS 1.0
|
||||
@implementation RSRSSParser
|
||||
|
||||
static const char *kLink = "link";
|
||||
static const NSInteger kLinkLength = 5;
|
||||
#pragma mark - RSXMLParserDelegate
|
||||
|
||||
// Element, prefix, attribute and value names matched against libxml2 byte strings.
// Every k…Length constant counts the terminating NUL byte, so it is derived with
// sizeof on the same literal instead of being counted by hand.

static const char *kTitle = "title";
static const NSInteger kTitleLength = (NSInteger)sizeof("title");

static const char *kDC = "dc";
static const NSInteger kDCLength = (NSInteger)sizeof("dc");

static const char *kCreator = "creator";
static const NSInteger kCreatorLength = (NSInteger)sizeof("creator");

static const char *kDate = "date";
static const NSInteger kDateLength = (NSInteger)sizeof("date");

static const char *kContent = "content";
static const NSInteger kContentLength = (NSInteger)sizeof("content");

static const char *kEncoded = "encoded";
static const NSInteger kEncodedLength = (NSInteger)sizeof("encoded");

static const char *kGuid = "guid";
static const NSInteger kGuidLength = (NSInteger)sizeof("guid");

static const char *kPubDate = "pubDate";
static const NSInteger kPubDateLength = (NSInteger)sizeof("pubDate");

static const char *kAuthor = "author";
static const NSInteger kAuthorLength = (NSInteger)sizeof("author");

static const char *kDescription = "description";
static const NSInteger kDescriptionLength = (NSInteger)sizeof("description");

static const char *kRSS = "rss";
static const NSInteger kRSSLength = (NSInteger)sizeof("rss");

static const char *kURL = "url";
static const NSInteger kURLLength = (NSInteger)sizeof("url");

static const char *kLength = "length";
static const NSInteger kLengthLength = (NSInteger)sizeof("length");

static const char *kType = "type";
static const NSInteger kTypeLength = (NSInteger)sizeof("type");

static const char *kIsPermaLink = "isPermaLink";
static const NSInteger kIsPermaLinkLength = (NSInteger)sizeof("isPermaLink");

static const char *kRDF = "rdf";
static const NSInteger kRDFlength = (NSInteger)sizeof("rdf");

static const char *kAbout = "about";
static const NSInteger kAboutLength = (NSInteger)sizeof("about");

static const char *kFalse = "false";
static const NSInteger kFalseLength = (NSInteger)sizeof("false");

static const char *kTrue = "true";
static const NSInteger kTrueLength = (NSInteger)sizeof("true");
|
||||
|
||||
|
||||
#pragma mark - Parsing
|
||||
|
||||
- (void)parse {
|
||||
|
||||
self.dateParsed = [NSDate date];
|
||||
|
||||
@autoreleasepool {
|
||||
[self.parser parseData:self.feedData];
|
||||
[self.parser finishParsing];
|
||||
}
|
||||
|
||||
// Optimization: make articles do calculations on this background thread.
|
||||
[self.articles makeObjectsPerformSelector:@selector(calculateArticleID)];
|
||||
+ (NSArray<const NSString *> *)parserRequireOrderedTags {
|
||||
return @[@"<rss", @"<channel>"];
|
||||
}
|
||||
|
||||
|
||||
/// Creates a parsed article for this feed's URL string, stamps it with the
/// parser's dateParsed, and appends it to self.articles.
- (void)addArticle {
	RSParsedArticle *newArticle = [[RSParsedArticle alloc] initWithFeedURL:self.urlString];
	newArticle.dateParsed = self.dateParsed;

	[self.articles addObject:newArticle];
}
|
||||
|
||||
|
||||
/// The article currently being filled in: the last object of self.articles
/// (nil when no article has been added yet).
- (RSParsedArticle *)currentArticle {
	RSParsedArticle *mostRecentArticle = [self.articles lastObject];
	return mostRecentArticle;
}
|
||||
|
||||
|
||||
/// Stores a feed-level element (link, title or description) from the parser's
/// current trimmed string. Elements carrying a namespace prefix are ignored.
- (void)addFeedElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {
	if (prefix) {
		return;
	}

	if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
		// Only the first <link> seen is kept as the feed link.
		if (self.feedLink == nil) {
			self.feedLink = self.parser.currentStringWithTrimmedWhitespace;
		}
		return;
	}

	if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
		self.feedTitle = self.parser.currentStringWithTrimmedWhitespace;
		return;
	}

	if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) {
		self.feedSubtitle = self.parser.currentStringWithTrimmedWhitespace;
	}
}
|
||||
|
||||
|
||||
/// Handles an element from the dc: namespace for the current article:
/// "creator" becomes the author, "date" becomes the publication date.
/// Any other local name is ignored.
- (void)addDCElement:(const xmlChar *)localName {
	if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) {
		self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace;
		return;
	}

	if (RSSAXEqualTags(localName, kDate, kDateLength)) {
		self.currentArticle.datePublished = self.currentDate;
	}
}
|
||||
|
||||
|
||||
/// Stores the parser's current trimmed string as the article guid. Unless the
/// element's isPermaLink attribute is exactly "false", the guid is also resolved
/// via -urlString: and stored as the article's permalink.
- (void)addGuid {
	self.currentArticle.guid = self.parser.currentStringWithTrimmedWhitespace;

	NSString *permaLinkFlag = [self.currentAttributes rsxml_objectForCaseInsensitiveKey:@"ispermalink"];

	// A missing attribute counts as a permalink; messaging nil yields NO here.
	BOOL explicitlyNotPermalink = [permaLinkFlag isEqualToString:@"false"];
	if (explicitlyNotPermalink) {
		return;
	}

	self.currentArticle.permalink = [self urlString:self.currentArticle.guid];
}
|
||||
|
||||
|
||||
/// Resolves a possibly relative URL string against the feed's home page link.
///
/// @param s The URL string taken from the feed; may be nil when the element was empty.
/// @return `s` unchanged when it is nil, already starts with "http" (case-insensitive),
///         or cannot be resolved (no feed link, or the feed link is not a valid URL);
///         otherwise the absolute string of `s` resolved against the feed link.
- (NSString *)urlString:(NSString *)s {

	/*Resolve against home page URL (if available) or feed URL.*/

	// +[NSURL URLWithString:relativeToURL:] raises on a nil string, and an empty
	// element yields nil here, so bail out before reaching it.
	if (!s) {
		return nil;
	}

	if ([[s lowercaseString] hasPrefix:@"http"]) {
		return s;
	}

	NSString *feedLink = self.feedLink;
	if (!feedLink) {
		// TODO: get feed URL and use that to resolve URL.
		return s;
	}

	NSURL *baseURL = [NSURL URLWithString:feedLink];
	if (!baseURL) {
		return s;
	}

	NSString *resolvedString = [NSURL URLWithString:s relativeToURL:baseURL].absoluteString;
	if (resolvedString) {
		return resolvedString;
	}

	return s;
}
|
||||
|
||||
|
||||
/// The parser's current trimmed string passed through rs_stringByDecodingHTMLEntities.
- (NSString *)currentStringWithHTMLEntitiesDecoded {
	NSString *trimmedText = self.parser.currentStringWithTrimmedWhitespace;
	return [trimmedText rs_stringByDecodingHTMLEntities];
}
|
||||
|
||||
/// Applies one closed element to the current article.
///
/// dc:* elements are forwarded to -addDCElement:, content:encoded becomes the body,
/// and every other prefixed element is ignored. Unprefixed guid, pubDate, author,
/// link, description and title map to the matching article property.
- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {

	// Namespaced elements first.
	if (RSSAXEqualTags(prefix, kDC, kDCLength)) {
		[self addDCElement:localName];
		return;
	}

	BOOL isContentEncoded = RSSAXEqualTags(prefix, kContent, kContentLength) && RSSAXEqualTags(localName, kEncoded, kEncodedLength);
	if (isContentEncoded) {
		self.currentArticle.body = [self currentStringWithHTMLEntitiesDecoded];
		return;
	}

	// Any other prefixed element is not handled.
	if (prefix) {
		return;
	}

	// Plain RSS item elements.
	if (RSSAXEqualTags(localName, kGuid, kGuidLength)) {
		[self addGuid];
		return;
	}

	if (RSSAXEqualTags(localName, kPubDate, kPubDateLength)) {
		self.currentArticle.datePublished = self.currentDate;
		return;
	}

	if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
		self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace;
		return;
	}

	if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
		self.currentArticle.link = [self urlString:self.parser.currentStringWithTrimmedWhitespace];
		return;
	}

	if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) {
		self.currentArticle.abstract = [self currentStringWithHTMLEntitiesDecoded];
		return;
	}

	if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
		self.currentArticle.title = [self currentStringWithHTMLEntitiesDecoded];
	}
}
|
||||
|
||||
|
||||
/// Parses the parser's currently stored characters into a date via RSDateWithBytes.
- (NSDate *)currentDate {
	NSData *storedCharacters = self.parser.currentCharacters;
	return RSDateWithBytes(storedCharacters.bytes, storedCharacters.length);
}
|
||||
|
||||
|
||||
#pragma mark - RSSAXParserDelegate
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
|
||||
@@ -354,31 +55,61 @@ static const NSInteger kTrueLength = 5;
|
||||
return;
|
||||
}
|
||||
|
||||
NSDictionary *xmlAttributes = nil;
|
||||
if (RSSAXEqualTags(localName, kItem, kItemLength) || RSSAXEqualTags(localName, kGuid, kGuidLength)) {
|
||||
xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
}
|
||||
if (self.currentAttributes != xmlAttributes) {
|
||||
self.currentAttributes = xmlAttributes;
|
||||
}
|
||||
int len = xmlStrlen(localName);
|
||||
|
||||
if (!prefix && RSSAXEqualTags(localName, kItem, kItemLength)) {
|
||||
|
||||
[self addArticle];
|
||||
self.parsingArticle = YES;
|
||||
|
||||
if (xmlAttributes && xmlAttributes[kRDFAboutKey]) { /*RSS 1.0 guid*/
|
||||
self.currentArticle.guid = xmlAttributes[kRDFAboutKey];
|
||||
self.currentArticle.permalink = self.currentArticle.guid;
|
||||
if (prefix != NULL) {
|
||||
if (!self.parsingArticle || self.parsingChannelImage) {
|
||||
return;
|
||||
}
|
||||
if (len != 4 && len != 7) {
|
||||
return;
|
||||
}
|
||||
int prefLen = xmlStrlen(prefix);
|
||||
if (prefLen == 2 && EqualBytes(prefix, "dc", 2)) {
|
||||
if (EqualBytes(localName, "date", 4) || EqualBytes(localName, "creator", 7)) {
|
||||
[SAXParser beginStoringCharacters];
|
||||
}
|
||||
}
|
||||
else if (len == 7 && prefLen == 7 && EqualBytes(prefix, "content", 7) && EqualBytes(localName, "encoded", 7)) {
|
||||
[SAXParser beginStoringCharacters];
|
||||
}
|
||||
return;
|
||||
}
|
||||
// else: localname without prefix
|
||||
switch (len) {
|
||||
case 4:
|
||||
if (EqualBytes(localName, "item", 4)) {
|
||||
self.parsingArticle = YES;
|
||||
self.currentArticle = [self.parsedFeed appendNewArticle];
|
||||
|
||||
NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
if (attribs) {
|
||||
NSString *about = attribs[kRDFAboutKey]; // RSS 1.0 guid
|
||||
if (about) {
|
||||
self.currentArticle.guid = about;
|
||||
self.currentArticle.permalink = about;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (EqualBytes(localName, "guid", 4)) {
|
||||
NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
NSString *isPermaLinkValue = [attribs rsxml_objectForCaseInsensitiveKey:@"isPermaLink"];
|
||||
if (!isPermaLinkValue || ![isPermaLinkValue isEqualToString:@"false"]) {
|
||||
self.guidIsPermalink = YES;
|
||||
} else {
|
||||
self.guidIsPermalink = NO;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
if (EqualBytes(localName, "image", 5)) {
|
||||
self.parsingChannelImage = YES;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
else if (!prefix && RSSAXEqualTags(localName, kImage, kImageLength)) {
|
||||
self.parsingChannelImage = YES;
|
||||
}
|
||||
|
||||
if (!self.parsingChannelImage) {
|
||||
[self.parser beginStoringCharacters];
|
||||
if (self.parsingArticle || !self.parsingChannelImage) {
|
||||
[SAXParser beginStoringCharacters];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -388,77 +119,132 @@ static const NSInteger kTrueLength = 5;
|
||||
if (self.endRSSFound) {
|
||||
return;
|
||||
}
|
||||
|
||||
int len = xmlStrlen(localName);
|
||||
|
||||
if (RSSAXEqualTags(localName, kRSS, kRSSLength)) {
|
||||
self.endRSSFound = YES;
|
||||
// Meta parsing
|
||||
if (len == 3 && EqualBytes(localName, "rss", 3)) { self.endRSSFound = YES; }
|
||||
else if (len == 4 && EqualBytes(localName, "item", 4)) { self.parsingArticle = NO; }
|
||||
else if (len == 5 && EqualBytes(localName, "image", 5)) { self.parsingChannelImage = NO; }
|
||||
// Always exit if prefix is set
|
||||
else if (prefix != NULL)
|
||||
{
|
||||
if (!self.parsingArticle) {
|
||||
// Feed parsing
|
||||
return;
|
||||
}
|
||||
int prefLen = xmlStrlen(prefix);
|
||||
// Article parsing
|
||||
switch (len) {
|
||||
case 4:
|
||||
if (prefLen == 2 && EqualBytes(prefix, "dc", 2) && EqualBytes(localName, "date", 4))
|
||||
self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters];
|
||||
return;
|
||||
case 7:
|
||||
if (prefLen == 2 && EqualBytes(prefix, "dc", 2) && EqualBytes(localName, "creator", 7)) {
|
||||
self.currentArticle.author = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
else if (prefLen == 7 && EqualBytes(prefix, "content", 7) && EqualBytes(localName, "encoded", 7)) {
|
||||
self.currentArticle.body = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace];
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kImage, kImageLength)) {
|
||||
self.parsingChannelImage = NO;
|
||||
// Article parsing
|
||||
else if (self.parsingArticle)
|
||||
{
|
||||
switch (len) {
|
||||
case 4:
|
||||
if (EqualBytes(localName, "link", 4)) {
|
||||
self.currentArticle.link = [SAXParser.currentStringWithTrimmedWhitespace absoluteURLWithBase:self.baseURL];
|
||||
}
|
||||
else if (EqualBytes(localName, "guid", 4)) {
|
||||
self.currentArticle.guid = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
if (self.guidIsPermalink) {
|
||||
self.currentArticle.permalink = [self.currentArticle.guid absoluteURLWithBase:self.baseURL];
|
||||
}
|
||||
}
|
||||
return;
|
||||
case 5:
|
||||
if (EqualBytes(localName, "title", 5))
|
||||
self.currentArticle.title = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace];
|
||||
return;
|
||||
case 6:
|
||||
if (EqualBytes(localName, "author", 6))
|
||||
self.currentArticle.author = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
return;
|
||||
case 7:
|
||||
if (EqualBytes(localName, "pubDate", 7))
|
||||
self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters];
|
||||
return;
|
||||
case 11:
|
||||
if (EqualBytes(localName, "description", 11))
|
||||
self.currentArticle.abstract = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
else if (RSSAXEqualTags(localName, kItem, kItemLength)) {
|
||||
self.parsingArticle = NO;
|
||||
}
|
||||
|
||||
else if (self.parsingArticle) {
|
||||
[self addArticleElement:localName prefix:prefix];
|
||||
}
|
||||
|
||||
else if (!self.parsingChannelImage) {
|
||||
[self addFeedElement:localName prefix:prefix];
|
||||
// Feed parsing
|
||||
else if (!self.parsingChannelImage)
|
||||
{
|
||||
switch (len) {
|
||||
case 4:
|
||||
if (EqualBytes(localName, "link", 4)) {
|
||||
self.parsedFeed.link = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
self.baseURL = [NSURL URLWithString:self.parsedFeed.link];
|
||||
}
|
||||
return;
|
||||
case 5:
|
||||
if (EqualBytes(localName, "title", 5))
|
||||
self.parsedFeed.title = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
return;
|
||||
case 11:
|
||||
if (EqualBytes(localName, "description", 11))
|
||||
self.parsedFeed.subtitle = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
|
||||
|
||||
if (RSSAXEqualTags(prefix, kRDF, kRDFlength)) {
|
||||
|
||||
if (RSSAXEqualTags(name, kAbout, kAboutLength)) {
|
||||
return kRDFAboutKey;
|
||||
}
|
||||
|
||||
return nil;
|
||||
}
|
||||
int len = xmlStrlen(name);
|
||||
|
||||
if (prefix) {
|
||||
if (len == 5 && EqualBytes(prefix, "rdf", 4) && EqualBytes(name, "about", 5)) { // 4 because prefix length is not checked
|
||||
return kRDFAboutKey;
|
||||
}
|
||||
return nil;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(name, kIsPermaLink, kIsPermaLinkLength)) {
|
||||
return kIsPermaLinkKey;
|
||||
switch (len) {
|
||||
case 3:
|
||||
if (EqualBytes(name, "url", 3)) { return @"url"; }
|
||||
break;
|
||||
case 4:
|
||||
if (EqualBytes(name, "type", 4)) { return @"type"; }
|
||||
break;
|
||||
case 6:
|
||||
if (EqualBytes(name, "length", 6)) { return @"length"; }
|
||||
break;
|
||||
case 11:
|
||||
if (EqualBytes(name, "isPermaLink", 11)) { return @"isPermaLink"; }
|
||||
break;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(name, kURL, kURLLength)) {
|
||||
return kURLKey;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(name, kLength, kLengthLength)) {
|
||||
return kLengthKey;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(name, kType, kTypeLength)) {
|
||||
return kTypeKey;
|
||||
}
|
||||
|
||||
return nil;
|
||||
}
|
||||
|
||||
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
|
||||
|
||||
static const NSUInteger falseLength = kFalseLength - 1;
|
||||
static const NSUInteger trueLength = kTrueLength - 1;
|
||||
|
||||
if (length == falseLength && RSSAXEqualBytes(bytes, kFalse, falseLength)) {
|
||||
return kFalseValue;
|
||||
switch (length) {
|
||||
case 4:
|
||||
if (EqualBytes(bytes, "true", 4)) { return @"true"; }
|
||||
break;
|
||||
case 5:
|
||||
if (EqualBytes(bytes, "false", 5)) { return @"false"; }
|
||||
break;
|
||||
}
|
||||
|
||||
if (length == trueLength && RSSAXEqualBytes(bytes, kTrue, trueLength)) {
|
||||
return kTrueValue;
|
||||
}
|
||||
|
||||
return nil;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
//
|
||||
// RSSAXHTMLParser.h
|
||||
// RSXML
|
||||
//
|
||||
// Created by Brent Simmons on 3/6/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
@import Foundation;
|
||||
|
||||
@class RSSAXHTMLParser;
|
||||
|
||||
/// Callbacks sent by RSSAXHTMLParser while it parses. Every method is optional.
@protocol RSSAXHTMLParserDelegate <NSObject>

@optional

/// An element was opened. `attributes` is the raw libxml2 attribute array; it can be
/// converted with -[RSSAXHTMLParser attributesDictionary:].
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char *)localName attributes:(const unsigned char **)attributes;

/// An element was closed. Characters stored since -beginStoringCharacters are still
/// available during this call; storing stops right after it returns.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(const unsigned char *)localName;

/// Character data was found; `characters` holds `length` bytes.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length;

/// The end of the document was reached. If canceled, may not get called (but might).
- (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser;

@end
|
||||
|
||||
|
||||
/// Incremental (push) SAX-style HTML parser that reports events to a delegate.
@interface RSSAXHTMLParser : NSObject

/// Creates a parser that reports to `delegate`.
- (instancetype)initWithDelegate:(id<RSSAXHTMLParserDelegate>)delegate;

/// Feeds the bytes of `data` to the parser.
- (void)parseData:(NSData *)data;

/// Feeds a chunk of bytes to the parser. The character encoding is detected from
/// the first chunk that is supplied.
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;

/// Signals the end of input and releases the parsing context. Must only be called
/// after data has been supplied.
- (void)finishParsing;

/// Asks the underlying parser to stop.
- (void)cancel;

/// nil if not storing characters. UTF-8 encoded.
@property (nonatomic, strong, readonly) NSData *currentCharacters;

/// Convenience to get string version of currentCharacters.
@property (nonatomic, strong, readonly) NSString *currentString;

/// currentString with leading and trailing whitespace and newlines removed.
@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;

/// Delegate can call from XMLStartElement. Characters will be available in XMLEndElement
/// as currentCharacters property. Storing characters is stopped after each XMLEndElement.
- (void)beginStoringCharacters;

/// Delegate can call from within XMLStartElement. Converts the raw attribute array into
/// a dictionary of names to values; returns nil when `attributes` is NULL. An attribute
/// without a value is stored with an empty string.
- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes;

@end
|
||||
@@ -1,315 +0,0 @@
|
||||
//
|
||||
// RSSAXHTMLParser.m
|
||||
// RSXML
|
||||
//
|
||||
// Created by Brent Simmons on 3/6/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
#import "RSSAXHTMLParser.h"
|
||||
#import "RSSAXParser.h"
|
||||
#import <libxml/tree.h>
|
||||
#import <libxml/xmlstring.h>
|
||||
#import <libxml/HTMLparser.h>
|
||||
#import "RSXMLInternal.h"
|
||||
|
||||
|
||||
// Private state of RSSAXHTMLParser.
@interface RSSAXHTMLParser ()

// Held weakly: the delegate typically creates and owns the parser, so a strong
// reference here would form a retain cycle. This matches RSSAXParser's delegate.
@property (nonatomic, weak) id<RSSAXHTMLParserDelegate> delegate;

// libxml2 push-parser context; created lazily on the first chunk and freed in
// -finishParsing or -dealloc.
@property (nonatomic, assign) htmlParserCtxtPtr context;

// YES between -beginStoringCharacters and the end of the current element.
@property (nonatomic, assign) BOOL storingCharacters;

// Accumulates the bytes of character data while storingCharacters is YES.
@property (nonatomic) NSMutableData *characters;

// Cached results of respondsToSelector: for the optional delegate methods.
@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;

@end
|
||||
|
||||
|
||||
// Wraps libxml2's HTML push parser and forwards its SAX events to the delegate.
@implementation RSSAXHTMLParser

+ (void)initialize {

	RSSAXInitLibXMLParser();
}


#pragma mark - Init

/// Stores the delegate and caches which optional delegate methods it implements,
/// so the SAX callbacks do not have to ask on every event.
- (instancetype)initWithDelegate:(id<RSSAXHTMLParserDelegate>)delegate {

	self = [super init];
	if (self == nil)
		return nil;

	_delegate = delegate;

	if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)]) {
		_delegateRespondsToStartElementMethod = YES;
	}
	if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)]) {
		_delegateRespondsToEndElementMethod = YES;
	}
	if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) {
		_delegateRespondsToCharactersFoundMethod = YES;
	}
	if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) {
		_delegateRespondsToEndOfDocumentMethod = YES;
	}

	return self;
}


#pragma mark - Dealloc

/// Frees the libxml2 context if parsing was never finished.
- (void)dealloc {

	if (_context != nil) {
		htmlFreeParserCtxt(_context);
		_context = nil;
	}
	_delegate = nil;
}


#pragma mark - API

// Forward declaration; the handler table is defined at the bottom of the file.
static xmlSAXHandler saxHandlerStruct;

/// Feeds the bytes of `data` to the parser.
- (void)parseData:(NSData *)data {

	[self parseBytes:data.bytes numberOfBytes:data.length];
}


/// Feeds one chunk to the push parser. The context is created on the first call,
/// using the encoding detected from that first chunk.
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {

	if (self.context == nil) {

		xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes);
		// self is passed as user data; the C callbacks bridge it back to the parser.
		self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding);
		htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT);
	}

	@autoreleasepool {
		htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0);
	}
}


/// Terminates parsing, then frees the context and any stored characters.
/// Asserts (and otherwise silently returns) if nothing was ever parsed.
- (void)finishParsing {

	NSAssert(self.context != nil, nil);
	if (self.context == nil)
		return;

	@autoreleasepool {
		// The final argument (terminate = 1) flushes the parser.
		htmlParseChunk(self.context, nil, 0, 1);
		htmlFreeParserCtxt(self.context);
		self.context = nil;
		self.characters = nil;
	}
}


/// Asks libxml2 to stop parsing.
- (void)cancel {

	@autoreleasepool {
		xmlStopParser(self.context);
	}
}


/// Starts collecting character data into a fresh buffer.
- (void)beginStoringCharacters {
	self.storingCharacters = YES;
	self.characters = [NSMutableData new];
}


/// Stops collecting character data and drops the buffer.
- (void)endStoringCharacters {
	self.storingCharacters = NO;
	self.characters = nil;
}


/// The collected bytes, or nil when not currently storing characters.
- (NSData *)currentCharacters {

	if (!self.storingCharacters) {
		return nil;
	}

	return self.characters;
}


/// The collected bytes decoded as UTF-8, or nil when there are none.
- (NSString *)currentString {

	NSData *d = self.currentCharacters;
	if (RSXMLIsEmpty(d)) {
		return nil;
	}

	return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding];
}


/// currentString without leading and trailing whitespace and newlines.
- (NSString *)currentStringWithTrimmedWhitespace {

	return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
}


#pragma mark - Attributes Dictionary

/// Converts the raw attribute array handed to the start-element callback into a dictionary.
///
/// @param attributes Alternating name and value C strings, terminated by a NULL name;
///                   a value may be NULL. May itself be NULL.
/// @return nil when `attributes` is NULL; otherwise a dictionary of names to values,
///         with an empty string for attributes that have no value.
- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes {

	if (!attributes) {
		return nil;
	}

	NSMutableDictionary *d = [NSMutableDictionary new];

	// Step through the array strictly in name/value pairs. A name that fails UTF-8
	// decoding skips only its own pair instead of causing its value to be
	// misread as the next name.
	for (NSInteger ix = 0; attributes[ix] != NULL; ix += 2) {

		NSString *key = [NSString stringWithUTF8String:(const char *)attributes[ix]];

		const xmlChar *rawValue = attributes[ix + 1];
		NSString *value = nil;
		if (rawValue) {
			value = [NSString stringWithUTF8String:(const char *)rawValue];
		}

		if (key) {
			d[key] = value ? value : @"";
		}
	}

	return [d copy];
}


#pragma mark - Callbacks

/// End of document: notifies the delegate, then stops storing characters.
- (void)xmlEndDocument {

	@autoreleasepool {
		if (self.delegateRespondsToEndOfDocumentMethod) {
			[self.delegate saxParserDidReachEndOfDocument:self];
		}

		[self endStoringCharacters];
	}
}


/// Character data: appended to the buffer while storing, and forwarded to the delegate.
- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length {

	@autoreleasepool {
		if (self.storingCharacters) {
			[self.characters appendBytes:(const void *)ch length:length];
		}

		if (self.delegateRespondsToCharactersFoundMethod) {
			[self.delegate saxParser:self XMLCharactersFound:ch length:length];
		}
	}
}


/// Element start: forwarded to the delegate.
- (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {

	@autoreleasepool {
		if (self.delegateRespondsToStartElementMethod) {

			[self.delegate saxParser:self XMLStartElement:localName attributes:attributes];
		}
	}
}


/// Element end: forwarded to the delegate first, so stored characters are still
/// readable during the callback, then storing is stopped.
- (void)xmlEndElement:(const xmlChar *)localName {

	@autoreleasepool {
		if (self.delegateRespondsToEndElementMethod) {
			[self.delegate saxParser:self XMLEndElement:localName];
		}

		[self endStoringCharacters];
	}
}


@end
|
||||
|
||||
|
||||
// libxml2 start-element callback. `context` is the RSSAXHTMLParser that was passed
// as user data when the push parser context was created.
static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) {

	RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
	[parser xmlStartElement:localname attributes:attributes];
}
|
||||
|
||||
|
||||
// libxml2 end-element callback. `context` is the RSSAXHTMLParser that was passed
// as user data when the push parser context was created.
static void endElementSAX(void *context, const xmlChar *localname) {
	RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
	[parser xmlEndElement:localname];
}
|
||||
|
||||
|
||||
// libxml2 characters callback. `context` is the RSSAXHTMLParser that was passed as
// user data; the int length is converted to NSUInteger for the Objective-C method.
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
	RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
	NSUInteger byteCount = (NSUInteger)len;
	[parser xmlCharactersFound:ch length:byteCount];
}
|
||||
|
||||
|
||||
// libxml2 end-of-document callback. `context` is the RSSAXHTMLParser that was passed
// as user data when the push parser context was created.
static void endDocumentSAX(void *context) {
	RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
	[parser xmlEndDocument];
}
|
||||
|
||||
|
||||
// SAX handler table given to htmlCreatePushParserCtxt. Only the four callbacks this
// class implements are installed; every member not named here is zero-initialized,
// which is what the previous positional initializer spelled out as nil.
static htmlSAXHandler saxHandlerStruct = {
	.endDocument = endDocumentSAX,
	.startElement = startElementSAX,
	.endElement = endElementSAX,
	.characters = charactersFoundSAX,
	.initialized = XML_SAX2_MAGIC
};
|
||||
|
||||
@@ -1,12 +1,29 @@
|
||||
//
|
||||
// RSSAXParser.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 3/25/15.
|
||||
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
#import <libxml/xmlstring.h>
|
||||
|
||||
/*Thread-safe, not re-entrant.
|
||||
|
||||
@@ -22,48 +39,39 @@
|
||||
|
||||
@protocol RSSAXParserDelegate <NSObject>
|
||||
|
||||
+ (BOOL)isHTMLParser; // reusing class method of RSXMLParser delegate
|
||||
|
||||
@optional
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char **)attributes;
|
||||
// Called when parsing HTML
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes;
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName;
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri;
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length;
|
||||
|
||||
- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/
|
||||
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char *)name prefix:(const unsigned char *)prefix; /*Okay to return nil. Prefix may be nil.*/
|
||||
// Called when parsing XML (Atom, RSS, OPML)
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes;
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri;
|
||||
|
||||
// Called regardless of parser type
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const xmlChar *)characters length:(NSUInteger)length;
|
||||
- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; // If canceled, may not get called (but might).
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix; // Okay to return nil. Prefix may be nil.
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
void RSSAXInitLibXMLParser(void); // Needed by RSSAXHTMLParser.
|
||||
|
||||
/*For use by delegate.*/
|
||||
|
||||
BOOL RSSAXEqualTags(const unsigned char *localName, const char *tag, NSInteger tagLength);
|
||||
BOOL RSSAXEqualBytes(const void *bytes1, const void *bytes2, NSUInteger length);
|
||||
|
||||
|
||||
/// SAX parser object that reports parse events to a @c RSSAXParserDelegate.
@interface RSSAXParser : NSObject

/// @c nil if not storing characters. UTF-8 encoded.
@property (nonatomic, strong, readonly) NSData *currentCharacters;

/// Convenience to get string version of @c currentCharacters.
@property (nonatomic, strong, readonly) NSString *currentString;

/// @c currentString with surrounding whitespace and newline characters trimmed.
@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;

- (instancetype)initWithDelegate:(id<RSSAXParserDelegate>)delegate;

- (void)parseData:(NSData *)data;
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;
- (void)finishParsing;
- (void)cancel;

/// Delegate can call from XMLStartElement. Characters will be available in XMLEndElement
/// as @c currentCharacters property. Storing characters is stopped after each XMLEndElement.
- (void)beginStoringCharacters;

/// Delegate can call from within XMLStartElement. Returns @c nil if @c numberOfAttributes < 1.
- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes numberOfAttributes:(NSInteger)numberOfAttributes;

/// Variant for the HTML start-element callback, which passes no attribute count.
/// Returns @c nil if @c attributes is @c NULL.
- (NSDictionary *)attributesDictionaryHTML:(const xmlChar **)attributes;

@end
|
||||
|
||||
@@ -1,42 +1,57 @@
|
||||
//
|
||||
// RSSAXParser.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 3/25/15.
|
||||
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import <libxml/tree.h>
|
||||
#import <libxml/xmlstring.h>
|
||||
#import <libxml/parser.h>
|
||||
#import "RSSAXParser.h"
|
||||
#import "RSXMLInternal.h"
|
||||
|
||||
|
||||
/// Private state of @c RSSAXParser.
@interface RSSAXParser ()

@property (nonatomic, weak) id<RSSAXParserDelegate> delegate;
/// libxml parser context; @c nil while no parse is in progress.
@property (nonatomic, assign) xmlParserCtxtPtr context;
@property (nonatomic, assign) BOOL storingCharacters;
@property (nonatomic) NSMutableData *characters;

/// @c YES when the delegate's class reports itself as an HTML parser.
@property (nonatomic, assign) BOOL isHTMLParser;

// Cached results of -respondsToSelector: for the optional delegate methods.
@property (nonatomic, assign) BOOL delegateRespondsToInternedStringMethod;
@property (nonatomic, assign) BOOL delegateRespondsToInternedStringForValueMethod;
@property (nonatomic, assign) BOOL delegateRespondsToStartElementMethod;
@property (nonatomic, assign) BOOL delegateRespondsToEndElementMethod;
@property (nonatomic, assign) BOOL delegateRespondsToCharactersFoundMethod;
@property (nonatomic, assign) BOOL delegateRespondsToEndOfDocumentMethod;

@end
|
||||
|
||||
|
||||
@implementation RSSAXParser
|
||||
|
||||
/// Initialise libxml exactly once per process before any parser instance is used.
+ (void)initialize {

	// +initialize may run more than once (e.g. for subclasses), hence the once-token.
	static dispatch_once_t onceToken;
	dispatch_once(&onceToken, ^{
		xmlInitParser();
	});
}
|
||||
|
||||
|
||||
#pragma mark - Init
|
||||
|
||||
- (instancetype)initWithDelegate:(id<RSSAXParserDelegate>)delegate {
|
||||
@@ -46,32 +61,23 @@
|
||||
return nil;
|
||||
|
||||
_delegate = delegate;
|
||||
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)]) {
|
||||
_delegateRespondsToInternedStringMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)]) {
|
||||
_delegateRespondsToInternedStringForValueMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)]) {
|
||||
_delegateRespondsToStartElementMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)]) {
|
||||
_delegateRespondsToEndElementMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) {
|
||||
_delegateRespondsToCharactersFoundMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) {
|
||||
_delegateRespondsToEndOfDocumentMethod = YES;
|
||||
_delegateRespondsToCharactersFoundMethod = [_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)];
|
||||
_delegateRespondsToEndOfDocumentMethod = [_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)];
|
||||
_delegateRespondsToInternedStringMethod = [_delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)];
|
||||
_delegateRespondsToInternedStringForValueMethod = [_delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)];
|
||||
|
||||
if ([[_delegate class] respondsToSelector:@selector(isHTMLParser)] && [[_delegate class] isHTMLParser]) {
|
||||
_isHTMLParser = YES;
|
||||
_delegateRespondsToStartElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)];
|
||||
_delegateRespondsToEndElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)];
|
||||
} else {
|
||||
_delegateRespondsToStartElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)];
|
||||
_delegateRespondsToEndElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)];
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Dealloc
|
||||
|
||||
- (void)dealloc {
|
||||
if (_context != nil) {
|
||||
xmlFreeParserCtxt(_context);
|
||||
@@ -83,28 +89,39 @@
|
||||
|
||||
#pragma mark - API
|
||||
|
||||
|
||||
static xmlSAXHandler saxHandlerStruct;
|
||||
|
||||
/// Convenience wrapper: parse the complete contents of @c data.
- (void)parseData:(NSData *)data {

	const void *rawBytes = data.bytes;
	NSUInteger byteCount = data.length;
	[self parseBytes:rawBytes numberOfBytes:byteCount];
}
|
||||
|
||||
|
||||
/**
 Initialize new xml or html parser context and start processing of data.

 A context is created only if none exists yet; the HTML or XML flavour is chosen
 by @c isHTMLParser. The bytes are pushed as one chunk and @c finishParsing is
 called before returning.

 @param bytes Pointer to the raw document bytes.
 @param numberOfBytes Number of bytes available at @c bytes.
 */
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {

	if (self.context == nil) {
		if (self.isHTMLParser) {
			xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes);
			self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding);
			htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT);
		} else {
			self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil);
			xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT);
		}
	}

	@autoreleasepool {
		// Feed the chunk exactly once, to the parser flavour that owns the context.
		if (self.isHTMLParser) {
			htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0);
		} else {
			xmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0);
		}
	}

	[self finishParsing];
}
|
||||
|
||||
|
||||
/**
|
||||
Call after @c parseData: or @c parseBytes:numberOfBytes:
|
||||
*/
|
||||
- (void)finishParsing {
|
||||
|
||||
NSAssert(self.context != nil, nil);
|
||||
@@ -112,63 +129,70 @@ static xmlSAXHandler saxHandlerStruct;
|
||||
return;
|
||||
|
||||
@autoreleasepool {
|
||||
xmlParseChunk(self.context, nil, 0, 1);
|
||||
xmlFreeParserCtxt(self.context);
|
||||
if (self.isHTMLParser) {
|
||||
htmlParseChunk(self.context, nil, 0, 1);
|
||||
htmlFreeParserCtxt(self.context);
|
||||
} else {
|
||||
xmlParseChunk(self.context, nil, 0, 1);
|
||||
xmlFreeParserCtxt(self.context);
|
||||
}
|
||||
self.context = nil;
|
||||
self.characters = nil;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Ask libxml to stop the current parse. Per the original note,
/// @c saxParserDidReachEndOfDocument: will not be called afterwards.
- (void)cancel {

	xmlParserCtxtPtr activeContext = self.context;
	@autoreleasepool {
		xmlStopParser(activeContext);
	}
}
|
||||
|
||||
|
||||
/**
 Start collecting character data into a fresh buffer.

 Delegate can call from @c XMLStartElement.
 Characters will be available in @c XMLEndElement as @c currentCharacters property.
 Storing characters is stopped after each @c XMLEndElement.
 */
- (void)beginStoringCharacters {
	NSMutableData *freshBuffer = [NSMutableData new];
	self.storingCharacters = YES;
	self.characters = freshBuffer;
}
|
||||
|
||||
|
||||
/// Stop collecting characters and drop the buffer.
/// Will be called after each closing tag and the document end.
- (void)endStoringCharacters {
	[self setStoringCharacters:NO];
	[self setCharacters:nil];
}
|
||||
|
||||
|
||||
/// @return The collected character buffer, or @c nil if not storing characters. UTF-8 encoded.
- (NSData *)currentCharacters {

	return self.storingCharacters ? self.characters : nil;
}
|
||||
|
||||
|
||||
/// Convenience method to get string version of @c currentCharacters.
/// @return @c nil when no characters are stored (buffer missing or empty).
- (NSString *)currentString {

	NSData *d = self.currentCharacters;
	if (!d || d.length == 0) {
		return nil;
	}

	return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding];
}
|
||||
|
||||
|
||||
/// @return @c currentString with leading and trailing whitespace and newline characters removed.
- (NSString *)currentStringWithTrimmedWhitespace {

	NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];
	NSString *untrimmed = self.currentString;
	return [untrimmed stringByTrimmingCharactersInSet:blanks];
}
|
||||
|
||||
|
||||
#pragma mark - Attributes Dictionary
|
||||
|
||||
|
||||
/**
|
||||
Delegate can call from within @c XMLStartElement. Returns @c nil if @c numberOfAttributes @c < @c 1.
|
||||
*/
|
||||
- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes {
|
||||
|
||||
if (numberOfAttributes < 1 || !attributes) {
|
||||
@@ -178,8 +202,7 @@ static xmlSAXHandler saxHandlerStruct;
|
||||
NSMutableDictionary *d = [NSMutableDictionary new];
|
||||
|
||||
@autoreleasepool {
|
||||
NSInteger i = 0, j = 0;
|
||||
for (i = 0, j = 0; i < numberOfAttributes; i++, j+=5) {
|
||||
for (NSInteger i = 0, j = 0; i < numberOfAttributes; i++, j+=5) {
|
||||
|
||||
NSUInteger lenValue = (NSUInteger)(attributes[j + 4] - attributes[j + 3]);
|
||||
NSString *value = nil;
|
||||
@@ -210,29 +233,48 @@ static xmlSAXHandler saxHandlerStruct;
|
||||
}
|
||||
}
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
/**
 Delegate can call from within the HTML @c XMLStartElement callback.

 @param attributes Flat array laid out as name, value, name, value, … and
        terminated by a @c NULL name. A value may be @c NULL.
 @return @c nil if @c attributes is @c NULL; otherwise a dictionary (possibly
         empty). Attributes without a value are stored with an empty string.
 */
- (NSDictionary *)attributesDictionaryHTML:(const xmlChar **)attributes {

	if (!attributes) {
		return nil;
	}

	NSMutableDictionary *d = [NSMutableDictionary new];

	// Walk the array strictly in (name, value) pairs so that a bad entry can
	// never shift the pairing of the entries that follow it.
	for (NSInteger ix = 0; attributes[ix] != NULL; ix += 2) {

		const xmlChar *rawValue = attributes[ix + 1];

		NSString *key = [NSString stringWithUTF8String:(const char *)attributes[ix]];
		if (!key) {
			// Name could not be converted; skip the whole pair.
			continue;
		}

		NSString *value = nil;
		if (rawValue) {
			value = [NSString stringWithUTF8String:(const char *)rawValue];
		}
		d[key] = (value ? value : @"");
	}
	return d;
}
|
||||
|
||||
|
||||
#pragma mark - Equal Tags
|
||||
|
||||
/// Compare the first @c tagLength bytes of @c localName with @c tag (via @c strncmp).
/// @return @c NO if @c localName is @c NULL or the compared bytes differ, otherwise @c YES.
BOOL RSSAXEqualTags(const xmlChar *localName, const char *tag, NSInteger tagLength) {

	if (localName == NULL) {
		return NO;
	}
	int difference = strncmp((const char *)localName, tag, (size_t)tagLength);
	return difference == 0;
}
|
||||
|
||||
/// @return @c YES if the first @c length bytes of both buffers are identical (via @c memcmp).
BOOL RSSAXEqualBytes(const void *bytes1, const void *bytes2, NSUInteger length) {

	int difference = memcmp(bytes1, bytes2, length);
	return (difference == 0);
}
|
||||
|
||||
|
||||
#pragma mark - Callbacks
|
||||
|
||||
|
||||
- (void)xmlEndDocument {
|
||||
|
||||
@autoreleasepool {
|
||||
@@ -261,50 +303,72 @@ BOOL RSSAXEqualBytes(const void *bytes1, const void *bytes2, NSUInteger length)
|
||||
|
||||
/// Forward an XML start-element event to the delegate, if it implements the callback.
/// All arguments are passed through unchanged.
- (void)xmlStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(int)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(int)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {

	if (self.delegateRespondsToStartElementMethod) {
		// Drain temporaries created by the delegate once per element.
		@autoreleasepool {
			[self.delegate saxParser:self XMLStartElement:localName prefix:prefix uri:uri numberOfNamespaces:numberOfNamespaces namespaces:namespaces numberOfAttributes:numberOfAttributes numberDefaulted:numberDefaulted attributes:attributes];
		}
	}
}
|
||||
|
||||
|
||||
/// Forward an HTML start-element event to the delegate, if it implements the callback.
- (void)xmlStartHTMLElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {

	if (!self.delegateRespondsToStartElementMethod) {
		return;
	}
	@autoreleasepool {
		[self.delegate saxParser:self XMLStartElement:localName attributes:attributes];
	}
}
|
||||
|
||||
|
||||
/// Forward an XML end-element event to the delegate (if implemented), then stop storing characters.
- (void)xmlEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {

	@autoreleasepool {
		BOOL notifyDelegate = self.delegateRespondsToEndElementMethod;
		if (notifyDelegate) {
			[self.delegate saxParser:self XMLEndElement:localName prefix:prefix uri:uri];
		}
		// The character buffer is discarded whether or not the delegate was told.
		[self endStoringCharacters];
	}
}
|
||||
|
||||
|
||||
/// Forward an HTML end-element event to the delegate (if implemented), then stop storing characters.
- (void)xmlEndHTMLElement:(const xmlChar *)localName {

	@autoreleasepool {
		BOOL notifyDelegate = self.delegateRespondsToEndElementMethod;
		if (notifyDelegate) {
			[self.delegate saxParser:self XMLEndElement:localName];
		}
		// The character buffer is discarded whether or not the delegate was told.
		[self endStoringCharacters];
	}
}
|
||||
|
||||
@end
|
||||
|
||||
|
||||
/// C callback for XML start elements; @c context is the unretained @c RSSAXParser instance.
static void startElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) {

	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	[parser xmlStartElement:localname prefix:prefix uri:URI numberOfNamespaces:nb_namespaces namespaces:namespaces numberOfAttributes:nb_attributes numberDefaulted:nb_defaulted attributes:attributes];
}
|
||||
|
||||
|
||||
/// C callback for XML end elements; @c context is the unretained @c RSSAXParser instance.
static void endElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) {
	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	[parser xmlEndElement:localname prefix:prefix uri:URI];
}
|
||||
|
||||
|
||||
/// C callback for character data; @c context is the unretained @c RSSAXParser instance.
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	NSUInteger byteCount = (NSUInteger)len;
	[parser xmlCharactersFound:ch length:byteCount];
}
|
||||
|
||||
|
||||
/// C callback for the end of the document; @c context is the unretained @c RSSAXParser instance.
static void endDocumentSAX(void *context) {
	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	[parser xmlEndDocument];
}
|
||||
|
||||
/// C callback for HTML start elements; @c context is the unretained @c RSSAXParser instance.
static void startElementSAX_HTML(void *context, const xmlChar *localname, const xmlChar **attributes) {
	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	[parser xmlStartHTMLElement:localname attributes:attributes];
}
|
||||
|
||||
/// C callback for HTML end elements; @c context is the unretained @c RSSAXParser instance.
static void endElementSAX_HTML(void *context, const xmlChar *localname) {
	RSSAXParser *parser = (__bridge RSSAXParser *)context;
	[parser xmlEndHTMLElement:localname];
}
|
||||
|
||||
|
||||
static xmlSAXHandler saxHandlerStruct = {
|
||||
nil, /* internalSubset */
|
||||
@@ -321,8 +385,8 @@ static xmlSAXHandler saxHandlerStruct = {
|
||||
nil, /* setDocumentLocator */
|
||||
nil, /* startDocument */
|
||||
endDocumentSAX, /* endDocument */
|
||||
nil, /* startElement*/
|
||||
nil, /* endElement */
|
||||
startElementSAX_HTML, /* startElement*/
|
||||
endElementSAX_HTML, /* endElement */
|
||||
nil, /* reference */
|
||||
charactersFoundSAX, /* characters */
|
||||
nil, /* ignorableWhitespace */
|
||||
@@ -340,13 +404,3 @@ static xmlSAXHandler saxHandlerStruct = {
|
||||
endElementSAX, /* endElementNs */
|
||||
nil /* serror */
|
||||
};
|
||||
|
||||
|
||||
/// Call @c xmlInitParser() at most once per process, however often this function is invoked.
void RSSAXInitLibXMLParser(void) {

	static dispatch_once_t libxmlInitOnce;
	dispatch_once(&libxmlInitOnce, ^{ xmlInitParser(); });
}
|
||||
|
||||
|
||||
@@ -1,36 +1,48 @@
|
||||
//
|
||||
// RSXML.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
|
||||
|
||||
#import <RSXML/RSSAXParser.h>
|
||||
// General
|
||||
#import <RSXML/RSXMLError.h>
|
||||
#import <RSXML/NSString+RSXML.h>
|
||||
#import <RSXML/RSDateParser.h>
|
||||
#import <RSXML/RSXMLData.h>
|
||||
#import <RSXML/RSXMLParser.h>
|
||||
|
||||
// RSS & Atom Feeds
|
||||
#import <RSXML/RSFeedParser.h>
|
||||
#import <RSXML/FeedParser.h>
|
||||
#import <RSXML/RSAtomParser.h>
|
||||
#import <RSXML/RSRSSParser.h>
|
||||
#import <RSXML/RSParsedFeed.h>
|
||||
#import <RSXML/RSParsedArticle.h>
|
||||
|
||||
// OPML
|
||||
#import <RSXML/RSOPMLParser.h>
|
||||
#import <RSXML/RSOPMLItem.h>
|
||||
|
||||
#import <RSXML/RSXMLError.h>
|
||||
|
||||
#import <RSXML/NSString+RSXML.h>
|
||||
#import <RSXML/RSDateParser.h>
|
||||
|
||||
// HTML
|
||||
|
||||
#import <RSXML/RSSAXHTMLParser.h>
|
||||
|
||||
#import <RSXML/RSHTMLMetadataParser.h>
|
||||
#import <RSXML/RSHTMLMetadata.h>
|
||||
#import <RSXML/RSHTMLLinkParser.h>
|
||||
#import <RSXML/RSHTMLMetadata.h>
|
||||
|
||||
@@ -1,22 +1,41 @@
|
||||
//
|
||||
// RSXMLData.h
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 8/24/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
#import "RSXMLParser.h"
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
@class RSXMLParser;
|
||||
|
||||
/// Wraps raw document bytes with their source URL and the parser class chosen for them.
/// The generic parameter @c T is the parser type returned by @c getParser.
@interface RSXMLData <__covariant T : RSXMLParser *> : NSObject

@property (nonatomic, readonly, nonnull) NSString *urlString;
/// The raw bytes; cleared when no parser class was found and an error was recorded.
@property (nonatomic, readonly, nullable) NSData *data;
/// Parser class chosen for @c data, or @c nil if none could be determined.
@property (nonatomic, readonly, nullable) Class parserClass;
/// Reason why no parser could be determined, if any.
@property (nonatomic, readonly, nullable) NSError *parserError;

- (instancetype)initWithData:(NSData * _Nonnull)data urlString:(NSString * _Nonnull)urlString;

/// @return New parser instance of @c parserClass, or @c nil if there is none.
- (T _Nullable)getParser;

/// @return @c YES if a parser class was found and no error was recorded.
- (BOOL)canParseData;

@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
|
||||
@@ -1,28 +1,212 @@
|
||||
//
|
||||
// RSXMLData.m
|
||||
// RSXML
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Created by Brent Simmons on 8/24/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
// Copyright (c) 2016 Brent Simmons
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSXMLData.h"
|
||||
#import "RSXMLError.h"
|
||||
// Parser classes
|
||||
#import "RSRSSParser.h"
|
||||
#import "RSAtomParser.h"
|
||||
#import "RSOPMLParser.h"
|
||||
#import "RSHTMLMetadataParser.h"
|
||||
|
||||
@implementation RSXMLData
|
||||
|
||||
static const NSUInteger minNumberOfBytesToSearch = 20;
|
||||
static const NSInteger numberOfCharactersToSearch = 4096;
|
||||
|
||||
/**
 Store the data and URL, then determine the matching parser class right away.

 @param data Raw document bytes.
 @param urlString Source URL of the document.
 @return The initialised instance. When no parser class was found and an error
         was recorded, @c data is cleared again.
 */
- (instancetype)initWithData:(NSData *)data urlString:(NSString *)urlString {

	self = [super init];
	if (self) {
		_data = data;
		_urlString = urlString;
		_parserError = nil;
		_parserClass = [self determineParserClass]; // will set error
		if (!_parserClass && _parserError) {
			// No usable parser: do not keep the unparseable bytes around.
			_data = nil;
		}
	}
	return self;
}
|
||||
|
||||
/**
 Get location of @c str in data. May be inaccurate since UTF8 uses multi-byte characters.

 At most @c numberOfCharactersToSearch bytes are examined, and never more than
 the data actually contains.

 @param str NUL-terminated C string to look for.
 @return Byte offset of the first match, or @c NSNotFound.
 */
- (NSInteger)findCString:(const char*)str {
	const char *haystack = (const char *)_data.bytes;
	if (haystack == NULL || str == NULL) {
		return NSNotFound;
	}
	// NSData is not NUL-terminated, so the search length must be capped at the
	// real buffer size to keep strnstr() from reading past the end.
	size_t searchLength = MIN((size_t)_data.length, (size_t)numberOfCharactersToSearch);
	const char *foundStr = strnstr(haystack, str, searchLength);
	if (foundStr == NULL) {
		return NSNotFound;
	}
	return foundStr - haystack;
}
|
||||
|
||||
/**
 @param tags C strings to look for via @c findCString:.
 @param len Number of entries in @c tags.
 @return @c YES as soon as one of the tags is found, @c NO if none is.
 */
- (BOOL)matchAny:(const char*[])tags count:(int)len {
	int idx = 0;
	while (idx < len) {
		NSInteger position = [self findCString:tags[idx]];
		if (position != NSNotFound) {
			return YES;
		}
		idx++;
	}
	return NO;
}
|
||||
|
||||
/**
 @param tags C strings to look for via @c findCString:.
 @param len Number of entries in @c tags.
 @return @c NO as soon as one of the tags is missing, @c YES if every tag is found.
 */
- (BOOL)matchAll:(const char*[])tags count:(int)len {
	int idx = 0;
	while (idx < len) {
		NSInteger position = [self findCString:tags[idx]];
		if (position == NSNotFound) {
			return NO;
		}
		idx++;
	}
	return YES;
}
|
||||
|
||||
/**
 Look up every tag via @c findCString: and require that each tag's first
 occurrence is not located before the first occurrence of the preceding tag.

 @param tags C strings to look for, in the expected order.
 @param len Number of entries in @c tags.
 @return @c NO if a tag is missing or found before its predecessor, otherwise @c YES.
 */
- (BOOL)matchAllInCorrectOrder:(const char*[])tags count:(int)len {
	NSInteger previousPosition = 0;
	int idx = 0;
	while (idx < len) {
		NSInteger position = [self findCString:tags[idx]];
		BOOL missing = (position == NSNotFound);
		if (missing || position < previousPosition) {
			return NO;
		}
		previousPosition = position;
		idx++;
	}
	return YES;
}
|
||||
|
||||
|
||||
#pragma mark - Determine XML Parser
|
||||
|
||||
|
||||
/**
 Try to find the correct parser for the underlying data by searching the raw
 bytes for well-known tags. Sets @c _parserError and returns @c nil when the data
 is too short, has no '<', or carries the Google errors tag. If no fast check
 matches, the decision is delegated to @c determineParserClassSafeAndSlow.

 @return Parser class: @c RSRSSParser, @c RSAtomParser, @c RSOPMLParser or @c RSHTMLMetadataParser.
 */
- (nullable Class)determineParserClass {
	// TODO: check for things like images and movies and return nil.
	if (_data == nil || _data.length < minNumberOfBytesToSearch) {
		// TODO: check size, type, etc.
		_parserError = RSXMLMakeError(RSXMLErrorNoData);
		return nil;
	}
	if ([self findCString:"<"] == NSNotFound) {
		_parserError = RSXMLMakeError(RSXMLErrorMissingLeftCaret);
		return nil;
	}

	const char *rssTags[] = {"<rss", "<channel"};
	if ([self matchAll:rssTags count:2]) { // RSS
		return [RSRSSParser class];
	}

	const char *atomTags[] = {"<feed", "<entry"};
	if ([self matchAll:atomTags count:2]) { // Atom
		return [RSAtomParser class];
	}

	if ([self findCString:"<rdf:RDF"] != NSNotFound) {
		return [RSRSSParser class]; //TODO: parse RDF feeds ... for now, use RSS parser.
	}

	const char *opmlTags[] = {"<opml", "<outline"};
	if ([self matchAll:opmlTags count:2]) {
		return [RSOPMLParser class];
	}

	const char *htmlTags[] = {"<html", "<HTML", "<body", "<meta", "doctype html", "DOCTYPE html", "DOCTYPE HTML"};
	if ([self matchAny:htmlTags count:7]) {
		// Won’t catch every single case, which is fine.
		return [RSHTMLMetadataParser class];
	}

	if ([self findCString:"<errors xmlns='http://schemas.google"] != NSNotFound) {
		_parserError = RSXMLMakeError(RSXMLErrorContainsXMLErrorsTag);
		return nil;
	}

	// else: try slower NSString conversion and search case insensitive.
	return [self determineParserClassSafeAndSlow];
}
|
||||
|
||||
/**
 Create @c NSString object from @c .data and try to parse it as UTF8 and UTF16.
 Then, for each class from @c listOfParserClasses, check whether its required
 tags are found (case insensitive) in the same order provided.

 @return The first matching parser class. Otherwise @c nil, with @c _parserError
         set to @c RSXMLErrorNoSuitableParser.
 */
- (nullable Class)determineParserClassSafeAndSlow {
	@autoreleasepool {
		void *rawBytes = (void *)_data.bytes;
		NSUInteger rawLength = _data.length;

		NSString *s = [[NSString alloc] initWithBytesNoCopy:rawBytes length:rawLength encoding:NSUTF8StringEncoding freeWhenDone:NO];
		if (s == nil) {
			s = [[NSString alloc] initWithBytesNoCopy:rawBytes length:rawLength encoding:NSUnicodeStringEncoding freeWhenDone:NO];
		}
		if (s == nil) {
			// NOTE(review): RSXMLErrorInputEncoding looks like the intended code for this case — confirm.
			_parserError = RSXMLMakeError(RSXMLErrorNoSuitableParser);
			return nil;
		}

		// Restrict the search to the beginning of the document.
		NSUInteger searchLength = numberOfCharactersToSearch;
		if (s.length < searchLength) {
			searchLength = s.length;
		}
		NSRange rangeToSearch = NSMakeRange(0, searchLength);

		for (Class parserClass in [self listOfParserClasses]) {
			NSArray<const NSString *> *tags = [parserClass parserRequireOrderedTags];

			BOOL tagsFoundInOrder = YES;
			NSUInteger previousPosition = 0;
			for (NSString *tag in tags) {
				NSRange hit = [s rangeOfString:tag options:NSCaseInsensitiveSearch range:rangeToSearch];
				if (hit.location == NSNotFound || hit.location < previousPosition) {
					tagsFoundInOrder = NO;
					break;
				}
				previousPosition = hit.location;
			}
			if (tagsFoundInOrder) {
				return parserClass;
			}
		}
	}
	// Try RSS anyway? libxml would return a parsing error
	_parserError = RSXMLMakeError(RSXMLErrorNoSuitableParser);
	return nil;
}
|
||||
|
||||
/// @return List of parsers, built once and shared: @c RSRSSParser, @c RSAtomParser, @c RSOPMLParser.
- (NSArray *)listOfParserClasses {
	static dispatch_once_t buildOnce;
	static NSArray *sharedParserClasses = nil;
	dispatch_once(&buildOnce, ^{
		Class rss = [RSRSSParser class];
		Class atom = [RSAtomParser class];
		Class opml = [RSOPMLParser class];
		sharedParserClasses = @[rss, atom, opml];
	});
	return sharedParserClasses;
}
|
||||
|
||||
|
||||
#pragma mark - Check Methods to Determine Parser Type
|
||||
|
||||
|
||||
/// @return New instance of the determined parser class, initialised with this
/// object, or @c nil if no suitable parser found.
- (id)getParser {
	Class chosenClass = _parserClass;
	return [[chosenClass alloc] initWithXMLData:self];
}
|
||||
|
||||
/// @return @c YES if a parser class, regardless of type, was found and no error was recorded.
- (BOOL)canParseData {
	if (_parserClass == nil) {
		return NO;
	}
	return (_parserError == nil);
}
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,21 +1,47 @@
|
||||
//
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
#import <libxml/xmlerror.h>
|
||||
|
||||
extern NSErrorDomain kLIBXMLParserErrorDomain;
|
||||
extern NSErrorDomain kRSXMLParserErrorDomain;
|
||||
extern NSErrorDomain const kLIBXMLParserErrorDomain;
|
||||
extern NSErrorDomain const kRSXMLParserErrorDomain;
|
||||
|
||||
/// Error codes for RSXML error domain @c (kRSXMLParserErrorDomain)
typedef NS_ERROR_ENUM(kRSXMLParserErrorDomain, RSXMLError) {
	// 1xx: general xml parsing error
	RSXMLErrorNoData = 110, // input length is less than 20 characters
	RSXMLErrorInputEncoding = 111, // input is not decodable with UTF8 or UTF16 encoding
	RSXMLErrorMissingLeftCaret = 120, // input does not contain any '<' character
	RSXMLErrorContainsXMLErrorsTag = 130, // input contains: "<errors xmlns='http://schemas.google"
	RSXMLErrorNoSuitableParser = 140, // none of the provided parsers can read the data
	// 2xx: xml content <-> parser, mismatch
	RSXMLErrorExpectingFeed = 210,
	RSXMLErrorExpectingHTML = 220,
	RSXMLErrorExpectingOPML = 230
};
|
||||
|
||||
void RSXMLSetError(NSError **error, RSXMLError code, NSString *filename);
|
||||
NSError * RSXMLMakeError(RSXMLError code, NSString *filename);
|
||||
NSError * RSXMLMakeError(RSXMLError code);
|
||||
NSError * RSXMLMakeErrorWrongParser(RSXMLError code, RSXMLError expected);
|
||||
NSError * RSXMLMakeErrorFromLIBXMLError(xmlErrorPtr err);
|
||||
|
||||
@@ -1,43 +1,73 @@
|
||||
//
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSXMLError.h"
|
||||
|
||||
NSErrorDomain kLIBXMLParserErrorDomain = @"LIBXMLParserErrorDomain";
|
||||
NSErrorDomain kRSXMLParserErrorDomain = @"RSXMLParserErrorDomain";
|
||||
const NSErrorDomain kLIBXMLParserErrorDomain = @"LIBXMLParserErrorDomain";
|
||||
const NSErrorDomain kRSXMLParserErrorDomain = @"RSXMLParserErrorDomain";
|
||||
|
||||
NSString * getErrorMessageForRSXMLError(RSXMLError code, id paramA);
|
||||
NSString * getErrorMessageForRSXMLError(RSXMLError code, id paramA) {
|
||||
const char * parserDescriptionForError(RSXMLError code);

/**
 Maps one of the @c RSXMLErrorExpecting… codes to a short, human readable name of the
 corresponding document type. Every other code yields @c "Unknown format".

 @param code An @c RSXMLError value.
 @return A static C string; never @c NULL.
 */
const char * parserDescriptionForError(RSXMLError code) {
    if (code == RSXMLErrorExpectingFeed) {
        return "RSS or Atom feed";
    }
    if (code == RSXMLErrorExpectingOPML) {
        return "OPML data";
    }
    if (code == RSXMLErrorExpectingHTML) {
        return "HTML data";
    }
    return "Unknown format";
}
|
||||
|
||||
NSString * getErrorMessageForRSXMLError(RSXMLError code, RSXMLError expected);
|
||||
NSString * getErrorMessageForRSXMLError(RSXMLError code, RSXMLError expected) {
|
||||
switch (code) { // switch statement will warn if an enum value is missing
|
||||
case RSXMLErrorNoData:
|
||||
return @"Couldn't parse feed. No data available.";
|
||||
return @"Can't parse data. Empty data.";
|
||||
case RSXMLErrorInputEncoding:
|
||||
return @"Can't parse data. Input encoding cannot be converted to UTF-8 / UTF-16.";
|
||||
case RSXMLErrorMissingLeftCaret:
|
||||
return @"Couldn't parse feed. Missing left caret character ('<').";
|
||||
case RSXMLErrorProbablyHTML:
|
||||
return @"Couldn't parse feed. Expecting XML data but found html data.";
|
||||
return @"Can't parse XML. Missing left caret character ('<').";
|
||||
case RSXMLErrorContainsXMLErrorsTag:
|
||||
return @"Couldn't parse feed. XML contains 'errors' tag.";
|
||||
return @"Can't parse XML. XML contains 'errors' tag.";
|
||||
case RSXMLErrorNoSuitableParser:
|
||||
return @"Couldn't parse feed. No suitable parser found. XML document not well-formed.";
|
||||
case RSXMLErrorFileNotOPML:
|
||||
if (paramA) {
|
||||
return [NSString stringWithFormat:@"The file ‘%@’ can't be parsed because it's not an OPML file.", paramA];
|
||||
}
|
||||
return @"The file can't be parsed because it's not an OPML file.";
|
||||
return @"Can't parse XML. No suitable parser found. Document not well-formed?";
|
||||
case RSXMLErrorExpectingHTML:
|
||||
case RSXMLErrorExpectingOPML:
|
||||
case RSXMLErrorExpectingFeed:
|
||||
return [NSString stringWithFormat:@"Can't parse XML. %s expected, but %s found.",
|
||||
parserDescriptionForError(code), parserDescriptionForError(expected)];
|
||||
}
|
||||
}
|
||||
|
||||
void RSXMLSetError(NSError **error, RSXMLError code, NSString *filename) {
|
||||
if (error) {
|
||||
*error = RSXMLMakeError(code, filename);
|
||||
}
|
||||
NSError * RSXMLMakeError(RSXMLError code) {
|
||||
return RSXMLMakeErrorWrongParser(code, RSXMLErrorNoData);
|
||||
}
|
||||
|
||||
NSError * RSXMLMakeError(RSXMLError code, NSString *filename) {
|
||||
NSError * RSXMLMakeErrorWrongParser(RSXMLError code, RSXMLError expected) {
|
||||
return [NSError errorWithDomain:kRSXMLParserErrorDomain code:code
|
||||
userInfo:@{NSLocalizedDescriptionKey: getErrorMessageForRSXMLError(code, nil)}];
|
||||
userInfo:@{NSLocalizedDescriptionKey: getErrorMessageForRSXMLError(code, expected)}];
|
||||
}
|
||||
|
||||
NSError * RSXMLMakeErrorFromLIBXMLError(xmlErrorPtr err) {
|
||||
if (err) {
|
||||
if (err && err->level == XML_ERR_FATAL) {
|
||||
int errCode = err->code;
|
||||
char * msg = err->message;
|
||||
//if (err->level == XML_ERR_FATAL)
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
//
|
||||
// RSXMLInternal.h
|
||||
// RSXML
|
||||
//
|
||||
// Created by Brent Simmons on 12/26/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
@import Foundation;
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// Returns YES if obj is nil or NSNull, or if it responds to -count (checked first) or -length
/// and that value is below 1. Objects responding to neither selector are reported as non-empty.
BOOL RSXMLIsEmpty(id _Nullable obj);
|
||||
/// Returns YES if s is nil, NSNull, or has a length below 1.
BOOL RSXMLStringIsEmpty(NSString * _Nullable s);
|
||||
|
||||
|
||||
/// MD5 helper on NSString.
@interface NSString (RSXMLInternal)
|
||||
|
||||
/// Returns the MD5 digest of the receiver's UTF-8 bytes as 32 lowercase hexadecimal characters.
- (NSString *)rsxml_md5HashString;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
/// Case-insensitive key lookup on NSDictionary.
@interface NSDictionary (RSXMLInternal)
|
||||
|
||||
/// Tries an exact lookup of key first. On a miss, returns the value of the first NSString key
/// that equals key when compared case-insensitively, or nil if there is none.
- (nullable id)rsxml_objectForCaseInsensitiveKey:(NSString *)key;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
//
|
||||
// RSXMLInternal.m
|
||||
// RSXML
|
||||
//
|
||||
// Created by Brent Simmons on 12/26/16.
|
||||
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
#import <CommonCrypto/CommonDigest.h>
|
||||
#import "RSXMLInternal.h"
|
||||
|
||||
|
||||
/// Treats a nil pointer and the NSNull singleton alike: both mean "no value".
static BOOL RSXMLIsNil(id obj) {
    if (obj == nil) {
        return YES;
    }
    return obj == [NSNull null];
}
|
||||
|
||||
/// Returns YES for nil / NSNull and for objects whose -count (checked first) or -length is below 1.
/// Objects that respond to neither selector are considered non-empty.
BOOL RSXMLIsEmpty(id obj) {
    BOOL empty = NO; // fallback for objects without count/length; shouldn't be hit very often

    if (RSXMLIsNil(obj)) {
        empty = YES;
    } else if ([obj respondsToSelector:@selector(count)]) {
        empty = [obj count] < 1;
    } else if ([obj respondsToSelector:@selector(length)]) {
        empty = [obj length] < 1;
    }

    return empty;
}
|
||||
|
||||
/// Returns YES if s is nil, NSNull, or a string without any characters.
BOOL RSXMLStringIsEmpty(NSString *s) {
    if (RSXMLIsNil(s)) {
        return YES;
    }
    return s.length == 0;
}
|
||||
|
||||
|
||||
@implementation NSString (RSXMLInternal)

/// Raw MD5 digest (CC_MD5_DIGEST_LENGTH bytes) of the receiver's UTF-8 representation.
- (NSData *)rsxml_md5Hash {
    NSData *utf8Bytes = [self dataUsingEncoding:NSUTF8StringEncoding];
    unsigned char digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5(utf8Bytes.bytes, (CC_LONG)utf8Bytes.length, digest);
    return [NSData dataWithBytes:digest length:sizeof(digest)];
}

/// The MD5 digest rendered as lowercase hexadecimal, two characters per digest byte.
- (NSString *)rsxml_md5HashString {
    const Byte *raw = [self rsxml_md5Hash].bytes;
    NSMutableString *hex = [NSMutableString stringWithCapacity:CC_MD5_DIGEST_LENGTH * 2];
    for (NSUInteger idx = 0; idx < CC_MD5_DIGEST_LENGTH; idx++) {
        [hex appendFormat:@"%02x", raw[idx]];
    }
    return [hex copy]; // hand out an immutable string
}

@end
|
||||
|
||||
|
||||
@implementation NSDictionary (RSXMLInternal)

/**
 Looks up @c key, ignoring case if there is no exact match.

 @param key The key to look for. A @c nil key never matches anything.
 @return The value stored under @c key, otherwise the value of the first @c NSString key
         that equals @c key case-insensitively, otherwise @c nil.
 */
- (nullable id)rsxml_objectForCaseInsensitiveKey:(NSString *)key {
    // Without this guard a nil key falls through to `[nil caseInsensitiveCompare:]`, which
    // returns 0 (== NSOrderedSame) and would hand back the value of an arbitrary string key.
    if (key == nil) {
        return nil;
    }

    id exactMatch = self[key];
    if (exactMatch) {
        return exactMatch;
    }

    for (NSString *candidate in self.allKeys) {
        // Keys may be of any type; only strings can be compared case-insensitively.
        if ([candidate isKindOfClass:[NSString class]] && [key caseInsensitiveCompare:candidate] == NSOrderedSame) {
            return self[candidate];
        }
    }

    return nil;
}

@end
|
||||
69
RSXML/RSXMLParser.h
Normal file
69
RSXML/RSXMLParser.h
Normal file
@@ -0,0 +1,69 @@
|
||||
//
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
@import Foundation;
|
||||
#import "RSSAXParser.h"
|
||||
|
||||
#define EqualBytes(bytes1, bytes2, length) (memcmp(bytes1, bytes2, length) == 0)
|
||||
//#define EqualBytes(bytes1, bytes2, length) (!strncmp(bytes1, bytes2, length))
|
||||
|
||||
@class RSXMLData;
|
||||
|
||||
|
||||
/// Hooks a concrete parser provides so that @c RSXMLParser can select it and drive the parsing.
@protocol RSXMLParserDelegate <NSObject>

@required

/// Creates a parser for the given data.
- (instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData;

/// @return @c YES if parser supports parsing feeds (RSS or Atom).
+ (BOOL)isFeedParser;

/// @return @c YES if parser supports parsing OPML files.
+ (BOOL)isOPMLParser;

/// @return @c YES if parser supports parsing HTML files.
+ (BOOL)isHTMLParser;

/// Called once parsing has finished. @return Reference to the parsed object.
- (id)xmlParserWillReturnDocument;

@optional

/**
 List of tags the data @c (RSXMLData) has to contain for this parser to be selected.
 The parser is selected only if every string is found, and in the given order.

 @note This method will only be called if the original data has some weird encoding.
 @c RSXMLData first tries to convert the data to an @c UTF8 string, then to @c UTF16.
 If both conversions fail the parser is deemed not suitable for this data.
 */
+ (NSArray<const NSString *> *)parserRequireOrderedTags;

/// @return Return @c NO to cancel parsing before it even started. E.g. check if parser is of correct type.
- (BOOL)xmlParserWillStartParsing;

@end
|
||||
|
||||
|
||||
/**
 Base class for the concrete parsers. @c T is the type of the parsed document that
 @c parseSync: and @c parseAsync: hand back.
 */
@interface RSXMLParser<__covariant T> : NSObject <RSXMLParserDelegate, RSSAXParserDelegate>

/// Copy of the URL string of the @c RSXMLData the parser was created with.
@property (nonatomic, copy, readonly, nonnull) NSString *documentURI;

/// Derived from the input error recorded while the parser was initialized.
- (BOOL)canParse;

/// Parses on the calling thread. @c error is optional.
- (T _Nullable)parseSync:(NSError ** _Nullable)error;

/// Parses on a background queue and invokes @c block on the main queue.
- (void)parseAsync:(void(^)(T _Nullable parsedDocument, NSError * _Nullable error))block;

@end
|
||||
|
||||
143
RSXML/RSXMLParser.m
Normal file
143
RSXML/RSXMLParser.m
Normal file
@@ -0,0 +1,143 @@
|
||||
//
|
||||
// MIT License (MIT)
|
||||
//
|
||||
// Copyright (c) 2018 Oleg Geier
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#import "RSXMLParser.h"
|
||||
#import "RSXMLData.h"
|
||||
#import "RSXMLError.h"
|
||||
|
||||
@interface RSXMLParser()
/// Raw bytes that are fed to the SAX parser.
@property (nonatomic, strong) NSData *xmlData;
/// SAX parser created in the initializer with this object as its delegate.
@property (nonatomic, strong) RSSAXParser *parser;
/// Error detected before parsing (taken from @c RSXMLData, parser mismatch, or missing data).
@property (nonatomic, copy) NSError *xmlInputError;
@end
|
||||
|
||||
|
||||
@implementation RSXMLParser

#pragma mark - Defaults For Subclasses

+ (BOOL)isFeedParser { return NO; } // override
+ (BOOL)isOPMLParser { return NO; } // override
+ (BOOL)isHTMLParser { return NO; } // override
- (id)xmlParserWillReturnDocument { return nil; } // override


#pragma mark - Init

/**
 Designated initializer. Copies the document URI and any error already recorded by @c RSXMLData,
 then checks whether this class matches the parser class detected by @c RSXMLData.

 Problems are not reported here. They are stored in @c .xmlInputError and handed out by @c parseSync:.
 */
- (instancetype)initWithXMLData:(nonnull RSXMLData *)xmlData {
    self = [super init];
    if (self) {
        _documentURI = [xmlData.urlString copy];
        _xmlInputError = [xmlData.parserError copy];
        // May replace the error above with a more specific "wrong parser" error.
        [self checkIfParserMatches:xmlData.parserClass];
        _xmlData = xmlData.data;
        if (!_xmlData) {
            _xmlInputError = RSXMLMakeError(RSXMLErrorNoData);
        }
        _parser = [[RSSAXParser alloc] initWithDelegate:self];
    }
    return self;
}


#pragma mark - Parsing

/**
 Parse the XML data on whatever thread this method is called.

 @param error Optional. Receives the input error recorded during initialization, or the result of
              converting the last libxml error after parsing (which may be @c nil).
              Left untouched if @c xmlParserWillStartParsing cancels the parsing.
 @return The parsed object. The object type depends on the underlying data. @c RSParsedFeed, @c RSOPMLItem or @c RSHTMLMetadata.
         @c nil if an input error is present or parsing was cancelled.
 */
- (id _Nullable)parseSync:(NSError **)error {
    if (_xmlInputError) {
        if (error) *error = _xmlInputError;
        return nil;
    }

    if ([self respondsToSelector:@selector(xmlParserWillStartParsing)] && ![self xmlParserWillStartParsing])
        return nil;

    // Keep the error in a strong local while the pool is alive. Writing to the autoreleasing
    // out-parameter inside @autoreleasepool would release the object when the pool drains
    // and leave the caller with a dangling pointer.
    NSError *parsingError = nil;
    @autoreleasepool {
        xmlResetLastError();
        [_parser parseBytes:_xmlData.bytes numberOfBytes:_xmlData.length];
        if (error) {
            parsingError = RSXMLMakeErrorFromLIBXMLError(xmlGetLastError());
            xmlResetLastError();
        }
    }
    if (error) {
        *error = parsingError;
    }
    return [self xmlParserWillReturnDocument];
}

/**
 Dispatch to a background queue, parse the data synchronously there and exec callback on the main thread.

 @param block Called on the main queue with the parsed document and the error of @c parseSync:.
              If @c nil, the data is still parsed but nobody is notified.
 */
- (void)parseAsync:(void(^)(id parsedDocument, NSError *error))block {
    dispatch_async(dispatch_get_global_queue(QOS_CLASS_UTILITY, 0), ^{ // QOS_CLASS_DEFAULT
        @autoreleasepool {
            NSError *error;
            id obj = [self parseSync:&error];
            if (!block) {
                return; // calling a nil block would crash
            }
            dispatch_async(dispatch_get_main_queue(), ^{
                block(obj, error);
            });
        }
    });
}

/// @return @c YES if no input error was recorded, i.e. @c .xmlInputError is @c nil.
- (BOOL)canParse {
    return (self.xmlInputError == nil);
}


#pragma mark - Check Parser Type Matches


/**
 @return Returns either @c ExpectingFeed, @c ExpectingOPML, @c ExpectingHTML.
 @return @c RSXMLErrorNoData for an unexpected class (e.g., if @c RSXMLParser is used directly).
 */
- (RSXMLError)getExpectedErrorForClass:(Class<RSXMLParserDelegate>)cls {
    if ([cls isFeedParser])
        return RSXMLErrorExpectingFeed;
    if ([cls isOPMLParser])
        return RSXMLErrorExpectingOPML;
    if ([cls isHTMLParser])
        return RSXMLErrorExpectingHTML;
    return RSXMLErrorNoData; // will result in 'Unknown format'
}

/**
 Check whether parsing class matches the expected parsing class. If not set @c .xmlInputError along the way.

 @param xmlParserClass The parser class detected for the data; may be @c Nil.
 @return @c NO if @c xmlParserClass is @c Nil (no error is set in this case) or if the two classes
         parse different kinds of documents (@c .xmlInputError is replaced by a wrong-parser error).
         @c YES otherwise, even if @c .xmlInputError was already set before the call.
 */
- (BOOL)checkIfParserMatches:(Class<RSXMLParserDelegate>)xmlParserClass {
    if (!xmlParserClass)
        return NO;
    if (xmlParserClass != [self class]) { // && !_xmlInputError
        // Different classes are fine as long as both handle the same kind of document.
        RSXMLError current = [self getExpectedErrorForClass:[self class]];
        RSXMLError expected = [self getExpectedErrorForClass:xmlParserClass];
        if (current != expected) {
            _xmlInputError = RSXMLMakeErrorWrongParser(current, expected);
            return NO;
        }
    }
    return YES; // no mismatch detected by this call
}

@end
|
||||
Reference in New Issue
Block a user