From 62c5bef463105165bfeb4cf1d02534092849fd31 Mon Sep 17 00:00:00 2001 From: relikd Date: Thu, 27 Dec 2018 21:11:59 +0100 Subject: [PATCH] Refactoring to v.2.0 --- LICENSE | 3 +- README.md | 71 +- RSXML.xcodeproj/project.pbxproj | 107 +-- RSXML/FeedParser.h | 24 - RSXML/NSDictionary+RSXML.h | 28 + RSXML/NSDictionary+RSXML.m | 41 + RSXML/NSString+RSXML.h | 26 +- RSXML/NSString+RSXML.m | 68 +- RSXML/RSAtomParser.h | 31 +- RSXML/RSAtomParser.m | 750 ++++++------------ RSXML/RSDateParser.h | 23 +- RSXML/RSDateParser.m | 23 +- RSXML/RSFeedParser.h | 49 +- RSXML/RSFeedParser.m | 253 +----- RSXML/RSHTMLLinkParser.h | 46 +- RSXML/RSHTMLLinkParser.m | 184 ++--- RSXML/RSHTMLMetadata.h | 75 +- RSXML/RSHTMLMetadata.m | 287 ++----- RSXML/RSHTMLMetadataParser.h | 40 +- RSXML/RSHTMLMetadataParser.m | 173 ++-- RSXML/RSOPMLItem.h | 22 + RSXML/RSOPMLItem.m | 28 +- RSXML/RSOPMLParser.h | 42 +- RSXML/RSOPMLParser.m | 215 ++--- RSXML/RSParsedArticle.h | 30 +- RSXML/RSParsedArticle.m | 121 +-- RSXML/RSParsedFeed.h | 38 +- RSXML/RSParsedFeed.m | 60 +- RSXML/RSRSSParser.h | 31 +- RSXML/RSRSSParser.m | 590 +++++--------- RSXML/RSSAXHTMLParser.h | 49 -- RSXML/RSSAXHTMLParser.m | 315 -------- RSXML/RSSAXParser.h | 68 +- RSXML/RSSAXParser.m | 256 +++--- RSXML/RSXML.h | 44 +- RSXML/RSXMLData.h | 41 +- RSXML/RSXMLData.m | 206 ++++- RSXML/RSXMLError.h | 48 +- RSXML/RSXMLError.m | 74 +- RSXML/RSXMLInternal.h | 31 - RSXML/RSXMLInternal.m | 83 -- RSXML/RSXMLParser.h | 69 ++ RSXML/RSXMLParser.m | 143 ++++ RSXMLTests/RSDateParserTests.m | 23 +- RSXMLTests/RSEntityTests.m | 24 +- RSXMLTests/RSHTMLTests.m | 231 +++--- RSXMLTests/RSOPMLTests.m | 108 +-- RSXMLTests/RSXMLTests.m | 379 +++++---- ...DaringFireball.rss => DaringFireball.atom} | 0 RSXMLTests/Resources/TimerSearch.txt | 31 - 50 files changed, 2574 insertions(+), 3128 deletions(-) delete mode 100644 RSXML/FeedParser.h create mode 100644 RSXML/NSDictionary+RSXML.h create mode 100644 RSXML/NSDictionary+RSXML.m delete mode 100644 
RSXML/RSSAXHTMLParser.h delete mode 100644 RSXML/RSSAXHTMLParser.m delete mode 100644 RSXML/RSXMLInternal.h delete mode 100644 RSXML/RSXMLInternal.m create mode 100644 RSXML/RSXMLParser.h create mode 100644 RSXML/RSXMLParser.m rename RSXMLTests/Resources/{DaringFireball.rss => DaringFireball.atom} (100%) delete mode 100644 RSXMLTests/Resources/TimerSearch.txt diff --git a/LICENSE b/LICENSE index d9348ee..9db78c7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ MIT License -Copyright (c) 2016 brentsimmons +Original work: Copyright (c) 2016 Brent Simmons +Modified work: Copyright (c) 2018 Oleg Geier Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index e6ed02c..34d386b 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,66 @@ # RSXML -This is utility code for parsing XML and HTML using libXML2’s SAX parser. +This is utility code for parsing XML and HTML using libXML2’s SAX parser. It does not depend on any other third-party frameworks and builds two targets: one for Mac, one for iOS. -It builds two framework targets: one for Mac, one for iOS. It does not depend on any other third-party frameworks. The code is Objective-C with ARC. +**Note:** This is an actively maintained fork of the [RSXML library by Brent Simmons](https://github.com/brentsimmons/RSXML). The original library seems to be inactive in favor of the new version [RSParser](https://github.com/brentsimmons/RSParser) which is written with Swift support in mind. If you prefer Swift you should go ahead and work with that project. However, the reason for this fork is to keep a version alive which is Objective-C only. -#### The gist -To parse XML, create an `RSSAXParserDelegate`. (There are examples in the framework that you can crib from.) -To parse HTML, create an `RSSAXHTMLParserDelegate`. (There are examples for this too.) +### Why use libXML2’s SAX API? 
-#### Goodies and Extras +Brent Simmons placed great value on a low memory footprint and fast parsing. In his own words: "RSXML was written to avoid allocating Objective-C objects except when absolutely needed. You'll note use of things like `memcmp` and `strncmp`". This promise will not be broken in future development. -There are three XML parsers included, for OPML, RSS, and Atom. To parse OPML, see `RSOPMLParser`. To parse RSS and Atom, see `RSFeedParser`. -These parsers may or may not be complete enough for your needs. You could, in theory, start writing an RSS reader just with these. (And, if you want to, go for it, with my blessing.) -There are two HTML parsers included. `RSHTMLMetadataParser` pulls metadata from the head section of an HTML document. `RSHTMLLinkParser` pulls all the links (anchors, <a href=…> tags) from an HTML document. +### Refactoring v.2.0 -Other possibly interesting things: +The refactoring that led to version 2.0 changed many things. With nearly all files touched, I would say roughly 80% of the code was updated. The parser architecture was rewritten and every parser is now a subclass of `RSXMLParser`. The parsing interface uses generic return types and some of the returned documents have changed as well. -`RSDateParser` makes it easy to parse dates in the formats found in various types of feeds. +In general, performance did not change, and where it did, it got slightly better. However, the performance of the HTML metadata parser improved by 80% – 90% (by canceling the parse after the head tag). At the same time, heap allocations dropped to 30% – 50% of their previous level for the test cases (same reason). + +In the previous version, the test case for parsing a non-OPML file (with `RSOPMLParser`) took 13 seconds, whereas now, the parser cancels after a few milliseconds. 
+ + + +## Usage + +``` +RSXMLData *xmlData = [[RSXMLData alloc] initWithData:d urlString:@"https://www.example.org"]; +// TODO: check xmlData.parserError +RSFeedParser *parser = [[RSFeedParser alloc] initWithXMLData:xmlData]; +// TODO: check [parser canParse] +// TODO: alternatively check error after parseSync: +NSError *parseError; +RSParsedFeed *document = [parser parseSync:&parseError]; +``` + +`RSXMLData` will return an error in `.parserError` if the provided data is not in XML format (see `RSXMLError` for possible reasons). The other point of failure is after initializing a parser with the `RSXMLData`. This will set an error if the parser does not match the underlying data (e.g., if you try to parse an `.opml` file with an Atom or RSS parser). + +If you don't care about the parser used to decode the data, `[xmlData getParser]` will return the most suitable parser. You can use that parser right away to call `parseSync:`. Alternatively, you can parse the XML file asynchronously with `parseAsync:`. + +``` +[[xmlData getParser] parseAsync:^(RSParsedFeed *parsedDocument, NSError *error) { + // process feed items ... +}]; +``` + + + +### Available parsers + +This library includes parsers for RSS, Atom, OPML, and HTML metadata. The latter will return links to feed URLs, icon files, or, more generally, all anchor tags regardless of their target. Use `RSFeedParser` to parse a feed regardless of type (Atom: `RSAtomParser`, RSS: `RSRSSParser`). To parse `.opml` files use `RSOPMLParser`, and for `.html` files there are two parsers available: `RSHTMLMetadataParser` (icons and feed links) and `RSHTMLLinkParser` (all anchor tags). + +Depending on the parser, the return value of `parseSync`/`parseAsync` is one of: `RSParsedFeed`, `RSOPMLItem`, `RSHTMLMetadata`, or `RSHTMLMetadataAnchor`. + +You can define the parser type by declaring it like this: `RSXMLData<RSFeedParser *> *xmlData`. That won't force the selection of the parser, though. 
But `[xmlData getParser]` will return the correct type, which in turn will return the appropriate document type (same as using a specific parser in the first place). + + + +### Extras + +`RSDateParser` makes it easy to parse dates from various formats found in different feed types. `NSString+RSXML` decodes HTML entities. Also note: there are some unit tests. -#### Why use libXML2’s SAX API? - -SAX is kind of a pain because of all the state you have to manage. But it’s fastest and uses the least amount of memory. - -An alternative is to use `NSXMLParser`, which is event-driven like SAX. However, RSXML was written to avoid allocating Objective-C objects except when absolutely needed. You’ll note use of things like `memcp` and `strncmp`. - -Normally I avoid this kind of thing *strenuously*. I prefer to work at the highest level possible. - -But my more-than-a-decade of experience parsing XML has led me to this solution, which — last time I checked, which was, admittedly, a few years ago — was not only fastest but also uses the least memory. (The two things are related, of course: creating objects is bad for performance, so this code attempts to do the minimum possible.) - -All that low-level stuff is encapsulated, however. If you parse a feed, for instance, the caller gets an `RSParsedFeed` which contains `RSParsedArticle`s, and they’re standard Objective-C objects. It’s only inside your `RSSAXParserDelegate` and `RSSAXHTMLParserDelegate` where you’ll need to deal with C. 
\ No newline at end of file diff --git a/RSXML.xcodeproj/project.pbxproj b/RSXML.xcodeproj/project.pbxproj index 1005768..418ba98 100644 --- a/RSXML.xcodeproj/project.pbxproj +++ b/RSXML.xcodeproj/project.pbxproj @@ -7,6 +7,14 @@ objects = { /* Begin PBXBuildFile section */ + 54702A9821D407A00050A741 /* RSXMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 54702A9621D4079F0050A741 /* RSXMLParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 54702A9921D407A00050A741 /* RSXMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 54702A9621D4079F0050A741 /* RSXMLParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 54702A9A21D407A00050A741 /* RSXMLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 54702A9721D407A00050A741 /* RSXMLParser.m */; }; + 54702A9B21D407A00050A741 /* RSXMLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 54702A9721D407A00050A741 /* RSXMLParser.m */; }; + 54C707DB21D42B710029BFF1 /* NSDictionary+RSXML.m in Sources */ = {isa = PBXBuildFile; fileRef = 54C707D921D42B710029BFF1 /* NSDictionary+RSXML.m */; }; + 54C707DC21D42B710029BFF1 /* NSDictionary+RSXML.m in Sources */ = {isa = PBXBuildFile; fileRef = 54C707D921D42B710029BFF1 /* NSDictionary+RSXML.m */; }; + 54C707DD21D42B710029BFF1 /* NSDictionary+RSXML.h in Headers */ = {isa = PBXBuildFile; fileRef = 54C707DA21D42B710029BFF1 /* NSDictionary+RSXML.h */; }; + 54C707DE21D42B710029BFF1 /* NSDictionary+RSXML.h in Headers */ = {isa = PBXBuildFile; fileRef = 54C707DA21D42B710029BFF1 /* NSDictionary+RSXML.h */; }; 54FCE5F421493B5E00FABB65 /* Resources in Resources */ = {isa = PBXBuildFile; fileRef = 54FCE5F321493B5E00FABB65 /* Resources */; }; 8400B0F01B8C20A9004C4CFF /* RSXMLData.h in Headers */ = {isa = PBXBuildFile; fileRef = 8400B0EE1B8C20A9004C4CFF /* RSXMLData.h */; settings = {ATTRIBUTES = (Public, ); }; }; 8400B0F11B8C20A9004C4CFF /* RSXMLData.m in Sources */ = {isa = PBXBuildFile; fileRef = 8400B0EF1B8C20A9004C4CFF /* RSXMLData.m */; }; @@ -21,21 +29,16 @@ 
842D515B1B52E81B00E63D52 /* RSRSSParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 842D51591B52E81B00E63D52 /* RSRSSParser.m */; }; 842D51631B53058B00E63D52 /* RSParsedArticle.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D51611B53058B00E63D52 /* RSParsedArticle.h */; settings = {ATTRIBUTES = (Public, ); }; }; 842D51641B53058B00E63D52 /* RSParsedArticle.m in Sources */ = {isa = PBXBuildFile; fileRef = 842D51621B53058B00E63D52 /* RSParsedArticle.m */; }; - 842D516F1B5308BD00E63D52 /* FeedParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D516D1B5308BD00E63D52 /* FeedParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 842D51761B530BF200E63D52 /* RSParsedFeed.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D51741B530BF200E63D52 /* RSParsedFeed.h */; settings = {ATTRIBUTES = (Public, ); }; }; 842D51771B530BF200E63D52 /* RSParsedFeed.m in Sources */ = {isa = PBXBuildFile; fileRef = 842D51751B530BF200E63D52 /* RSParsedFeed.m */; }; 842D517A1B5311AD00E63D52 /* RSOPMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D51781B5311AD00E63D52 /* RSOPMLParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 842D517B1B5311AD00E63D52 /* RSOPMLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 842D51791B5311AD00E63D52 /* RSOPMLParser.m */; }; - 843819001C8CB00400E2A1DD /* RSSAXHTMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 843818FE1C8CB00400E2A1DD /* RSSAXHTMLParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 843819011C8CB00400E2A1DD /* RSSAXHTMLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 843818FF1C8CB00400E2A1DD /* RSSAXHTMLParser.m */; }; 8475C4081D57AB4C0076751E /* RSHTMLLinkParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 8475C4061D57AB4C0076751E /* RSHTMLLinkParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 8475C4091D57AB4C0076751E /* RSHTMLLinkParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 8475C4071D57AB4C0076751E /* RSHTMLLinkParser.m */; }; 8486F1151BB646140092794F /* 
NSString+RSXML.h in Headers */ = {isa = PBXBuildFile; fileRef = 8486F1131BB646140092794F /* NSString+RSXML.h */; settings = {ATTRIBUTES = (Public, ); }; }; 8486F1161BB646140092794F /* NSString+RSXML.m in Sources */ = {isa = PBXBuildFile; fileRef = 8486F1141BB646140092794F /* NSString+RSXML.m */; }; 84AD0BF51E11A6FB00B38510 /* RSDateParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84AD0BF31E11A6FB00B38510 /* RSDateParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0BF61E11A6FB00B38510 /* RSDateParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84AD0BF41E11A6FB00B38510 /* RSDateParser.m */; }; - 84AD0BFA1E11A9A700B38510 /* RSXMLInternal.h in Headers */ = {isa = PBXBuildFile; fileRef = 84AD0BF81E11A9A700B38510 /* RSXMLInternal.h */; }; - 84AD0BFB1E11A9A700B38510 /* RSXMLInternal.m in Sources */ = {isa = PBXBuildFile; fileRef = 84AD0BF91E11A9A700B38510 /* RSXMLInternal.m */; }; 84AD0C0D1E11B8BE00B38510 /* RSXML.h in Headers */ = {isa = PBXBuildFile; fileRef = 84F22C101B52DDEA000060CE /* RSXML.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C0E1E11B8CA00B38510 /* RSXMLError.h in Headers */ = {isa = PBXBuildFile; fileRef = 84E4BE431C8B8FE400A90B41 /* RSXMLError.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C0F1E11B8CA00B38510 /* RSXMLError.m in Sources */ = {isa = PBXBuildFile; fileRef = 84E4BE441C8B8FE400A90B41 /* RSXMLError.m */; }; @@ -53,7 +56,6 @@ 84AD0C1D1E11B8CF00B38510 /* RSOPMLItem.m in Sources */ = {isa = PBXBuildFile; fileRef = 8429D1B51C83A03100F97695 /* RSOPMLItem.m */; }; 84AD0C221E11B8D400B38510 /* RSFeedParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D51501B52E80100E63D52 /* RSFeedParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C231E11B8D400B38510 /* RSFeedParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 842D51511B52E80100E63D52 /* RSFeedParser.m */; }; - 84AD0C241E11B8D400B38510 /* FeedParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D516D1B5308BD00E63D52 /* FeedParser.h */; 
settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C251E11B8D400B38510 /* RSAtomParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D514A1B52E7FC00E63D52 /* RSAtomParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C261E11B8D400B38510 /* RSAtomParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 842D514B1B52E7FC00E63D52 /* RSAtomParser.m */; }; 84AD0C271E11B8D400B38510 /* RSRSSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D51581B52E81B00E63D52 /* RSRSSParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; @@ -62,16 +64,12 @@ 84AD0C2A1E11B8D400B38510 /* RSParsedFeed.m in Sources */ = {isa = PBXBuildFile; fileRef = 842D51751B530BF200E63D52 /* RSParsedFeed.m */; }; 84AD0C2B1E11B8D400B38510 /* RSParsedArticle.h in Headers */ = {isa = PBXBuildFile; fileRef = 842D51611B53058B00E63D52 /* RSParsedArticle.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C2C1E11B8D400B38510 /* RSParsedArticle.m in Sources */ = {isa = PBXBuildFile; fileRef = 842D51621B53058B00E63D52 /* RSParsedArticle.m */; }; - 84AD0C2D1E11B8DA00B38510 /* RSSAXHTMLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 843818FE1C8CB00400E2A1DD /* RSSAXHTMLParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 84AD0C2E1E11B8DA00B38510 /* RSSAXHTMLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 843818FF1C8CB00400E2A1DD /* RSSAXHTMLParser.m */; }; 84AD0C2F1E11B8DA00B38510 /* RSHTMLMetadataParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84BF3E141C8CDD1A005562D8 /* RSHTMLMetadataParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C301E11B8DA00B38510 /* RSHTMLMetadataParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 84BF3E151C8CDD1A005562D8 /* RSHTMLMetadataParser.m */; }; 84AD0C311E11B8DA00B38510 /* RSHTMLMetadata.h in Headers */ = {isa = PBXBuildFile; fileRef = 84BF3E1A1C8CDD6D005562D8 /* RSHTMLMetadata.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C321E11B8DA00B38510 /* RSHTMLMetadata.m in Sources */ = {isa = PBXBuildFile; fileRef = 
84BF3E1B1C8CDD6D005562D8 /* RSHTMLMetadata.m */; }; 84AD0C331E11B8DA00B38510 /* RSHTMLLinkParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 8475C4061D57AB4C0076751E /* RSHTMLLinkParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84AD0C341E11B8DA00B38510 /* RSHTMLLinkParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 8475C4071D57AB4C0076751E /* RSHTMLLinkParser.m */; }; - 84AD0C351E11B8DD00B38510 /* RSXMLInternal.h in Headers */ = {isa = PBXBuildFile; fileRef = 84AD0BF81E11A9A700B38510 /* RSXMLInternal.h */; }; - 84AD0C361E11B8DD00B38510 /* RSXMLInternal.m in Sources */ = {isa = PBXBuildFile; fileRef = 84AD0BF91E11A9A700B38510 /* RSXMLInternal.m */; }; 84AD0C391E11BAA800B38510 /* libxml2.2.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 84AD0C381E11BAA800B38510 /* libxml2.2.tbd */; }; 84AD0C3B1E11C2D500B38510 /* RSEntityTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 84AD0C3A1E11C2D500B38510 /* RSEntityTests.m */; }; 84AD0C3D1E11D75400B38510 /* RSDateParserTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 84AD0C3C1E11D75400B38510 /* RSDateParserTests.m */; }; @@ -101,9 +99,13 @@ /* End PBXContainerItemProxy section */ /* Begin PBXFileReference section */ + 54702A9621D4079F0050A741 /* RSXMLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSXMLParser.h; sourceTree = ""; }; + 54702A9721D407A00050A741 /* RSXMLParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSXMLParser.m; sourceTree = ""; }; + 54C707D921D42B710029BFF1 /* NSDictionary+RSXML.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSDictionary+RSXML.m"; sourceTree = ""; }; + 54C707DA21D42B710029BFF1 /* NSDictionary+RSXML.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSDictionary+RSXML.h"; sourceTree = ""; }; 54FCE5F321493B5E00FABB65 /* Resources */ = {isa = PBXFileReference; 
lastKnownFileType = folder; path = Resources; sourceTree = ""; }; - 8400B0EE1B8C20A9004C4CFF /* RSXMLData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RSXMLData.h; path = RSXML/RSXMLData.h; sourceTree = ""; }; - 8400B0EF1B8C20A9004C4CFF /* RSXMLData.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = RSXMLData.m; path = RSXML/RSXMLData.m; sourceTree = ""; }; + 8400B0EE1B8C20A9004C4CFF /* RSXMLData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSXMLData.h; sourceTree = ""; }; + 8400B0EF1B8C20A9004C4CFF /* RSXMLData.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSXMLData.m; sourceTree = ""; }; 8429D1B41C83A03100F97695 /* RSOPMLItem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSOPMLItem.h; sourceTree = ""; }; 8429D1B51C83A03100F97695 /* RSOPMLItem.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSOPMLItem.m; sourceTree = ""; }; 8429D1C21C83BCCB00F97695 /* RSOPMLTests.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSOPMLTests.m; sourceTree = ""; }; @@ -115,21 +117,16 @@ 842D51591B52E81B00E63D52 /* RSRSSParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; lineEnding = 0; path = RSRSSParser.m; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.objc; }; 842D51611B53058B00E63D52 /* RSParsedArticle.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParsedArticle.h; sourceTree = ""; }; 842D51621B53058B00E63D52 /* RSParsedArticle.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSParsedArticle.m; sourceTree = ""; }; - 842D516D1B5308BD00E63D52 /* FeedParser.h */ = {isa = PBXFileReference; fileEncoding = 
4; lastKnownFileType = sourcecode.c.h; path = FeedParser.h; sourceTree = ""; }; 842D51741B530BF200E63D52 /* RSParsedFeed.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParsedFeed.h; sourceTree = ""; }; 842D51751B530BF200E63D52 /* RSParsedFeed.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSParsedFeed.m; sourceTree = ""; }; 842D51781B5311AD00E63D52 /* RSOPMLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSOPMLParser.h; sourceTree = ""; }; 842D51791B5311AD00E63D52 /* RSOPMLParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSOPMLParser.m; sourceTree = ""; }; - 843818FE1C8CB00400E2A1DD /* RSSAXHTMLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RSSAXHTMLParser.h; path = RSXML/RSSAXHTMLParser.h; sourceTree = ""; }; - 843818FF1C8CB00400E2A1DD /* RSSAXHTMLParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = RSSAXHTMLParser.m; path = RSXML/RSSAXHTMLParser.m; sourceTree = ""; }; 8475C4061D57AB4C0076751E /* RSHTMLLinkParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RSHTMLLinkParser.h; path = RSXML/RSHTMLLinkParser.h; sourceTree = ""; }; 8475C4071D57AB4C0076751E /* RSHTMLLinkParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; lineEnding = 0; name = RSHTMLLinkParser.m; path = RSXML/RSHTMLLinkParser.m; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.objc; }; - 8486F1131BB646140092794F /* NSString+RSXML.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "NSString+RSXML.h"; path = "RSXML/NSString+RSXML.h"; sourceTree = ""; }; - 8486F1141BB646140092794F /* NSString+RSXML.m */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.objc; name = "NSString+RSXML.m"; path = "RSXML/NSString+RSXML.m"; sourceTree = ""; }; - 84AD0BF31E11A6FB00B38510 /* RSDateParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RSDateParser.h; path = RSXML/RSDateParser.h; sourceTree = ""; }; - 84AD0BF41E11A6FB00B38510 /* RSDateParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = RSDateParser.m; path = RSXML/RSDateParser.m; sourceTree = ""; }; - 84AD0BF81E11A9A700B38510 /* RSXMLInternal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RSXMLInternal.h; path = RSXML/RSXMLInternal.h; sourceTree = ""; }; - 84AD0BF91E11A9A700B38510 /* RSXMLInternal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = RSXMLInternal.m; path = RSXML/RSXMLInternal.m; sourceTree = ""; }; + 8486F1131BB646140092794F /* NSString+RSXML.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSString+RSXML.h"; sourceTree = ""; }; + 8486F1141BB646140092794F /* NSString+RSXML.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSString+RSXML.m"; sourceTree = ""; }; + 84AD0BF31E11A6FB00B38510 /* RSDateParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSDateParser.h; sourceTree = ""; }; + 84AD0BF41E11A6FB00B38510 /* RSDateParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSDateParser.m; sourceTree = ""; }; 84AD0C051E11B7D200B38510 /* RSXML.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = RSXML.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 84AD0C081E11B7D200B38510 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 
84AD0C381E11BAA800B38510 /* libxml2.2.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libxml2.2.tbd; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS10.2.sdk/usr/lib/libxml2.2.tbd; sourceTree = DEVELOPER_DIR; }; @@ -139,8 +136,8 @@ 84BF3E151C8CDD1A005562D8 /* RSHTMLMetadataParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; lineEnding = 0; name = RSHTMLMetadataParser.m; path = RSXML/RSHTMLMetadataParser.m; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.objc; }; 84BF3E1A1C8CDD6D005562D8 /* RSHTMLMetadata.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RSHTMLMetadata.h; path = RSXML/RSHTMLMetadata.h; sourceTree = ""; }; 84BF3E1B1C8CDD6D005562D8 /* RSHTMLMetadata.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; lineEnding = 0; name = RSHTMLMetadata.m; path = RSXML/RSHTMLMetadata.m; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.objc; }; - 84E4BE431C8B8FE400A90B41 /* RSXMLError.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RSXMLError.h; path = RSXML/RSXMLError.h; sourceTree = ""; }; - 84E4BE441C8B8FE400A90B41 /* RSXMLError.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = RSXMLError.m; path = RSXML/RSXMLError.m; sourceTree = ""; }; + 84E4BE431C8B8FE400A90B41 /* RSXMLError.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSXMLError.h; sourceTree = ""; }; + 84E4BE441C8B8FE400A90B41 /* RSXMLError.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSXMLError.m; sourceTree = ""; }; 84E4BE471C8B989D00A90B41 /* RSHTMLTests.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSHTMLTests.m; sourceTree = ""; }; 
84F22C0D1B52DDEA000060CE /* RSXML.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = RSXML.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 84F22C101B52DDEA000060CE /* RSXML.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = RSXML.h; path = RSXML/RSXML.h; sourceTree = ""; }; @@ -148,8 +145,8 @@ 84F22C171B52DDEA000060CE /* RSXMLTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = RSXMLTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 84F22C1C1B52DDEA000060CE /* RSXMLTests.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = RSXMLTests.m; sourceTree = ""; }; 84F22C1E1B52DDEA000060CE /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; - 84F22C271B52DDFE000060CE /* RSSAXParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RSSAXParser.h; path = RSXML/RSSAXParser.h; sourceTree = ""; }; - 84F22C281B52DDFE000060CE /* RSSAXParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = RSSAXParser.m; path = RSXML/RSSAXParser.m; sourceTree = ""; }; + 84F22C271B52DDFE000060CE /* RSSAXParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSSAXParser.h; sourceTree = ""; }; + 84F22C281B52DDFE000060CE /* RSSAXParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RSSAXParser.m; sourceTree = ""; }; 84F22C451B52DF90000060CE /* libxml2.2.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libxml2.2.tbd; path = usr/lib/libxml2.2.tbd; sourceTree = SDKROOT; }; /* End PBXFileReference section */ @@ -181,12 +178,33 @@ /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ + 54702A9521D407520050A741 /* General */ = { + isa = 
PBXGroup; + children = ( + 84F22C271B52DDFE000060CE /* RSSAXParser.h */, + 84F22C281B52DDFE000060CE /* RSSAXParser.m */, + 84E4BE431C8B8FE400A90B41 /* RSXMLError.h */, + 84E4BE441C8B8FE400A90B41 /* RSXMLError.m */, + 84AD0BF31E11A6FB00B38510 /* RSDateParser.h */, + 84AD0BF41E11A6FB00B38510 /* RSDateParser.m */, + 8486F1131BB646140092794F /* NSString+RSXML.h */, + 8486F1141BB646140092794F /* NSString+RSXML.m */, + 54C707DA21D42B710029BFF1 /* NSDictionary+RSXML.h */, + 54C707D921D42B710029BFF1 /* NSDictionary+RSXML.m */, + 8400B0EE1B8C20A9004C4CFF /* RSXMLData.h */, + 8400B0EF1B8C20A9004C4CFF /* RSXMLData.m */, + 54702A9621D4079F0050A741 /* RSXMLParser.h */, + 54702A9721D407A00050A741 /* RSXMLParser.m */, + ); + name = General; + path = RSXML; + sourceTree = ""; + }; 842D515E1B52E83100E63D52 /* Feeds */ = { isa = PBXGroup; children = ( 842D51501B52E80100E63D52 /* RSFeedParser.h */, 842D51511B52E80100E63D52 /* RSFeedParser.m */, - 842D516D1B5308BD00E63D52 /* FeedParser.h */, 842D514A1B52E7FC00E63D52 /* RSAtomParser.h */, 842D514B1B52E7FC00E63D52 /* RSAtomParser.m */, 842D51581B52E81B00E63D52 /* RSRSSParser.h */, @@ -231,14 +249,12 @@ 84E4BE4D1C8B98E400A90B41 /* HTML */ = { isa = PBXGroup; children = ( - 843818FE1C8CB00400E2A1DD /* RSSAXHTMLParser.h */, - 843818FF1C8CB00400E2A1DD /* RSSAXHTMLParser.m */, 84BF3E141C8CDD1A005562D8 /* RSHTMLMetadataParser.h */, 84BF3E151C8CDD1A005562D8 /* RSHTMLMetadataParser.m */, - 84BF3E1A1C8CDD6D005562D8 /* RSHTMLMetadata.h */, - 84BF3E1B1C8CDD6D005562D8 /* RSHTMLMetadata.m */, 8475C4061D57AB4C0076751E /* RSHTMLLinkParser.h */, 8475C4071D57AB4C0076751E /* RSHTMLLinkParser.m */, + 84BF3E1A1C8CDD6D005562D8 /* RSHTMLMetadata.h */, + 84BF3E1B1C8CDD6D005562D8 /* RSHTMLMetadata.m */, ); name = HTML; sourceTree = ""; @@ -247,21 +263,10 @@ isa = PBXGroup; children = ( 84F22C101B52DDEA000060CE /* RSXML.h */, - 84E4BE431C8B8FE400A90B41 /* RSXMLError.h */, - 84E4BE441C8B8FE400A90B41 /* RSXMLError.m */, - 84F22C271B52DDFE000060CE /* RSSAXParser.h 
*/, - 84F22C281B52DDFE000060CE /* RSSAXParser.m */, - 8400B0EE1B8C20A9004C4CFF /* RSXMLData.h */, - 8400B0EF1B8C20A9004C4CFF /* RSXMLData.m */, - 8486F1131BB646140092794F /* NSString+RSXML.h */, - 8486F1141BB646140092794F /* NSString+RSXML.m */, - 84AD0BF31E11A6FB00B38510 /* RSDateParser.h */, - 84AD0BF41E11A6FB00B38510 /* RSDateParser.m */, + 54702A9521D407520050A741 /* General */, 842D517C1B5311B000E63D52 /* OPML */, 842D515E1B52E83100E63D52 /* Feeds */, 84E4BE4D1C8B98E400A90B41 /* HTML */, - 84AD0BF81E11A9A700B38510 /* RSXMLInternal.h */, - 84AD0BF91E11A9A700B38510 /* RSXMLInternal.m */, 84F22C121B52DDEA000060CE /* Info.plist */, 84F22C1B1B52DDEA000060CE /* RSXMLTests */, 84AD0C061E11B7D200B38510 /* RSXMLiOS */, @@ -302,20 +307,19 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( - 84AD0C351E11B8DD00B38510 /* RSXMLInternal.h in Headers */, 84AD0C1C1E11B8CF00B38510 /* RSOPMLItem.h in Headers */, 84AD0C271E11B8D400B38510 /* RSRSSParser.h in Headers */, 84AD0C161E11B8CA00B38510 /* RSDateParser.h in Headers */, + 54702A9921D407A00050A741 /* RSXMLParser.h in Headers */, 84AD0C101E11B8CA00B38510 /* RSSAXParser.h in Headers */, 84AD0C331E11B8DA00B38510 /* RSHTMLLinkParser.h in Headers */, 84AD0C2F1E11B8DA00B38510 /* RSHTMLMetadataParser.h in Headers */, 84AD0C251E11B8D400B38510 /* RSAtomParser.h in Headers */, 84AD0C121E11B8CA00B38510 /* RSXMLData.h in Headers */, 84AD0C311E11B8DA00B38510 /* RSHTMLMetadata.h in Headers */, - 84AD0C241E11B8D400B38510 /* FeedParser.h in Headers */, 84AD0C221E11B8D400B38510 /* RSFeedParser.h in Headers */, - 84AD0C2D1E11B8DA00B38510 /* RSSAXHTMLParser.h in Headers */, 84AD0C0E1E11B8CA00B38510 /* RSXMLError.h in Headers */, + 54C707DE21D42B710029BFF1 /* NSDictionary+RSXML.h in Headers */, 84AD0C2B1E11B8D400B38510 /* RSParsedArticle.h in Headers */, 84AD0C291E11B8D400B38510 /* RSParsedFeed.h in Headers */, 84AD0C181E11B8CF00B38510 /* RSOPMLParser.h in Headers */, @@ -331,6 +335,7 @@ 8486F1151BB646140092794F /* 
NSString+RSXML.h in Headers */, 8475C4081D57AB4C0076751E /* RSHTMLLinkParser.h in Headers */, 84AD0BF51E11A6FB00B38510 /* RSDateParser.h in Headers */, + 54702A9821D407A00050A741 /* RSXMLParser.h in Headers */, 8400B0F01B8C20A9004C4CFF /* RSXMLData.h in Headers */, 842D51631B53058B00E63D52 /* RSParsedArticle.h in Headers */, 842D517A1B5311AD00E63D52 /* RSOPMLParser.h in Headers */, @@ -338,15 +343,13 @@ 842D51761B530BF200E63D52 /* RSParsedFeed.h in Headers */, 84BF3E1C1C8CDD6D005562D8 /* RSHTMLMetadata.h in Headers */, 8429D1B61C83A03100F97695 /* RSOPMLItem.h in Headers */, - 843819001C8CB00400E2A1DD /* RSSAXHTMLParser.h in Headers */, 842D51521B52E80100E63D52 /* RSFeedParser.h in Headers */, + 54C707DD21D42B710029BFF1 /* NSDictionary+RSXML.h in Headers */, 84E4BE451C8B8FE400A90B41 /* RSXMLError.h in Headers */, - 842D516F1B5308BD00E63D52 /* FeedParser.h in Headers */, 84F22C111B52DDEA000060CE /* RSXML.h in Headers */, 842D514C1B52E7FC00E63D52 /* RSAtomParser.h in Headers */, 842D515A1B52E81B00E63D52 /* RSRSSParser.h in Headers */, 84F22C291B52DDFE000060CE /* RSSAXParser.h in Headers */, - 84AD0BFA1E11A9A700B38510 /* RSXMLInternal.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -482,7 +485,6 @@ buildActionMask = 2147483647; files = ( 84AD0C231E11B8D400B38510 /* RSFeedParser.m in Sources */, - 84AD0C361E11B8DD00B38510 /* RSXMLInternal.m in Sources */, 84AD0C171E11B8CA00B38510 /* RSDateParser.m in Sources */, 84AD0C191E11B8CF00B38510 /* RSOPMLParser.m in Sources */, 84AD0C341E11B8DA00B38510 /* RSHTMLLinkParser.m in Sources */, @@ -493,11 +495,12 @@ 84AD0C2A1E11B8D400B38510 /* RSParsedFeed.m in Sources */, 84AD0C261E11B8D400B38510 /* RSAtomParser.m in Sources */, 84AD0C1D1E11B8CF00B38510 /* RSOPMLItem.m in Sources */, + 54702A9B21D407A00050A741 /* RSXMLParser.m in Sources */, 84AD0C131E11B8CA00B38510 /* RSXMLData.m in Sources */, 84AD0C321E11B8DA00B38510 /* RSHTMLMetadata.m in Sources */, + 54C707DC21D42B710029BFF1 /* NSDictionary+RSXML.m in Sources 
*/, 84AD0C111E11B8CA00B38510 /* RSSAXParser.m in Sources */, 84AD0C0F1E11B8CA00B38510 /* RSXMLError.m in Sources */, - 84AD0C2E1E11B8DA00B38510 /* RSSAXHTMLParser.m in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -516,12 +519,12 @@ 8475C4091D57AB4C0076751E /* RSHTMLLinkParser.m in Sources */, 8429D1B71C83A03100F97695 /* RSOPMLItem.m in Sources */, 84BF3E1D1C8CDD6D005562D8 /* RSHTMLMetadata.m in Sources */, + 54702A9A21D407A00050A741 /* RSXMLParser.m in Sources */, 842D51531B52E80100E63D52 /* RSFeedParser.m in Sources */, 84F22C2A1B52DDFE000060CE /* RSSAXParser.m in Sources */, + 54C707DB21D42B710029BFF1 /* NSDictionary+RSXML.m in Sources */, 8400B0F11B8C20A9004C4CFF /* RSXMLData.m in Sources */, 842D51771B530BF200E63D52 /* RSParsedFeed.m in Sources */, - 843819011C8CB00400E2A1DD /* RSSAXHTMLParser.m in Sources */, - 84AD0BFB1E11A9A700B38510 /* RSXMLInternal.m in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/RSXML/FeedParser.h b/RSXML/FeedParser.h deleted file mode 100644 index cda6714..0000000 --- a/RSXML/FeedParser.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// FeedParser.h -// RSXML -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -@class RSParsedFeed; -@class RSXMLData; - - -@protocol FeedParser - -+ (BOOL)canParseFeed:(RSXMLData * _Nonnull)xmlData; - -- (nonnull instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData; - -- (nullable RSParsedFeed *)parseFeed; - - -@end diff --git a/RSXML/NSDictionary+RSXML.h b/RSXML/NSDictionary+RSXML.h new file mode 100644 index 0000000..a2eddba --- /dev/null +++ b/RSXML/NSDictionary+RSXML.h @@ -0,0 +1,28 @@ +// +// MIT License (MIT) +// +// Copyright (c) 2016 Brent Simmons +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#import + +@interface NSDictionary (RSXML) +- (nullable id)rsxml_objectForCaseInsensitiveKey:(NSString *)key; +@end diff --git a/RSXML/NSDictionary+RSXML.m b/RSXML/NSDictionary+RSXML.m new file mode 100644 index 0000000..f79b2f3 --- /dev/null +++ b/RSXML/NSDictionary+RSXML.m @@ -0,0 +1,41 @@ +// +// MIT License (MIT) +// +// Copyright (c) 2016 Brent Simmons +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#import "NSDictionary+RSXML.h" + +@implementation NSDictionary (RSXML) + +- (nullable id)rsxml_objectForCaseInsensitiveKey:(NSString *)key { + id obj = self[key]; + if (obj) { + return obj; + } + for (NSString *oneKey in self.allKeys) { + if ([oneKey isKindOfClass:[NSString class]] && [key caseInsensitiveCompare:oneKey] == NSOrderedSame) { + return self[oneKey]; + } + } + return nil; +} + +@end diff --git a/RSXML/NSString+RSXML.h b/RSXML/NSString+RSXML.h index 352caf6..170693a 100644 --- a/RSXML/NSString+RSXML.h +++ b/RSXML/NSString+RSXML.h @@ -1,16 +1,34 @@ // -// NSString+RSXML.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 9/25/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
@import Foundation; @interface NSString (RSXML) - (NSString *)rs_stringByDecodingHTMLEntities; +- (nonnull NSString *)rsxml_md5HashString; +- (nullable NSString *)absoluteURLWithBase:(nonnull NSURL *)baseURL; @end diff --git a/RSXML/NSString+RSXML.m b/RSXML/NSString+RSXML.m index 3856d2a..125ea9c 100644 --- a/RSXML/NSString+RSXML.m +++ b/RSXML/NSString+RSXML.m @@ -1,23 +1,66 @@ // -// NSString+RSXML.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 9/25/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
#import "NSString+RSXML.h" - +#import @interface NSScanner (RSXML) - - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity; - @end +#pragma mark - NSString + + @implementation NSString (RSXML) +- (NSData *)rsxml_md5Hash { + + NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding]; + unsigned char hash[CC_MD5_DIGEST_LENGTH]; + CC_MD5(data.bytes, (CC_LONG)data.length, hash); + + return [NSData dataWithBytes:(const void *)hash length:CC_MD5_DIGEST_LENGTH]; +} + +- (NSString *)rsxml_md5HashString { + + NSData *md5Data = [self rsxml_md5Hash]; + const Byte *bytes = md5Data.bytes; + return [NSString stringWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]]; +} + +- (NSString *)absoluteURLWithBase:(NSURL *)baseURL { + if (baseURL && ![[self lowercaseString] hasPrefix:@"http"]) { + NSURL *resolvedURL = [NSURL URLWithString:self relativeToURL:baseURL]; + if (resolvedURL.absoluteString) { + return resolvedURL.absoluteString; + } + } + return self; +} + - (NSString *)rs_stringByDecodingHTMLEntities { @autoreleasepool { @@ -106,16 +149,18 @@ static NSString *RSXMLStringWithValue(unichar value); @end + +#pragma mark - NSScanner + + @implementation NSScanner (RSXML) - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { - NSString *s = self.string; NSUInteger initialScanLocation = self.scanLocation; static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity. 
while (true) { - unichar ch = [s characterAtIndex:self.scanLocation]; if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) { break; @@ -138,12 +183,15 @@ static NSString *RSXMLStringWithValue(unichar value); break; } } - return NO; } @end + +#pragma mark - C Functions + + static NSString *RSXMLStringWithValue(unichar value) { return [[NSString alloc] initWithFormat:@"%C", value]; diff --git a/RSXML/RSAtomParser.h b/RSXML/RSAtomParser.h index 5092ddd..40f0c6c 100644 --- a/RSXML/RSAtomParser.h +++ b/RSXML/RSAtomParser.h @@ -1,13 +1,32 @@ // -// RSAtomParser.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 1/15/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
-#import "FeedParser.h" +#import "RSFeedParser.h" -@interface RSAtomParser : NSObject +// +// https://validator.w3.org/feed/docs/rfc4287.html + +@interface RSAtomParser : RSFeedParser @end diff --git a/RSXML/RSAtomParser.m b/RSXML/RSAtomParser.m index 2a46a19..d95fef9 100755 --- a/RSXML/RSAtomParser.m +++ b/RSXML/RSAtomParser.m @@ -1,601 +1,253 @@ // -// RSAtomParser.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 1/15/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
-#import #import "RSAtomParser.h" -#import "RSSAXParser.h" -#import "FeedParser.h" #import "RSParsedFeed.h" #import "RSParsedArticle.h" -#import "RSXMLData.h" -#import "NSString+RSXML.h" -#import "RSDateParser.h" +static NSString *kAlternateValue = @"alternate"; +static NSString *kRelatedValue = @"related"; @interface RSAtomParser () - -@property (nonatomic) NSData *feedData; -@property (nonatomic) NSString *urlString; -@property (nonatomic) BOOL endFeedFound; -@property (nonatomic) BOOL parsingXHTML; -@property (nonatomic) BOOL parsingSource; -@property (nonatomic) BOOL parsingArticle; -@property (nonatomic) BOOL parsingAuthor; -@property (nonatomic) NSMutableArray *attributesStack; -@property (nonatomic, readonly) NSDictionary *currentAttributes; +@property (nonatomic, assign) BOOL endFeedFound; +@property (nonatomic, assign) BOOL parsingXHTML; +@property (nonatomic, assign) BOOL parsingSource; +@property (nonatomic, assign) BOOL parsingArticle; +@property (nonatomic, assign) BOOL parsingAuthor; @property (nonatomic) NSMutableString *xhtmlString; -@property (nonatomic) NSString *feedLink; -@property (nonatomic) NSString *feedTitle; -@property (nonatomic) NSString *feedSubtitle; -@property (nonatomic) NSMutableArray *articles; -@property (nonatomic) NSDate *dateParsed; -@property (nonatomic) RSSAXParser *parser; -@property (nonatomic, readonly) RSParsedArticle *currentArticle; -@property (nonatomic, readonly) NSDate *currentDate; - @end @implementation RSAtomParser -#pragma mark - Class Methods +#pragma mark - RSXMLParserDelegate -+ (BOOL)canParseFeed:(RSXMLData *)xmlData { - - // Checking for ' entryRange.location) { - return NO; // Wrong order. 
- } - } - - return YES; ++ (NSArray *)parserRequireOrderedTags { + return @[@" 0) { + NSString *urlString = attribs[@"href"]; + if (urlString.length == 0) { return; } - NSString *related = self.currentAttributes[kRelKey]; - if (related == kAlternateValue) { - self.feedLink = self.currentAttributes[kHrefKey]; - } -} - - -- (void)addFeedTitle { - - if (self.feedTitle.length < 1) { - self.feedTitle = self.parser.currentStringWithTrimmedWhitespace; - } -} - -- (void)addFeedSubtitle { - - if (self.feedSubtitle.length < 1) { - self.feedSubtitle = self.parser.currentStringWithTrimmedWhitespace; - } -} - -- (void)addLink { - - NSString *urlString = self.currentAttributes[kHrefKey]; - if (urlString.length < 1) { - return; - } - - NSString *rel = self.currentAttributes[kRelKey]; - if (rel.length < 1) { + NSString *rel = attribs[@"rel"]; + if (rel.length == 0) { rel = kAlternateValue; } - if (rel == kAlternateValue) { - if (!self.currentArticle.link) { - self.currentArticle.link = urlString; + if (!self.parsingArticle) { // Feed + if (!self.parsedFeed.link && rel == kAlternateValue) { + self.parsedFeed.link = urlString; } } - else if (rel == kRelatedValue) { - if (!self.currentArticle.permalink) { + else if (!self.parsingSource) { // Article + if (!self.currentArticle.link && rel == kAlternateValue) { + self.currentArticle.link = urlString; + } + else if (!self.currentArticle.permalink && rel == kRelatedValue) { self.currentArticle.permalink = urlString; } } } -- (void)addContent { - - self.currentArticle.body = [self currentStringWithHTMLEntitiesDecoded]; -} +#pragma mark - Parse XHTML -- (void)addSummary { - - self.currentArticle.abstract = [self currentStringWithHTMLEntitiesDecoded]; -} - - -- (NSString *)currentStringWithHTMLEntitiesDecoded { - - return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities]; -} - - -- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (prefix) { - return; - } - - if 
(RSSAXEqualTags(localName, kID, kIDLength)) { - self.currentArticle.guid = self.parser.currentStringWithTrimmedWhitespace; - } - - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - self.currentArticle.title = [self currentStringWithHTMLEntitiesDecoded]; - } - - else if (RSSAXEqualTags(localName, kContent, kContentLength)) { - [self addContent]; - } - - else if (RSSAXEqualTags(localName, kSummary, kSummaryLength)) { - [self addSummary]; - } - - else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - [self addLink]; - } - - else if (RSSAXEqualTags(localName, kPublished, kPublishedLength)) { - self.currentArticle.datePublished = self.currentDate; - } - - else if (RSSAXEqualTags(localName, kUpdated, kUpdatedLength)) { - self.currentArticle.dateModified = self.currentDate; - } -} - - -- (void)addXHTMLTag:(const xmlChar *)localName { +- (void)addXHTMLTag:(const xmlChar *)localName attributes:(NSDictionary*)attribs { if (!localName) { return; } - [self.xhtmlString appendString:@"<"]; - [self.xhtmlString appendString:[NSString stringWithUTF8String:(const char *)localName]]; + [self.xhtmlString appendFormat:@"<%s", localName]; - if (self.currentAttributes.count < 1) { - [self.xhtmlString appendString:@">"]; - return; - } - - for (NSString *oneKey in self.currentAttributes) { - - [self.xhtmlString appendString:@" "]; - - NSString *oneValue = self.currentAttributes[oneKey]; - [self.xhtmlString appendString:oneKey]; - - [self.xhtmlString appendString:@"=\""]; - - oneValue = [oneValue stringByReplacingOccurrencesOfString:@"\"" withString:@"""]; - [self.xhtmlString appendString:oneValue]; - - [self.xhtmlString appendString:@"\""]; + for (NSString *key in attribs) { + NSString *val = [attribs[key] stringByReplacingOccurrencesOfString:@"\"" withString:@"""]; + [self.xhtmlString appendFormat:@" %@=\"%@\"", key, val]; } [self.xhtmlString appendString:@">"]; } +- (void)parseXHTMLEndElement:(const xmlChar *)localName length:(int)len { + if (len == 7) { + if 
(EqualBytes(localName, "content", 7)) { + if (self.parsingArticle) { + self.currentArticle.body = [self.xhtmlString copy]; + } + self.parsingXHTML = NO; + } + else if (EqualBytes(localName, "summary", 7)) { + if (self.parsingArticle) { + self.currentArticle.abstract = [self.xhtmlString copy]; + } + self.parsingXHTML = NO; + } + } + [self.xhtmlString appendFormat:@"", localName]; +} + #pragma mark - RSSAXParserDelegate + - (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { if (self.endFeedFound) { return; } - NSDictionary *xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - if (!xmlAttributes) { - xmlAttributes = [NSDictionary dictionary]; - } - [self.attributesStack addObject:xmlAttributes]; - if (self.parsingXHTML) { - [self addXHTMLTag:localName]; + NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; + [self addXHTMLTag:localName attributes:attribs]; return; } - - if (RSSAXEqualTags(localName, kEntry, kEntryLength)) { - self.parsingArticle = YES; - [self addArticle]; - return; + + int len = xmlStrlen(localName); + switch (len) { + case 4: + if (EqualBytes(localName, "link", 4)) { + NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; + [self setFeedOrArticleLink:attribs]; + return; + } + break; + case 5: + if (EqualBytes(localName, "entry", 5)) { + self.parsingArticle = YES; + self.currentArticle = [self.parsedFeed appendNewArticle]; + return; + } + break; + case 6: + if (EqualBytes(localName, "author", 6)) { + self.parsingAuthor = YES; + return; + } else if (EqualBytes(localName, "source", 6)) { + 
self.parsingSource = YES; + return; + } + break; + case 7: // uses attrib; XHTML content/summary is only captured while inside an entry + if (!self.parsingArticle) { + break; + } + if (!EqualBytes(localName, "content", 7) && !EqualBytes(localName, "summary", 7)) { + break; + } + NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; + if ([attribs[@"type"] isEqualToString:@"xhtml"]) { + self.parsingXHTML = YES; + self.xhtmlString = [NSMutableString stringWithString:@""]; + return; + } + break; } - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = YES; - return; - } - - if (RSSAXEqualTags(localName, kSource, kSourceLength)) { - self.parsingSource = YES; - return; - } - - BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength); - BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength); - if (self.parsingArticle && (isContentTag || isSummaryTag)) { - - NSString *contentType = xmlAttributes[kTypeKey]; - if ([contentType isEqualToString:kXHTMLType]) { - self.parsingXHTML = YES; - self.xhtmlString = [NSMutableString stringWithString:@""]; - return; - } - } - - if (!self.parsingArticle && RSSAXEqualTags(localName, kLink, kLinkLength)) { - [self addFeedLink]; - return; - } - - [self.parser beginStoringCharacters]; + [SAXParser beginStoringCharacters]; } - (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - if (RSSAXEqualTags(localName, kFeed, kFeedLength)) { - self.endFeedFound = YES; - return; - } - if (self.endFeedFound) { return; } - if (self.parsingXHTML) { - - BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength); - BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength); - - if (self.parsingArticle) { - if (isContentTag) { - self.currentArticle.body = [self.xhtmlString copy]; - } - else if (isSummaryTag) { - self.currentArticle.abstract = [self.xhtmlString copy]; - } - } - - if (isContentTag ||
isSummaryTag) { - self.parsingXHTML = NO; - } - - [self.xhtmlString appendString:@""]; - } - - else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = NO; - } - - else if (RSSAXEqualTags(localName, kEntry, kEntryLength)) { - self.parsingArticle = NO; - } - - else if (self.parsingArticle && !self.parsingSource) { - [self addArticleElement:localName prefix:prefix]; - } + int len = xmlStrlen(localName); - else if (RSSAXEqualTags(localName, kSource, kSourceLength)) { - self.parsingSource = NO; + if (len == 4 && EqualBytes(localName, "feed", 4)) { + self.endFeedFound = YES; + return; } - else if (!self.parsingArticle && !self.parsingSource) { - if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - [self addFeedTitle]; - } - else if (RSSAXEqualTags(localName, kSubtitle, kSubtitleLength)) { - [self addFeedSubtitle]; - } - } - [self.attributesStack removeLastObject]; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (prefix && RSSAXEqualTags(prefix, kXML, kXMLLength)) { - - if (RSSAXEqualTags(name, kBase, kBaseLength)) { - return kXMLBaseKey; - } - if (RSSAXEqualTags(name, kLang, kLangLength)) { - return kXMLLangKey; - } + if (self.parsingXHTML) { + [self parseXHTMLEndElement:localName length:len]; + return; } - if (prefix) { - return nil; + BOOL isArticle = (self.parsingArticle && !self.parsingSource && !prefix); + + switch (len) { + case 2: + if (isArticle && EqualBytes(localName, "id", 2)) { + self.currentArticle.guid = SAXParser.currentStringWithTrimmedWhitespace; + } + return; + case 5: + if (EqualBytes(localName, "entry", 5)) { + self.parsingArticle = NO; + } + else if (isArticle && EqualBytes(localName, "title", 5)) { + self.currentArticle.title = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace]; + } + else if (!self.parsingArticle && !self.parsingSource && self.parsedFeed.title.length == 0) { + if (EqualBytes(localName, 
"title", 5)) { + self.parsedFeed.title = SAXParser.currentStringWithTrimmedWhitespace; + } + } + return; + case 6: + if (EqualBytes(localName, "author", 6)) { + self.parsingAuthor = NO; + } + else if (EqualBytes(localName, "source", 6)) { + self.parsingSource = NO; + } + return; + case 8: + if (!self.parsingArticle && !self.parsingSource && self.parsedFeed.subtitle.length == 0) { + if (EqualBytes(localName, "subtitle", 8)) { + self.parsedFeed.subtitle = SAXParser.currentStringWithTrimmedWhitespace; + } + } + return; + case 7: + if (isArticle) { + if (EqualBytes(localName, "content", 7)) { + self.currentArticle.body = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace]; + } + else if (EqualBytes(localName, "summary", 7)) { + self.currentArticle.abstract = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace]; + } + else if (EqualBytes(localName, "updated", 7)) { + self.currentArticle.dateModified = [self dateFromCharacters:SAXParser.currentCharacters]; + } + } + return; + case 9: + if (isArticle && EqualBytes(localName, "published", 9)) { + self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters]; + } + return; } - - if (RSSAXEqualTags(name, kRel, kRelLength)) { - return kRelKey; - } - - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return kTypeKey; - } - - if (RSSAXEqualTags(name, kHref, kHrefLength)) { - return kHrefKey; - } - - if (RSSAXEqualTags(name, kAlternate, kAlternateLength)) { - return kAlternateValue; - } - - return nil; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { - - static const NSUInteger alternateLength = kAlternateLength - 1; - static const NSUInteger textHTMLLength = kTextHTMLLength - 1; - static const NSUInteger relatedLength = kRelatedLength - 1; - static const NSUInteger shortURLLength = kShortURLLength - 1; - static const NSUInteger htmlLength = kHTMLLength - 1; - static const NSUInteger 
enLength = kEnLength - 1; - static const NSUInteger textLength = kTextLength - 1; - static const NSUInteger selfLength = kSelfLength - 1; - - if (length == alternateLength && RSSAXEqualBytes(bytes, kAlternate, alternateLength)) { - return kAlternateValue; - } - - if (length == textHTMLLength && RSSAXEqualBytes(bytes, kTextHTML, textHTMLLength)) { - return kTextHTMLValue; - } - - if (length == relatedLength && RSSAXEqualBytes(bytes, kRelated, relatedLength)) { - return kRelatedValue; - } - - if (length == shortURLLength && RSSAXEqualBytes(bytes, kShortURL, shortURLLength)) { - return kShortURLValue; - } - - if (length == htmlLength && RSSAXEqualBytes(bytes, kHTML, htmlLength)) { - return kHTMLValue; - } - - if (length == enLength && RSSAXEqualBytes(bytes, kEn, enLength)) { - return kEnValue; - } - - if (length == textLength && RSSAXEqualBytes(bytes, kText, textLength)) { - return kTextValue; - } - - if (length == selfLength && RSSAXEqualBytes(bytes, kSelf, selfLength)) { - return kSelfValue; - } - - return nil; } @@ -606,4 +258,60 @@ static const NSInteger kSelfLength = 5; } } + +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { + + int len = xmlStrlen(name); + + if (prefix) { + if (len == 4 && EqualBytes(prefix, "xml", 3)) { // len == 4 is for the next two lines already + if (EqualBytes(name, "base", 4)) { return @"xml:base"; } + if (EqualBytes(name, "lang", 4)) { return @"xml:lang"; } + } + return nil; + } + + switch (len) { + case 3: + if (EqualBytes(name, "rel", 3)) { return @"rel"; } + break; + case 4: + if (EqualBytes(name, "type", 4)) { return @"type"; } + if (EqualBytes(name, "href", 4)) { return @"href"; } + break; + case 9: + if (EqualBytes(name, "alternate", 9)) { return kAlternateValue; } + break; + } + + return nil; +} + + +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { + + switch (length) { + case 2: + if 
(EqualBytes(bytes, "en", 2)) { return @"en"; } + break; + case 4: + if (EqualBytes(bytes, "html", 4)) { return @"html"; } + if (EqualBytes(bytes, "text", 4)) { return @"text"; } + if (EqualBytes(bytes, "self", 4)) { return @"self"; } + break; + case 7: + if (EqualBytes(bytes, "related", 7)) { return kRelatedValue; } + break; + case 8: + if (EqualBytes(bytes, "shorturl", 8)) { return @"shorturl"; } + break; + case 9: + if (EqualBytes(bytes, "alternate", 9)) { return kAlternateValue; } + if (EqualBytes(bytes, "text/html", 9)) { return @"text/html"; } + break; + } + + return nil; +} + @end diff --git a/RSXML/RSDateParser.h b/RSXML/RSDateParser.h index c593a05..dbe8c43 100644 --- a/RSXML/RSDateParser.h +++ b/RSXML/RSDateParser.h @@ -1,10 +1,25 @@ // -// RSDateParser.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. @import Foundation; diff --git a/RSXML/RSDateParser.m b/RSXML/RSDateParser.m index 7b3947c..cf2a364 100644 --- a/RSXML/RSDateParser.m +++ b/RSXML/RSDateParser.m @@ -1,10 +1,25 @@ // -// RSDateParser.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
#import #import "RSDateParser.h" diff --git a/RSXML/RSFeedParser.h b/RSXML/RSFeedParser.h index 40b2fef..80640f3 100644 --- a/RSXML/RSFeedParser.h +++ b/RSXML/RSFeedParser.h @@ -1,28 +1,35 @@ // -// RSFeedParser.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 1/4/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. -#import "FeedParser.h" +#import "RSXMLParser.h" -// If you have a feed and don’t know or care what it is (RSS or Atom), -// then call RSParseFeed or RSParseFeedSync. 
+@class RSParsedFeed, RSParsedArticle; -@class RSXMLData; -@class RSParsedFeed; +@interface RSFeedParser : RSXMLParser +@property (nonatomic, readonly) RSParsedFeed *parsedFeed; +@property (nonatomic, weak) RSParsedArticle *currentArticle; -NS_ASSUME_NONNULL_BEGIN - -BOOL RSCanParseFeed(RSXMLData *xmlData); - - -typedef void (^RSParsedFeedBlock)(RSParsedFeed * _Nullable parsedFeed, NSError * _Nullable error); - -// callback is called on main queue. -void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback); -RSParsedFeed * _Nullable RSParseFeedSync(RSXMLData *xmlData, NSError * _Nullable * _Nullable error); - -NS_ASSUME_NONNULL_END +- (NSDate *)dateFromCharacters:(NSData *)data; +- (NSString *)decodeHTMLEntities:(NSString *)str; +@end diff --git a/RSXML/RSFeedParser.m b/RSXML/RSFeedParser.m index 5367922..dc27878 100644 --- a/RSXML/RSFeedParser.m +++ b/RSXML/RSFeedParser.m @@ -1,229 +1,58 @@ // -// FeedParser.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 1/4/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. -#import "RSXMLError.h" #import "RSFeedParser.h" -#import "FeedParser.h" -#import "RSXMLData.h" -#import "RSRSSParser.h" -#import "RSAtomParser.h" +#import "RSParsedFeed.h" +#import "RSParsedArticle.h" +#import "RSDateParser.h" +#import "NSString+RSXML.h" -static NSArray *parserClasses(void) { - - static NSArray *gParserClasses = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - - gParserClasses = @[[RSRSSParser class], [RSAtomParser class]]; - }); - - return gParserClasses; -} +@implementation RSFeedParser -static BOOL feedMayBeParseable(RSXMLData *xmlData) { - - /*Sanity checks.*/ - - if (!xmlData.data) { - return NO; - } +#pragma mark - RSXMLParserDelegate - /*TODO: check size, type, etc.*/ - ++ (BOOL)isFeedParser { return YES; } + +- (BOOL)xmlParserWillStartParsing { + _parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.documentURI]; return YES; } -static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes); -static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes); -static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes); -static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes); -static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes); -static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes); - -static const NSUInteger maxNumberOfBytesToSearch = 4096; -static const NSUInteger minNumberOfBytesToSearch = 20; - -static Class parserClassForXMLData(RSXMLData *xmlData, NSError **error) { - - if (!feedMayBeParseable(xmlData)) { - RSXMLSetError(error, RSXMLErrorNoData, nil); - return nil; - } - - // TODO: check for things like 
images and movies and return nil. - - const char *bytes = xmlData.data.bytes; - NSUInteger numberOfBytes = xmlData.data.length; - - if (numberOfBytes > minNumberOfBytesToSearch) { - - if (numberOfBytes > maxNumberOfBytesToSearch) { - numberOfBytes = maxNumberOfBytesToSearch; - } - - if (!dataHasLeftCaret(bytes, numberOfBytes)) { - RSXMLSetError(error, RSXMLErrorMissingLeftCaret, nil); - return nil; - } - if (optimisticCanParseRSSData(bytes, numberOfBytes)) { - return [RSRSSParser class]; - } - if (optimisticCanParseAtomData(bytes, numberOfBytes)) { - return [RSAtomParser class]; - } - if (optimisticCanParseRDF(bytes, numberOfBytes)) { - return [RSRSSParser class]; //TODO: parse RDF feeds, using RSS parser so far ... - } - if (dataIsProbablyHTML(bytes, numberOfBytes)) { - RSXMLSetError(error, RSXMLErrorProbablyHTML, nil); - return nil; - } - if (dataIsSomeWeirdException(bytes, numberOfBytes)) { - RSXMLSetError(error, RSXMLErrorContainsXMLErrorsTag, nil); - return nil; - } - } - - for (Class parserClass in parserClasses()) { - if ([parserClass canParseFeed:xmlData]) { - return parserClass; - //return [[parserClass alloc] initWithXMLData:xmlData]; // does not make sense to return instance - } - } - // Try RSS anyway? libxml would return a parsing error - RSXMLSetError(error, RSXMLErrorNoSuitableParser, nil); - return nil; +- (id)xmlParserWillReturnDocument { + // Optimization: make articles do calculations on this background thread. + [_parsedFeed.articles makeObjectsPerformSelector:@selector(calculateArticleID)]; + return _parsedFeed; } -static id parserForXMLData(RSXMLData *xmlData, NSError **error) { - - Class parserClass = parserClassForXMLData(xmlData, error); - if (!parserClass) { - return nil; - } - return [[parserClass alloc] initWithXMLData:xmlData]; +/// @return @c NSDate by parsing RFC 822 and 8601 date strings. 
+- (NSDate *)dateFromCharacters:(NSData *)data { + return RSDateWithBytes(data.bytes, data.length); } -static BOOL canParseXMLData(RSXMLData *xmlData) { - - return parserClassForXMLData(xmlData, nil) != nil; -} - -static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) { - - char *foundString = strnstr(bytes, string, numberOfBytes); - return foundString != NULL; -} - -static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes) { - - return didFindString("<", bytes, numberOfBytes); -} - -static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes) { - - // Won’t catch every single case, which is fine. - - if (didFindString(" 0) { + typeStr = [typeStr lowercaseString]; + if ([typeStr hasSuffix:@"/rss+xml"]) { + return RSFeedTypeRSS; + } else if ([typeStr hasSuffix:@"/atom+xml"]) { + return RSFeedTypeAtom; + } + } + return RSFeedTypeNone; +} -static NSString *kShortcutIconRelValue = @"shortcut icon"; -static NSString *kHrefKey = @"href"; -static NSString *kSrcKey = @"src"; -static NSString *kAppleTouchIconValue = @"apple-touch-icon"; -static NSString *kAppleTouchIconPrecomposedValue = @"apple-touch-icon-precomposed"; -static NSString *kSizesKey = @"sizes"; -static NSString *kTitleKey = @"title"; -static NSString *kRelKey = @"rel"; -static NSString *kAlternateKey = @"alternate"; -static NSString *kRSSSuffix = @"/rss+xml"; -static NSString *kAtomSuffix = @"/atom+xml"; -static NSString *kTypeKey = @"type"; - -@interface RSHTMLMetadataAppleTouchIcon () - -- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString; +@implementation RSHTMLMetadataLink +- (NSString*)description { return self.link; } @end -@interface RSHTMLMetadataFeedLink () +@implementation RSHTMLMetadataIconLink -- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString; - -@end - - -@implementation RSHTMLMetadata - - -#pragma mark - Init - -- 
(instancetype)initWithURLString:(NSString *)urlString dictionaries:(NSArray *)dictionaries { - - self = [super init]; - if (!self) { - return nil; - } - - _baseURLString = urlString; - _dictionaries = dictionaries; - _faviconLink = [self resolvedLinkFromFirstDictionaryWithMatchingRel:kShortcutIconRelValue]; - - NSArray *appleTouchIconDictionaries = [self appleTouchIconDictionaries]; - _appleTouchIcons = objectsOfClassWithDictionaries([RSHTMLMetadataAppleTouchIcon class], appleTouchIconDictionaries, urlString); - - NSArray *feedLinkDictionaries = [self feedLinkDictionaries]; - _feedLinks = objectsOfClassWithDictionaries([RSHTMLMetadataFeedLink class], feedLinkDictionaries, urlString); - - return self; -} - - -#pragma mark - Private - -- (NSDictionary *)firstDictionaryWithMatchingRel:(NSString *)valueToMatch { - - // Case-insensitive. - - for (NSDictionary *oneDictionary in self.dictionaries) { - - NSString *oneRelValue = relValue(oneDictionary); - if (oneRelValue && [oneRelValue compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame) { - return oneDictionary; +- (CGSize)getSize { + if (self.sizes && self.sizes.length > 0) { + NSArray *parts = [self.sizes componentsSeparatedByString:@"x"]; + if (parts.count == 2) { + return CGSizeMake([parts.firstObject intValue], [parts.lastObject intValue]); } } - - return nil; + return CGSizeZero; } - -- (NSArray *)appleTouchIconDictionaries { - - NSMutableArray *dictionaries = [NSMutableArray new]; - - for (NSDictionary *oneDictionary in self.dictionaries) { - - NSString *oneRelValue = relValue(oneDictionary).lowercaseString; - if ([oneRelValue isEqualToString:kAppleTouchIconValue] || [oneRelValue isEqualToString:kAppleTouchIconPrecomposedValue]) { - [dictionaries addObject:oneDictionary]; - } - } - - return dictionaries; +- (NSString*)description { + return [NSString stringWithFormat:@"%@ [%@] (%@)", self.title, self.sizes, self.link]; } - -- (NSArray *)feedLinkDictionaries { - - NSMutableArray *dictionaries = 
[NSMutableArray new]; - - for (NSDictionary *oneDictionary in self.dictionaries) { - - NSString *oneRelValue = relValue(oneDictionary).lowercaseString; - if (![oneRelValue isEqualToString:kAlternateKey]) { - continue; - } - - NSString *oneType = [oneDictionary rsxml_objectForCaseInsensitiveKey:kTypeKey]; - if (!typeIsFeedType(oneType)) { - continue; - } - - if (RSXMLStringIsEmpty(urlStringFromDictionary(oneDictionary))) { - continue; - } - - [dictionaries addObject:oneDictionary]; - } - - return dictionaries; -} - - -- (NSString *)resolvedLinkFromFirstDictionaryWithMatchingRel:(NSString *)relValue { - - NSDictionary *d = [self firstDictionaryWithMatchingRel:relValue]; - return absoluteURLStringWithDictionary(d, self.baseURLString); -} - - -@end - - -static NSString *relValue(NSDictionary *d) { - - return [d rsxml_objectForCaseInsensitiveKey:kRelKey]; -} - - -static NSString *urlStringFromDictionary(NSDictionary *d) { - - NSString *urlString = [d rsxml_objectForCaseInsensitiveKey:kHrefKey]; - if (urlString) { - return urlString; - } - - return [d rsxml_objectForCaseInsensitiveKey:kSrcKey]; -} - - -static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString) { - - NSURL *url = [NSURL URLWithString:baseURLString]; - if (!url) { - return nil; - } - - NSURL *absoluteURL = [NSURL URLWithString:relativeURLString relativeToURL:url]; - return absoluteURL.absoluteString; -} - - -static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString) { - - NSString *urlString = urlStringFromDictionary(d); - if (RSXMLStringIsEmpty(urlString)) { - return nil; - } - return absoluteURLStringWithRelativeURLString(urlString, baseURLString); -} - - -static NSArray *objectsOfClassWithDictionaries(Class class, NSArray *dictionaries, NSString *baseURLString) { - - NSMutableArray *objects = [NSMutableArray new]; - - for (NSDictionary *oneDictionary in dictionaries) { - - id oneObject = [[class alloc] 
initWithDictionary:oneDictionary baseURLString:baseURLString]; - if (oneObject) { - [objects addObject:oneObject]; - } - } - - return [objects copy]; -} - - -static BOOL typeIsFeedType(NSString *type) { - - type = type.lowercaseString; - return [type hasSuffix:kRSSSuffix] || [type hasSuffix:kAtomSuffix]; -} - - -@implementation RSHTMLMetadataAppleTouchIcon - - -- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _sizes = [d rsxml_objectForCaseInsensitiveKey:kSizesKey]; - _rel = [d rsxml_objectForCaseInsensitiveKey:kRelKey]; - - return self; -} - - @end @implementation RSHTMLMetadataFeedLink - -- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; +- (NSString*)description { + NSString *prefix; + switch (_type) { + case RSFeedTypeNone: prefix = @"None"; break; + case RSFeedTypeRSS: prefix = @"RSS"; break; + case RSFeedTypeAtom: prefix = @"Atom"; break; } - - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _title = [d rsxml_objectForCaseInsensitiveKey:kTitleKey]; - _type = [d rsxml_objectForCaseInsensitiveKey:kTypeKey]; - - return self; + return [NSString stringWithFormat:@"[%@] %@ (%@)", prefix, self.title, self.link]; } - @end + +@implementation RSHTMLMetadataAnchor + +- (NSString*)description { + if (!_tooltip) { + return [NSString stringWithFormat:@"%@ (%@)", self.title, self.link]; + } + return [NSString stringWithFormat:@"%@ [%@] (%@)", self.title, self.tooltip, self.link]; +} + +@end + + +@implementation RSHTMLMetadata + +- (NSString*)description { + return [NSString stringWithFormat:@"favicon: %@\nFeed links: %@\nIcons: %@\n", + self.faviconLink, self.feedLinks, self.iconLinks]; +} + +@end diff --git a/RSXML/RSHTMLMetadataParser.h b/RSXML/RSHTMLMetadataParser.h index 
310de9f..35e05df 100644 --- a/RSXML/RSHTMLMetadataParser.h +++ b/RSXML/RSHTMLMetadataParser.h @@ -1,28 +1,32 @@ // -// RSHTMLMetadataParser.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
@import Foundation; - +#import "RSXMLParser.h" @class RSHTMLMetadata; -@class RSXMLData; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSHTMLMetadataParser : NSObject - -+ (RSHTMLMetadata *)HTMLMetadataWithXMLData:(RSXMLData *)xmlData; - -- (instancetype)initWithXMLData:(RSXMLData *)xmlData; - -@property (nonatomic, readonly) RSHTMLMetadata *metadata; +@interface RSHTMLMetadataParser : RSXMLParser @end - -NS_ASSUME_NONNULL_END diff --git a/RSXML/RSHTMLMetadataParser.m b/RSXML/RSHTMLMetadataParser.m index ad8f957..d16af53 100644 --- a/RSXML/RSHTMLMetadataParser.m +++ b/RSXML/RSHTMLMetadataParser.m @@ -1,128 +1,111 @@ // -// RSHTMLMetadataParser.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
-#import #import "RSHTMLMetadataParser.h" -#import "RSXMLData.h" #import "RSHTMLMetadata.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSXMLInternal.h" - - -@interface RSHTMLMetadataParser () - -@property (nonatomic, readonly) RSXMLData *xmlData; -@property (nonatomic, readwrite) RSHTMLMetadata *metadata; -@property (nonatomic) NSMutableArray *dictionaries; -@property (nonatomic) BOOL didFinishParsing; +#import "NSString+RSXML.h" +#import "NSDictionary+RSXML.h" +@interface RSHTMLMetadataParser() +@property (nonatomic, readonly) NSURL *baseURL; +@property (nonatomic) NSString *faviconLink; +@property (nonatomic) NSMutableArray *iconLinks; +@property (nonatomic) NSMutableArray *feedLinks; @end - @implementation RSHTMLMetadataParser +#pragma mark - RSXMLParserDelegate -#pragma mark - Class Methods ++ (BOOL)isHTMLParser { return YES; } -+ (RSHTMLMetadata *)HTMLMetadataWithXMLData:(RSXMLData *)xmlData { +- (BOOL)xmlParserWillStartParsing { + _baseURL = [NSURL URLWithString:self.documentURI]; + _iconLinks = [NSMutableArray new]; + _feedLinks = [NSMutableArray new]; + return YES; +} - RSHTMLMetadataParser *parser = [[self alloc] initWithXMLData:xmlData]; - return parser.metadata; +- (id)xmlParserWillReturnDocument { + RSHTMLMetadata *metadata = [[RSHTMLMetadata alloc] init]; + metadata.faviconLink = self.faviconLink; + metadata.feedLinks = [self.feedLinks copy]; + metadata.iconLinks = [self.iconLinks copy]; + return metadata; } -#pragma mark - Init - -- (instancetype)initWithXMLData:(RSXMLData *)xmlData { - - NSParameterAssert(xmlData.data); - NSParameterAssert(xmlData.urlString); - - self = [super init]; - if (!self) { - return nil; - } - - _xmlData = xmlData; - _dictionaries = [NSMutableArray new]; - - [self parse]; - - return self; -} +#pragma mark - RSSAXParserDelegate -#pragma mark - Parse +- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { -- (void)parse { - - 
RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self]; - [parser parseData:self.xmlData.data]; - [parser finishParsing]; - - self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.xmlData.urlString dictionaries:[self.dictionaries copy]]; -} - - -static NSString *kHrefKey = @"href"; -static NSString *kSrcKey = @"src"; -static NSString *kRelKey = @"rel"; - -- (NSString *)linkForDictionary:(NSDictionary *)d { - - NSString *link = [d rsxml_objectForCaseInsensitiveKey:kHrefKey]; - if (link) { - return link; - } - - return [d rsxml_objectForCaseInsensitiveKey:kSrcKey]; -} - - -- (void)handleLinkAttributes:(NSDictionary *)d { - - if (RSXMLStringIsEmpty([d rsxml_objectForCaseInsensitiveKey:kRelKey])) { + if (xmlStrlen(localName) != 4) { return; } - if (RSXMLStringIsEmpty([self linkForDictionary:d])) { - return; + else if (EqualBytes(localName, "body", 4)) { + [SAXParser cancel]; // we're only interested in head + } + else if (EqualBytes(localName, "link", 4)) { + [self parseLinkItemWithAttributes:[SAXParser attributesDictionaryHTML:attributes]]; } - - [self.dictionaries addObject:d]; } - -#pragma mark - RSSAXHTMLParserDelegate - -static const char *kBody = "body"; -static const NSInteger kBodyLength = 5; -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - if (self.didFinishParsing) { +- (void)parseLinkItemWithAttributes:(NSDictionary*)attribs { + if (!attribs || attribs.count == 0) return; + NSString *rel = [attribs rsxml_objectForCaseInsensitiveKey:@"rel"]; + if (!rel || rel.length == 0) + return; + NSString *link = [attribs rsxml_objectForCaseInsensitiveKey:@"href"]; + if (!link) { + link = [attribs rsxml_objectForCaseInsensitiveKey:@"src"]; + if (!link) + return; } - if (RSSAXEqualTags(localName, kBody, kBodyLength)) { - self.didFinishParsing = YES; - return; + rel = [rel 
lowercaseString]; + + if ([rel isEqualToString:@"shortcut icon"]) { + self.faviconLink = [link absoluteURLWithBase:self.baseURL]; } - - if (!RSSAXEqualTags(localName, kLink, kLinkLength)) { - return; + else if ([rel isEqualToString:@"icon"] || [rel hasPrefix:@"apple-touch-icon"]) { // also matching "apple-touch-icon-precomposed" + RSHTMLMetadataIconLink *icon = [RSHTMLMetadataIconLink new]; + icon.link = [link absoluteURLWithBase:self.baseURL]; + icon.title = rel; + icon.sizes = [attribs rsxml_objectForCaseInsensitiveKey:@"sizes"]; + [self.iconLinks addObject:icon]; } - - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if (!RSXMLIsEmpty(d)) { - [self handleLinkAttributes:d]; + else if ([rel isEqualToString:@"alternate"]) { + RSFeedType type = RSFeedTypeFromLinkTypeAttribute([attribs rsxml_objectForCaseInsensitiveKey:@"type"]); + if (type != RSFeedTypeNone) { + RSHTMLMetadataFeedLink *feedLink = [RSHTMLMetadataFeedLink new]; + feedLink.link = [link absoluteURLWithBase:self.baseURL]; + feedLink.title = [attribs rsxml_objectForCaseInsensitiveKey:@"title"]; + feedLink.type = type; + [self.feedLinks addObject:feedLink]; + } } } diff --git a/RSXML/RSOPMLItem.h b/RSXML/RSOPMLItem.h index a609012..0ddfc69 100644 --- a/RSXML/RSOPMLItem.h +++ b/RSXML/RSOPMLItem.h @@ -1,3 +1,25 @@ +// +// MIT License (MIT) +// +// Copyright (c) 2018 Oleg Geier +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. @import Foundation; diff --git a/RSXML/RSOPMLItem.m b/RSXML/RSOPMLItem.m index 49ac893..23d610d 100644 --- a/RSXML/RSOPMLItem.m +++ b/RSXML/RSOPMLItem.m @@ -1,6 +1,28 @@ +// +// MIT License (MIT) +// +// Copyright (c) 2018 Oleg Geier +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
#import "RSOPMLItem.h" -#import "RSXMLInternal.h" +#import "NSDictionary+RSXML.h" NSString *OPMLTextKey = @"text"; @@ -63,8 +85,8 @@ NSString *OPMLXMLURLKey = @"xmlUrl"; } - (id)attributeForKey:(NSString *)key { - if (self.attributes.count > 0 && !RSXMLStringIsEmpty(key)) { - return [self.attributes rsxml_objectForCaseInsensitiveKey:key]; + if (self.mutableAttributes.count > 0 && key && key.length > 0) { + return [self.mutableAttributes rsxml_objectForCaseInsensitiveKey:key]; } return nil; } diff --git a/RSXML/RSOPMLParser.h b/RSXML/RSOPMLParser.h index 61f3aec..e28925c 100644 --- a/RSXML/RSOPMLParser.h +++ b/RSXML/RSOPMLParser.h @@ -1,29 +1,35 @@ // -// RSOPMLParser.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
-@import Foundation; +#import "RSXMLParser.h" +// +// http://dev.opml.org/spec2.html#subscriptionLists -@class RSXMLData; @class RSOPMLItem; - -typedef void (^RSParsedOPMLBlock)(RSOPMLItem *opmlDocument, NSError *error); - -void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback); //async; calls back on main thread. - - -@interface RSOPMLParser: NSObject - -- (instancetype)initWithXMLData:(RSXMLData *)xmlData; - -@property (nonatomic, readonly) RSOPMLItem *opmlDocument; -@property (nonatomic, readonly) NSError *error; +@interface RSOPMLParser: RSXMLParser @end diff --git a/RSXML/RSOPMLParser.m b/RSXML/RSOPMLParser.m index 8dbeaaf..f2d7b68 100644 --- a/RSXML/RSOPMLParser.m +++ b/RSXML/RSOPMLParser.m @@ -1,172 +1,76 @@ // -// RSOPMLParser.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. #import "RSOPMLParser.h" -#import -#import "RSXMLData.h" -#import "RSSAXParser.h" #import "RSOPMLItem.h" -#import "RSXMLError.h" - -void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback) { - - NSCParameterAssert(xmlData); - NSCParameterAssert(callback); - - dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{ - - @autoreleasepool { - - RSOPMLParser *parser = [[RSOPMLParser alloc] initWithXMLData:xmlData]; - - RSOPMLItem *document = parser.opmlDocument; - NSError *error = parser.error; - - dispatch_async(dispatch_get_main_queue(), ^{ - - callback(document, error); - }); - } - }); -} - - -@interface RSOPMLParser () - -@property (nonatomic, readwrite) RSOPMLItem *opmlDocument; -@property (nonatomic, readwrite) NSError *error; +@interface RSOPMLParser() +@property (nonatomic, assign) BOOL parsingHead; +@property (nonatomic) RSOPMLItem *opmlDocument; @property (nonatomic) NSMutableArray *itemStack; - @end @implementation RSOPMLParser +#pragma mark - RSXMLParserDelegate -#pragma mark - Init ++ (BOOL)isOPMLParser { return YES; } -- (instancetype)initWithXMLData:(RSXMLData *)XMLData { - - self = [super init]; - if (!self) { - return nil; - } - - [self parse:XMLData]; - - return self; ++ (NSArray*)parserRequireOrderedTags { + return @[@" 0, nil); - - /*If itemStack is empty, bad things are happening. 
- But we still shouldn't crash in production.*/ - - if (self.itemStack.count > 0) { - [self.itemStack removeLastObject]; - } +- (id)xmlParserWillReturnDocument { + return self.opmlDocument; } #pragma mark - RSSAXParserDelegate -static const char *kOutline = "outline"; -static const char kOutlineLength = 8; -static const char *kHead = "head"; -static const char kHeadLength = 5; -static BOOL isHead = NO; - (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) { + int len = xmlStrlen(localName); + + if (len == 7 && EqualBytes(localName, "outline", 7)) { RSOPMLItem *item = [RSOPMLItem new]; item.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; [self.itemStack.lastObject addChild:item]; [self.itemStack addObject:item]; - } else if (RSSAXEqualTags(localName, kHead, kHeadLength)) { - isHead = YES; - } else if (isHead) { + } + else if (len == 4 && EqualBytes(localName, "head", 4)) { + self.parsingHead = YES; + } + else if (self.parsingHead) { [SAXParser beginStoringCharacters]; } } @@ -174,13 +78,17 @@ static BOOL isHead = NO; - (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) { - [self popItem]; - } else if (RSSAXEqualTags(localName, kHead, kHeadLength)) { - isHead = NO; - } else if (isHead) { + int len = xmlStrlen(localName); + + if (len == 7 && EqualBytes(localName, "outline", 7)) { + [self.itemStack removeLastObject]; // safe to be called on empty array + } + else if (len == 4 && EqualBytes(localName, "head", 4)) { + 
self.parsingHead = NO; + } + else if (self.parsingHead) { // handle xml tags in head as if they were attributes NSString *key = [NSString stringWithFormat:@"%s", localName]; - [self.itemStack.lastObject setAttribute:[SAXParser currentString] forKey:key]; + [self.itemStack.lastObject setAttribute:SAXParser.currentStringWithTrimmedWhitespace forKey:key]; } } @@ -191,24 +99,24 @@ static BOOL isHead = NO; return nil; } - size_t nameLength = strlen((const char *)name); - switch (nameLength) { + int len = xmlStrlen(name); + switch (len) { case 4: - if (RSSAXEqualTags(name, "text", 5)) return OPMLTextKey; - if (RSSAXEqualTags(name, "type", 5)) return OPMLTypeKey; + if (EqualBytes(name, "text", 4)) return OPMLTextKey; + if (EqualBytes(name, "type", 4)) return OPMLTypeKey; break; case 5: - if (RSSAXEqualTags(name, "title", 6)) return OPMLTitleKey; + if (EqualBytes(name, "title", 5)) return OPMLTitleKey; break; case 6: - if (RSSAXEqualTags(name, "xmlUrl", 7)) return OPMLXMLURLKey; + if (EqualBytes(name, "xmlUrl", 6)) return OPMLXMLURLKey; break; case 7: - if (RSSAXEqualTags(name, "version", 8)) return OPMLVersionKey; - if (RSSAXEqualTags(name, "htmlUrl", 8)) return OPMLHMTLURLKey; + if (EqualBytes(name, "version", 7)) return OPMLVersionKey; + if (EqualBytes(name, "htmlUrl", 7)) return OPMLHMTLURLKey; break; case 11: - if (RSSAXEqualTags(name, "description", 12)) return OPMLDescriptionKey; + if (EqualBytes(name, "description", 11)) return OPMLDescriptionKey; break; } return nil; @@ -220,11 +128,10 @@ static BOOL isHead = NO; if (length < 1) { return @""; } else if (length == 3) { - if (RSSAXEqualBytes(bytes, "RSS", 3)) return @"RSS"; - if (RSSAXEqualBytes(bytes, "rss", 3)) return @"rss"; + if (EqualBytes(bytes, "RSS", 3)) return @"RSS"; + if (EqualBytes(bytes, "rss", 3)) return @"rss"; } return nil; } - @end diff --git a/RSXML/RSParsedArticle.h b/RSXML/RSParsedArticle.h index e42a7ce..3f90e2e 100755 --- a/RSXML/RSParsedArticle.h +++ b/RSXML/RSParsedArticle.h @@ -1,20 +1,37 @@ 
// -// RSParsedArticle.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 12/6/14. -// Copyright (c) 2014 Ranchero Software LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. @import Foundation; @interface RSParsedArticle : NSObject -- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL; +- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL dateParsed:(NSDate*)parsed; @property (nonatomic, readonly, nonnull) NSString *feedURL; -@property (nonatomic, nonnull) NSString *articleID; //Calculated. Don't get until other properties have been set. +@property (nonatomic, readonly, nonnull) NSDate *dateParsed; +@property (nonatomic, readonly, nonnull) NSString *articleID; //Calculated. Don't get until other properties have been set. 
@property (nonatomic, nullable) NSString *guid; @property (nonatomic, nullable) NSString *title; @@ -25,7 +42,6 @@ @property (nonatomic, nullable) NSString *author; @property (nonatomic, nullable) NSDate *datePublished; @property (nonatomic, nullable) NSDate *dateModified; -@property (nonatomic, nonnull) NSDate *dateParsed; - (void)calculateArticleID; // Optimization. Call after all properties have been set. Call on a background thread. diff --git a/RSXML/RSParsedArticle.m b/RSXML/RSParsedArticle.m index b822ab7..15d73db 100755 --- a/RSXML/RSParsedArticle.m +++ b/RSXML/RSParsedArticle.m @@ -1,101 +1,104 @@ // -// RSParsedArticle.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 12/6/14. -// Copyright (c) 2014 Ranchero Software LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
#import "RSParsedArticle.h" -#import "RSXMLInternal.h" +#import "NSString+RSXML.h" + +@interface RSParsedArticle() +@property (nonatomic, copy) NSString *internalArticleID; +@end @implementation RSParsedArticle - -#pragma mark - Init - -- (instancetype)initWithFeedURL:(NSString *)feedURL { +- (instancetype)initWithFeedURL:(NSString *)feedURL dateParsed:(NSDate*)parsed { NSParameterAssert(feedURL != nil); self = [super init]; - if (!self) { - return nil; + if (self) { + _feedURL = feedURL; + _dateParsed = parsed; } - - _feedURL = feedURL; - _dateParsed = [NSDate date]; - return self; } +#pragma mark - Unique Article ID -#pragma mark - Accessors - +/** + Article ID will be generated on the first access. + */ - (NSString *)articleID { - - if (!_articleID) { - _articleID = self.calculatedUniqueID; + if (!_internalArticleID) { + _internalArticleID = self.calculatedUniqueID; } - - return _articleID; + return _internalArticleID; } +/** + Initiate calculation of article id. + */ +- (void)calculateArticleID { + (void)self.articleID; +} +/** + @return MD5 hash of @c feedURL @c + @c guid. Or a combination of properties when guid is not set. + @note + In general, feeds should have guids. When they don't, re-runs are very likely, + because there's no other 100% reliable way to determine identity. + */ - (NSString *)calculatedUniqueID { - /*guid+feedID, or a combination of properties when no guid. Then hash the result. - In general, feeds should have guids. 
When they don't, re-runs are very likely, - because there's no other 100% reliable way to determine identity.*/ - - NSMutableString *s = [NSMutableString stringWithString:@""]; + NSAssert(self.feedURL != nil, @"Feed URL should always be set!"); + NSMutableString *s = [NSMutableString stringWithString:self.feedURL]; - NSString *datePublishedTimeStampString = nil; - if (self.datePublished) { - datePublishedTimeStampString = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970]; - } - - if (!RSXMLStringIsEmpty(self.guid)) { + if (self.guid.length > 0) { [s appendString:self.guid]; } - - else if (!RSXMLStringIsEmpty(self.link) && self.datePublished != nil) { - [s appendString:self.link]; - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSXMLStringIsEmpty(self.title) && self.datePublished != nil) { - [s appendString:self.title]; - [s appendString:datePublishedTimeStampString]; - } - else if (self.datePublished != nil) { - [s appendString:datePublishedTimeStampString]; + + if (self.link.length > 0) { + [s appendString:self.link]; + } else if (self.title.length > 0) { + [s appendString:self.title]; + } + [s appendString:[NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970]]; } - - else if (!RSXMLStringIsEmpty(self.link)) { + else if (self.link.length > 0) { [s appendString:self.link]; } - - else if (!RSXMLStringIsEmpty(self.title)) { + else if (self.title.length > 0) { [s appendString:self.title]; } - - else if (!RSXMLStringIsEmpty(self.body)) { + else if (self.body.length > 0) { [s appendString:self.body]; } - - NSAssert(!RSXMLStringIsEmpty(self.feedURL), nil); - [s appendString:self.feedURL]; - return [s rsxml_md5HashString]; } -- (void)calculateArticleID { - - (void)self.articleID; -} +#pragma mark - Printing - (NSString*)description { return [NSString stringWithFormat:@"{%@ '%@', guid: %@}", [self class], self.title, self.guid]; diff --git a/RSXML/RSParsedFeed.h b/RSXML/RSParsedFeed.h index 
956ad97..c5c9d2e 100755 --- a/RSXML/RSParsedFeed.h +++ b/RSXML/RSParsedFeed.h @@ -1,23 +1,41 @@ // -// RSParsedFeed.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
@import Foundation; @class RSParsedArticle; @interface RSParsedFeed : NSObject - -- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString title:(NSString * _Nullable)title link:(NSString * _Nullable)link articles:(NSArray * _Nonnull)articles; - @property (nonatomic, readonly, nonnull) NSString *urlString; -@property (nonatomic, readonly, nullable) NSString *title; -@property (nonatomic, readonly, nullable) NSString *link; +@property (nonatomic, readonly, nonnull) NSDate *dateParsed; +@property (nonatomic, readonly, nonnull) NSArray *articles; + +@property (nonatomic, nullable) NSString *title; +@property (nonatomic, nullable) NSString *link; @property (nonatomic, nullable) NSString *subtitle; -@property (nonatomic, readonly, nonnull) NSArray *articles; + +- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString; +- (RSParsedArticle *)appendNewArticle; @end diff --git a/RSXML/RSParsedFeed.m b/RSXML/RSParsedFeed.m index 6527bc8..993cc29 100755 --- a/RSXML/RSParsedFeed.m +++ b/RSXML/RSParsedFeed.m @@ -1,33 +1,65 @@ // -// RSParsedFeed.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. #import "RSParsedFeed.h" +#import "RSParsedArticle.h" + +@interface RSParsedFeed() +@property (nonatomic) NSMutableArray *mutableArticles; +@end @implementation RSParsedFeed -- (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link articles:(NSArray *)articles { +- (instancetype)initWithURLString:(NSString *)urlString { self = [super init]; - if (!self) { - return nil; + if (self) { + _urlString = urlString; + _mutableArticles = [NSMutableArray new]; + _dateParsed = [NSDate date]; } - - _urlString = urlString; - _title = title; - _link = link; - _articles = articles; - return self; } +- (NSArray *)articles { + return _mutableArticles; +} + +/** + Append new @c RSParsedArticle object to @c .articles and return newly inserted instance. 
+ */ +- (RSParsedArticle *)appendNewArticle { + RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString dateParsed:_dateParsed]; + [_mutableArticles addObject:article]; + return article; +} + +#pragma mark - Printing + - (NSString*)description { return [NSString stringWithFormat:@"{%@ (%@), title: '%@', subtitle: '%@', entries: %@}", - [self class], _link, _title, _subtitle, _articles]; + [self class], _link, _title, _subtitle, _mutableArticles]; } @end diff --git a/RSXML/RSRSSParser.h b/RSXML/RSRSSParser.h index 65b20fd..5c1e9eb 100644 --- a/RSXML/RSRSSParser.h +++ b/RSXML/RSRSSParser.h @@ -1,13 +1,32 @@ // -// RSRSSParser.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 1/6/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
-#import "FeedParser.h" +#import "RSFeedParser.h" -@interface RSRSSParser : NSObject +// +// https://cyber.harvard.edu/rss/rss.html + +@interface RSRSSParser : RSFeedParser @end diff --git a/RSXML/RSRSSParser.m b/RSXML/RSRSSParser.m index 5409cdb..012c8fa 100755 --- a/RSXML/RSRSSParser.m +++ b/RSXML/RSRSSParser.m @@ -1,351 +1,52 @@ // -// RSRSSParser.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 1/6/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
-#import #import "RSRSSParser.h" -#import "RSSAXParser.h" #import "RSParsedFeed.h" #import "RSParsedArticle.h" -#import "RSXMLData.h" -#import "RSXMLInternal.h" #import "NSString+RSXML.h" -#import "RSDateParser.h" +#import "NSDictionary+RSXML.h" - -@interface RSRSSParser () - -@property (nonatomic) NSData *feedData; -@property (nonatomic) NSString *urlString; -@property (nonatomic) NSDictionary *currentAttributes; -@property (nonatomic) RSSAXParser *parser; -@property (nonatomic) NSMutableArray *articles; -@property (nonatomic) BOOL parsingArticle; -@property (nonatomic, readonly) RSParsedArticle *currentArticle; -@property (nonatomic) BOOL parsingChannelImage; -@property (nonatomic, readonly) NSDate *currentDate; -@property (nonatomic) BOOL endRSSFound; -@property (nonatomic) NSString *feedLink; -@property (nonatomic) NSString *feedTitle; -@property (nonatomic) NSString *feedSubtitle; -@property (nonatomic) NSDate *dateParsed; - -@end - - -@implementation RSRSSParser - -#pragma mark - Class Methods - -+ (BOOL)canParseFeed:(RSXMLData *)xmlData { - - // Checking for '' within first n characters should do it. 
- // TODO: handle RSS 1.0 - - @autoreleasepool { - - NSData *feedData = xmlData.data; - NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO]; - if (!s) { - s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding]; - } - if (!s) { - s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding]; - } - if (!s) { - return NO; - } - - static const NSInteger numberOfCharactersToSearch = 4096; - NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch); - if (s.length < numberOfCharactersToSearch) { - rangeToSearch.length = s.length; - } - - NSRange rssRange = [s rangeOfString:@"" options:NSLiteralSearch range:rangeToSearch]; - if (rssRange.length < 1 || channelRange.length < 1) { - return NO; - } - - if (rssRange.location > channelRange.location) { - return NO; // Wrong order. - } - } - - return YES; -} - - -#pragma mark - Init - -- (instancetype)initWithXMLData:(RSXMLData *)xmlData { - - self = [super init]; - if (!self) { - return nil; - } - - _feedData = xmlData.data; - _urlString = xmlData.urlString; - _parser = [[RSSAXParser alloc] initWithDelegate:self]; - _articles = [NSMutableArray new]; - - return self; -} - - -#pragma mark - API - -- (RSParsedFeed *)parseFeed { - - [self parse]; - - RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.feedTitle link:self.feedLink articles:self.articles]; - parsedFeed.subtitle = self.feedSubtitle; - - return parsedFeed; -} - - -#pragma mark - Constants - -static NSString *kIsPermaLinkKey = @"isPermaLink"; -static NSString *kURLKey = @"url"; -static NSString *kLengthKey = @"length"; -static NSString *kTypeKey = @"type"; -static NSString *kFalseValue = @"false"; -static NSString *kTrueValue = @"true"; -static NSString *kContentEncodedKey = @"content:encoded"; -static NSString *kDCDateKey = @"dc:date"; -static NSString *kDCCreatorKey = @"dc:creator"; static 
NSString *kRDFAboutKey = @"rdf:about"; -static const char *kItem = "item"; -static const NSInteger kItemLength = 5; +@interface RSRSSParser () +@property (nonatomic) BOOL parsingArticle; +@property (nonatomic) BOOL parsingChannelImage; +@property (nonatomic) BOOL guidIsPermalink; +@property (nonatomic) BOOL endRSSFound; +@property (nonatomic) NSURL *baseURL; +@end -static const char *kImage = "image"; -static const NSInteger kImageLength = 6; +// TODO: handle RSS 1.0 +@implementation RSRSSParser -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; +#pragma mark - RSXMLParserDelegate -static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kDC = "dc"; -static const NSInteger kDCLength = 3; - -static const char *kCreator = "creator"; -static const NSInteger kCreatorLength = 8; - -static const char *kDate = "date"; -static const NSInteger kDateLength = 5; - -static const char *kContent = "content"; -static const NSInteger kContentLength = 8; - -static const char *kEncoded = "encoded"; -static const NSInteger kEncodedLength = 8; - -static const char *kGuid = "guid"; -static const NSInteger kGuidLength = 5; - -static const char *kPubDate = "pubDate"; -static const NSInteger kPubDateLength = 8; - -static const char *kAuthor = "author"; -static const NSInteger kAuthorLength = 7; - -static const char *kDescription = "description"; -static const NSInteger kDescriptionLength = 12; - -static const char *kRSS = "rss"; -static const NSInteger kRSSLength = 4; - -static const char *kURL = "url"; -static const NSInteger kURLLength = 4; - -static const char *kLength = "length"; -static const NSInteger kLengthLength = 7; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kIsPermaLink = "isPermaLink"; -static const NSInteger kIsPermaLinkLength = 12; - -static const char *kRDF = "rdf"; -static const NSInteger kRDFlength = 4; - -static const char *kAbout = "about"; 
-static const NSInteger kAboutLength = 6; - -static const char *kFalse = "false"; -static const NSInteger kFalseLength = 6; - -static const char *kTrue = "true"; -static const NSInteger kTrueLength = 5; - - -#pragma mark - Parsing - -- (void)parse { - - self.dateParsed = [NSDate date]; - - @autoreleasepool { - [self.parser parseData:self.feedData]; - [self.parser finishParsing]; - } - - // Optimization: make articles do calculations on this background thread. - [self.articles makeObjectsPerformSelector:@selector(calculateArticleID)]; ++ (NSArray *)parserRequireOrderedTags { + return @[@""]; } - -- (void)addArticle { - - RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString]; - article.dateParsed = self.dateParsed; - - [self.articles addObject:article]; -} - - -- (RSParsedArticle *)currentArticle { - - return self.articles.lastObject; -} - - -- (void)addFeedElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (prefix != NULL) { - return; - } - - if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - if (!self.feedLink) { - self.feedLink = self.parser.currentStringWithTrimmedWhitespace; - } - } - - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - self.feedTitle = self.parser.currentStringWithTrimmedWhitespace; - } - - else if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) { - self.feedSubtitle = self.parser.currentStringWithTrimmedWhitespace; - } -} - - -- (void)addDCElement:(const xmlChar *)localName { - - if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) { - - self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace; - } - else if (RSSAXEqualTags(localName, kDate, kDateLength)) { - - self.currentArticle.datePublished = self.currentDate; - } -} - - -- (void)addGuid { - - self.currentArticle.guid = self.parser.currentStringWithTrimmedWhitespace; - - NSString *isPermaLinkValue = [self.currentAttributes rsxml_objectForCaseInsensitiveKey:@"ispermalink"]; - if 
(!isPermaLinkValue || ![isPermaLinkValue isEqualToString:@"false"]) { - self.currentArticle.permalink = [self urlString:self.currentArticle.guid]; - } -} - - -- (NSString *)urlString:(NSString *)s { - - /*Resolve against home page URL (if available) or feed URL.*/ - - if ([[s lowercaseString] hasPrefix:@"http"]) { - return s; - } - - if (!self.feedLink) { - //TODO: get feed URL and use that to resolve URL.*/ - return s; - } - - NSURL *baseURL = [NSURL URLWithString:self.feedLink]; - if (!baseURL) { - return s; - } - - NSURL *resolvedURL = [NSURL URLWithString:s relativeToURL:baseURL]; - if (resolvedURL.absoluteString) { - return resolvedURL.absoluteString; - } - - return s; -} - - -- (NSString *)currentStringWithHTMLEntitiesDecoded { - - return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities]; -} - -- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (RSSAXEqualTags(prefix, kDC, kDCLength)) { - - [self addDCElement:localName]; - return; - } - - if (RSSAXEqualTags(prefix, kContent, kContentLength) && RSSAXEqualTags(localName, kEncoded, kEncodedLength)) { - - self.currentArticle.body = [self currentStringWithHTMLEntitiesDecoded]; - return; - } - - if (prefix != NULL) { - return; - } - - if (RSSAXEqualTags(localName, kGuid, kGuidLength)) { - [self addGuid]; - } - else if (RSSAXEqualTags(localName, kPubDate, kPubDateLength)) { - self.currentArticle.datePublished = self.currentDate; - } - else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace; - } - else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - self.currentArticle.link = [self urlString:self.parser.currentStringWithTrimmedWhitespace]; - } - else if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) { - self.currentArticle.abstract = [self currentStringWithHTMLEntitiesDecoded]; - } - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) 
{ - self.currentArticle.title = [self currentStringWithHTMLEntitiesDecoded]; - } -} - - -- (NSDate *)currentDate { - - return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length); -} - - #pragma mark - RSSAXParserDelegate - (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { @@ -354,31 +55,61 @@ static const NSInteger kTrueLength = 5; return; } - NSDictionary *xmlAttributes = nil; - if (RSSAXEqualTags(localName, kItem, kItemLength) || RSSAXEqualTags(localName, kGuid, kGuidLength)) { - xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - } - if (self.currentAttributes != xmlAttributes) { - self.currentAttributes = xmlAttributes; - } + int len = xmlStrlen(localName); - if (!prefix && RSSAXEqualTags(localName, kItem, kItemLength)) { - - [self addArticle]; - self.parsingArticle = YES; - - if (xmlAttributes && xmlAttributes[kRDFAboutKey]) { /*RSS 1.0 guid*/ - self.currentArticle.guid = xmlAttributes[kRDFAboutKey]; - self.currentArticle.permalink = self.currentArticle.guid; + if (prefix != NULL) { + if (!self.parsingArticle || self.parsingChannelImage) { + return; } + if (len != 4 && len != 7) { + return; + } + int prefLen = xmlStrlen(prefix); + if (prefLen == 2 && EqualBytes(prefix, "dc", 2)) { + if (EqualBytes(localName, "date", 4) || EqualBytes(localName, "creator", 7)) { + [SAXParser beginStoringCharacters]; + } + } + else if (len == 7 && prefLen == 7 && EqualBytes(prefix, "content", 7) && EqualBytes(localName, "encoded", 7)) { + [SAXParser beginStoringCharacters]; + } + return; + } + // else: localname without prefix + switch (len) { + case 4: + if (EqualBytes(localName, "item", 
4)) { + self.parsingArticle = YES; + self.currentArticle = [self.parsedFeed appendNewArticle]; + + NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; + if (attribs) { + NSString *about = attribs[kRDFAboutKey]; // RSS 1.0 guid + if (about) { + self.currentArticle.guid = about; + self.currentArticle.permalink = about; + } + } + } + else if (EqualBytes(localName, "guid", 4)) { + NSDictionary *attribs = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; + NSString *isPermaLinkValue = [attribs rsxml_objectForCaseInsensitiveKey:@"isPermaLink"]; + if (!isPermaLinkValue || ![isPermaLinkValue isEqualToString:@"false"]) { + self.guidIsPermalink = YES; + } else { + self.guidIsPermalink = NO; + } + } + break; + case 5: + if (EqualBytes(localName, "image", 5)) { + self.parsingChannelImage = YES; + } + break; } - else if (!prefix && RSSAXEqualTags(localName, kImage, kImageLength)) { - self.parsingChannelImage = YES; - } - - if (!self.parsingChannelImage) { - [self.parser beginStoringCharacters]; + if (self.parsingArticle || !self.parsingChannelImage) { + [SAXParser beginStoringCharacters]; } } @@ -388,77 +119,132 @@ static const NSInteger kTrueLength = 5; if (self.endRSSFound) { return; } + + int len = xmlStrlen(localName); - if (RSSAXEqualTags(localName, kRSS, kRSSLength)) { - self.endRSSFound = YES; + // Meta parsing + if (len == 3 && EqualBytes(localName, "rss", 3)) { self.endRSSFound = YES; } + else if (len == 4 && EqualBytes(localName, "item", 4)) { self.parsingArticle = NO; } + else if (len == 5 && EqualBytes(localName, "image", 5)) { self.parsingChannelImage = NO; } + // Always exit if prefix is set + else if (prefix != NULL) + { + if (!self.parsingArticle) { + // Feed parsing + return; + } + int prefLen = xmlStrlen(prefix); + // Article parsing + switch (len) { + case 4: + if (prefLen == 2 && EqualBytes(prefix, "dc", 2) && EqualBytes(localName, "date", 4)) + 
self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters]; + return; + case 7: + if (prefLen == 2 && EqualBytes(prefix, "dc", 2) && EqualBytes(localName, "creator", 7)) { + self.currentArticle.author = SAXParser.currentStringWithTrimmedWhitespace; + } + else if (prefLen == 7 && EqualBytes(prefix, "content", 7) && EqualBytes(localName, "encoded", 7)) { + self.currentArticle.body = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace]; + } + return; + } } - - else if (RSSAXEqualTags(localName, kImage, kImageLength)) { - self.parsingChannelImage = NO; + // Article parsing + else if (self.parsingArticle) + { + switch (len) { + case 4: + if (EqualBytes(localName, "link", 4)) { + self.currentArticle.link = [SAXParser.currentStringWithTrimmedWhitespace absoluteURLWithBase:self.baseURL]; + } + else if (EqualBytes(localName, "guid", 4)) { + self.currentArticle.guid = SAXParser.currentStringWithTrimmedWhitespace; + if (self.guidIsPermalink) { + self.currentArticle.permalink = [self.currentArticle.guid absoluteURLWithBase:self.baseURL]; + } + } + return; + case 5: + if (EqualBytes(localName, "title", 5)) + self.currentArticle.title = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace]; + return; + case 6: + if (EqualBytes(localName, "author", 6)) + self.currentArticle.author = SAXParser.currentStringWithTrimmedWhitespace; + return; + case 7: + if (EqualBytes(localName, "pubDate", 7)) + self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters]; + return; + case 11: + if (EqualBytes(localName, "description", 11)) + self.currentArticle.abstract = [self decodeHTMLEntities:SAXParser.currentStringWithTrimmedWhitespace]; + return; + } } - - else if (RSSAXEqualTags(localName, kItem, kItemLength)) { - self.parsingArticle = NO; - } - - else if (self.parsingArticle) { - [self addArticleElement:localName prefix:prefix]; - } - - else if (!self.parsingChannelImage) { - [self 
addFeedElement:localName prefix:prefix]; + // Feed parsing + else if (!self.parsingChannelImage) + { + switch (len) { + case 4: + if (EqualBytes(localName, "link", 4)) { + self.parsedFeed.link = SAXParser.currentStringWithTrimmedWhitespace; + self.baseURL = [NSURL URLWithString:self.parsedFeed.link]; + } + return; + case 5: + if (EqualBytes(localName, "title", 5)) + self.parsedFeed.title = SAXParser.currentStringWithTrimmedWhitespace; + return; + case 11: + if (EqualBytes(localName, "description", 11)) + self.parsedFeed.subtitle = SAXParser.currentStringWithTrimmedWhitespace; + return; + } } } - (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - if (RSSAXEqualTags(prefix, kRDF, kRDFlength)) { - - if (RSSAXEqualTags(name, kAbout, kAboutLength)) { - return kRDFAboutKey; - } - - return nil; - } + int len = xmlStrlen(name); if (prefix) { + if (len == 5 && EqualBytes(prefix, "rdf", 4) && EqualBytes(name, "about", 5)) { // 4 because prefix length is not checked + return kRDFAboutKey; + } return nil; } - if (RSSAXEqualTags(name, kIsPermaLink, kIsPermaLinkLength)) { - return kIsPermaLinkKey; + switch (len) { + case 3: + if (EqualBytes(name, "url", 3)) { return @"url"; } + break; + case 4: + if (EqualBytes(name, "type", 4)) { return @"type"; } + break; + case 6: + if (EqualBytes(name, "length", 6)) { return @"length"; } + break; + case 11: + if (EqualBytes(name, "isPermaLink", 11)) { return @"isPermaLink"; } + break; } - - if (RSSAXEqualTags(name, kURL, kURLLength)) { - return kURLKey; - } - - if (RSSAXEqualTags(name, kLength, kLengthLength)) { - return kLengthKey; - } - - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return kTypeKey; - } - return nil; } - (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { - static const NSUInteger falseLength = kFalseLength - 1; - static const NSUInteger trueLength = kTrueLength - 1; - - if 
(length == falseLength && RSSAXEqualBytes(bytes, kFalse, falseLength)) { - return kFalseValue; + switch (length) { + case 4: + if (EqualBytes(bytes, "true", 4)) { return @"true"; } + break; + case 5: + if (EqualBytes(bytes, "false", 5)) { return @"false"; } + break; } - - if (length == trueLength && RSSAXEqualBytes(bytes, kTrue, trueLength)) { - return kTrueValue; - } - return nil; } diff --git a/RSXML/RSSAXHTMLParser.h b/RSXML/RSSAXHTMLParser.h deleted file mode 100644 index 46305c1..0000000 --- a/RSXML/RSSAXHTMLParser.h +++ /dev/null @@ -1,49 +0,0 @@ -// -// RSSAXHTMLParser.h -// RSXML -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -@class RSSAXHTMLParser; - -@protocol RSSAXHTMLParserDelegate - -@optional - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char *)localName attributes:(const unsigned char **)attributes; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(const unsigned char *)localName; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length; - -- (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser; // If canceled, may not get called (but might). - -@end - - -@interface RSSAXHTMLParser : NSObject - - -- (instancetype)initWithDelegate:(id)delegate; - -- (void)parseData:(NSData *)data; -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes; -- (void)finishParsing; -- (void)cancel; - -@property (nonatomic, strong, readonly) NSData *currentCharacters; // nil if not storing characters. UTF-8 encoded. -@property (nonatomic, strong, readonly) NSString *currentString; // Convenience to get string version of currentCharacters. -@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; - -- (void)beginStoringCharacters; // Delegate can call from XMLStartElement. 
Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement. - -// Delegate can call from within XMLStartElement. - -- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes; - - -@end diff --git a/RSXML/RSSAXHTMLParser.m b/RSXML/RSSAXHTMLParser.m deleted file mode 100644 index 17f0b62..0000000 --- a/RSXML/RSSAXHTMLParser.m +++ /dev/null @@ -1,315 +0,0 @@ -// -// RSSAXHTMLParser.m -// RSXML -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import -#import -#import -#import "RSXMLInternal.h" - - -@interface RSSAXHTMLParser () - -@property (nonatomic) id delegate; -@property (nonatomic, assign) htmlParserCtxtPtr context; -@property (nonatomic, assign) BOOL storingCharacters; -@property (nonatomic) NSMutableData *characters; -@property (nonatomic) BOOL delegateRespondsToStartElementMethod; -@property (nonatomic) BOOL delegateRespondsToEndElementMethod; -@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod; -@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod; - -@end - - -@implementation RSSAXHTMLParser - - -+ (void)initialize { - - RSSAXInitLibXMLParser(); -} - - -#pragma mark - Init - -- (instancetype)initWithDelegate:(id)delegate { - - self = [super init]; - if (self == nil) - return nil; - - _delegate = delegate; - - if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)]) { - _delegateRespondsToStartElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)]) { - _delegateRespondsToEndElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) { - _delegateRespondsToCharactersFoundMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) { - 
_delegateRespondsToEndOfDocumentMethod = YES; - } - - return self; -} - - -#pragma mark - Dealloc - -- (void)dealloc { - - if (_context != nil) { - htmlFreeParserCtxt(_context); - _context = nil; - } - _delegate = nil; -} - - -#pragma mark - API - -static xmlSAXHandler saxHandlerStruct; - -- (void)parseData:(NSData *)data { - - [self parseBytes:data.bytes numberOfBytes:data.length]; -} - - -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes { - - if (self.context == nil) { - - xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes); - self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding); - htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT); - } - - @autoreleasepool { - htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); - } -} - - -- (void)finishParsing { - - NSAssert(self.context != nil, nil); - if (self.context == nil) - return; - - @autoreleasepool { - htmlParseChunk(self.context, nil, 0, 1); - htmlFreeParserCtxt(self.context); - self.context = nil; - self.characters = nil; - } -} - - -- (void)cancel { - - @autoreleasepool { - xmlStopParser(self.context); - } -} - - - -- (void)beginStoringCharacters { - self.storingCharacters = YES; - self.characters = [NSMutableData new]; -} - - -- (void)endStoringCharacters { - self.storingCharacters = NO; - self.characters = nil; -} - - -- (NSData *)currentCharacters { - - if (!self.storingCharacters) { - return nil; - } - - return self.characters; -} - - -- (NSString *)currentString { - - NSData *d = self.currentCharacters; - if (RSXMLIsEmpty(d)) { - return nil; - } - - return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding]; -} - - -- (NSString *)currentStringWithTrimmedWhitespace { - - return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; -} - - -#pragma mark - Attributes 
Dictionary - -- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes { - - if (!attributes) { - return nil; - } - - NSMutableDictionary *d = [NSMutableDictionary new]; - - NSInteger ix = 0; - NSString *currentKey = nil; - while (true) { - - const xmlChar *oneAttribute = attributes[ix]; - ix++; - - if (!currentKey && !oneAttribute) { - break; - } - - if (!currentKey) { - currentKey = [NSString stringWithUTF8String:(const char *)oneAttribute]; - } - else { - NSString *value = nil; - if (oneAttribute) { - value = [NSString stringWithUTF8String:(const char *)oneAttribute]; - } - - d[currentKey] = value ? value : @""; - currentKey = nil; - } - } - - return [d copy]; -} - - -#pragma mark - Callbacks - -- (void)xmlEndDocument { - - @autoreleasepool { - if (self.delegateRespondsToEndOfDocumentMethod) { - [self.delegate saxParserDidReachEndOfDocument:self]; - } - - [self endStoringCharacters]; - } -} - - -- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length { - - @autoreleasepool { - if (self.storingCharacters) { - [self.characters appendBytes:(const void *)ch length:length]; - } - - if (self.delegateRespondsToCharactersFoundMethod) { - [self.delegate saxParser:self XMLCharactersFound:ch length:length]; - } - } -} - - -- (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - @autoreleasepool { - if (self.delegateRespondsToStartElementMethod) { - - [self.delegate saxParser:self XMLStartElement:localName attributes:attributes]; - } - } -} - - -- (void)xmlEndElement:(const xmlChar *)localName { - - @autoreleasepool { - if (self.delegateRespondsToEndElementMethod) { - [self.delegate saxParser:self XMLEndElement:localName]; - } - - [self endStoringCharacters]; - } -} - - -@end - - -static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) { - - [(__bridge RSSAXHTMLParser *)context xmlStartElement:localname attributes:attributes]; -} - - -static void endElementSAX(void 
*context, const xmlChar *localname) { - [(__bridge RSSAXHTMLParser *)context xmlEndElement:localname]; -} - - -static void charactersFoundSAX(void *context, const xmlChar *ch, int len) { - [(__bridge RSSAXHTMLParser *)context xmlCharactersFound:ch length:(NSUInteger)len]; -} - - -static void endDocumentSAX(void *context) { - [(__bridge RSSAXHTMLParser *)context xmlEndDocument]; -} - - -static htmlSAXHandler saxHandlerStruct = { - nil, /* internalSubset */ - nil, /* isStandalone */ - nil, /* hasInternalSubset */ - nil, /* hasExternalSubset */ - nil, /* resolveEntity */ - nil, /* getEntity */ - nil, /* entityDecl */ - nil, /* notationDecl */ - nil, /* attributeDecl */ - nil, /* elementDecl */ - nil, /* unparsedEntityDecl */ - nil, /* setDocumentLocator */ - nil, /* startDocument */ - endDocumentSAX, /* endDocument */ - startElementSAX, /* startElement*/ - endElementSAX, /* endElement */ - nil, /* reference */ - charactersFoundSAX, /* characters */ - nil, /* ignorableWhitespace */ - nil, /* processingInstruction */ - nil, /* comment */ - nil, /* warning */ - nil, /* error */ - nil, /* fatalError //: unused error() get all the errors */ - nil, /* getParameterEntity */ - nil, /* cdataBlock */ - nil, /* externalSubset */ - XML_SAX2_MAGIC, - nil, - nil, /* startElementNs */ - nil, /* endElementNs */ - nil /* serror */ -}; - diff --git a/RSXML/RSSAXParser.h b/RSXML/RSSAXParser.h index c538b1b..b98410c 100644 --- a/RSXML/RSSAXParser.h +++ b/RSXML/RSSAXParser.h @@ -1,12 +1,29 @@ // -// RSSAXParser.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. 
+// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. @import Foundation; +#import /*Thread-safe, not re-entrant. 
@@ -22,48 +39,39 @@ @protocol RSSAXParserDelegate ++ (BOOL)isHTMLParser; // reusing class method of RSXMLParser delegate + @optional -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char **)attributes; +// Called when parsing HTML +- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes; +- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName; -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri; - -- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length; - -- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/ - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char *)name prefix:(const unsigned char *)prefix; /*Okay to return nil. 
Prefix may be nil.*/ +// Called when parsing XML (Atom, RSS, OPML) +- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes; +- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri; +// Called regardless of parser type +- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const xmlChar *)characters length:(NSUInteger)length; +- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; // If canceled, may not get called (but might). +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix; // Okay to return nil. Prefix may be nil. - (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length; - @end -void RSSAXInitLibXMLParser(void); // Needed by RSSAXHTMLParser. - -/*For use by delegate.*/ - -BOOL RSSAXEqualTags(const unsigned char *localName, const char *tag, NSInteger tagLength); -BOOL RSSAXEqualBytes(const void *bytes1, const void *bytes2, NSUInteger length); - @interface RSSAXParser : NSObject +@property (nonatomic, strong, readonly) NSData *currentCharacters; +@property (nonatomic, strong, readonly) NSString *currentString; +@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; - (instancetype)initWithDelegate:(id)delegate; -- (void)parseData:(NSData *)data; - (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes; -- (void)finishParsing; - (void)cancel; - -@property (nonatomic, strong, readonly) NSData *currentCharacters; /*nil if not storing characters. 
UTF-8 encoded.*/ -@property (nonatomic, strong, readonly) NSString *currentString; /*Convenience to get string version of currentCharacters.*/ -@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; - -- (void)beginStoringCharacters; /*Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.*/ - -/*Delegate can call from within XMLStartElement. Returns nil if numberOfAttributes < 1.*/ +- (void)beginStoringCharacters; - (NSDictionary *)attributesDictionary:(const unsigned char **)attributes numberOfAttributes:(NSInteger)numberOfAttributes; +- (NSDictionary *)attributesDictionaryHTML:(const xmlChar **)attributes; @end diff --git a/RSXML/RSSAXParser.m b/RSXML/RSSAXParser.m index b59797b..126bace 100644 --- a/RSXML/RSSAXParser.m +++ b/RSXML/RSSAXParser.m @@ -1,42 +1,57 @@ // -// RSSAXParser.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. #import #import #import #import "RSSAXParser.h" -#import "RSXMLInternal.h" @interface RSSAXParser () - @property (nonatomic, weak) id delegate; @property (nonatomic, assign) xmlParserCtxtPtr context; @property (nonatomic, assign) BOOL storingCharacters; @property (nonatomic) NSMutableData *characters; -@property (nonatomic) BOOL delegateRespondsToInternedStringMethod; -@property (nonatomic) BOOL delegateRespondsToInternedStringForValueMethod; -@property (nonatomic) BOOL delegateRespondsToStartElementMethod; -@property (nonatomic) BOOL delegateRespondsToEndElementMethod; -@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod; -@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod; - +@property (nonatomic, assign) BOOL isHTMLParser; +@property (nonatomic, assign) BOOL delegateRespondsToInternedStringMethod; +@property (nonatomic, assign) BOOL delegateRespondsToInternedStringForValueMethod; +@property (nonatomic, assign) BOOL delegateRespondsToStartElementMethod; +@property (nonatomic, assign) BOOL delegateRespondsToEndElementMethod; +@property (nonatomic, assign) BOOL delegateRespondsToCharactersFoundMethod; +@property (nonatomic, assign) BOOL delegateRespondsToEndOfDocumentMethod; @end @implementation RSSAXParser + (void)initialize { - - RSSAXInitLibXMLParser(); + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + xmlInitParser(); + }); } - #pragma mark - Init - (instancetype)initWithDelegate:(id)delegate { @@ -46,32 +61,23 @@ return nil; _delegate = delegate; - - if ([_delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)]) { - _delegateRespondsToInternedStringMethod = YES; - } - if ([_delegate 
respondsToSelector:@selector(saxParser:internedStringForValue:length:)]) { - _delegateRespondsToInternedStringForValueMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)]) { - _delegateRespondsToStartElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)]) { - _delegateRespondsToEndElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) { - _delegateRespondsToCharactersFoundMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) { - _delegateRespondsToEndOfDocumentMethod = YES; + _delegateRespondsToCharactersFoundMethod = [_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]; + _delegateRespondsToEndOfDocumentMethod = [_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]; + _delegateRespondsToInternedStringMethod = [_delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)]; + _delegateRespondsToInternedStringForValueMethod = [_delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)]; + + if ([[_delegate class] respondsToSelector:@selector(isHTMLParser)] && [[_delegate class] isHTMLParser]) { + _isHTMLParser = YES; + _delegateRespondsToStartElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)]; + _delegateRespondsToEndElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)]; + } else { + _delegateRespondsToStartElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)]; + _delegateRespondsToEndElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)]; } return self; } - -#pragma mark - 
Dealloc - - (void)dealloc { if (_context != nil) { xmlFreeParserCtxt(_context); @@ -83,28 +89,39 @@ #pragma mark - API + static xmlSAXHandler saxHandlerStruct; -- (void)parseData:(NSData *)data { - - [self parseBytes:data.bytes numberOfBytes:data.length]; -} - - +/** + Initialize new xml or html parser context and start processing of data. + */ - (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes { if (self.context == nil) { - - self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil); - xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT); + if (self.isHTMLParser) { + xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes); + self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding); + htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT); + } else { + self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil); + xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT); + } } @autoreleasepool { - xmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); + if (self.isHTMLParser) { + htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); + } else { + xmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); + } } + + [self finishParsing]; } - +/** + Sends the terminating chunk to the parser and frees the parser context. Called automatically at the end of @c parseBytes:numberOfBytes: + */ - (void)finishParsing { NSAssert(self.context != nil, nil); @@ -112,63 +129,70 @@ static xmlSAXHandler saxHandlerStruct; return; @autoreleasepool { - xmlParseChunk(self.context, nil, 0, 1); - xmlFreeParserCtxt(self.context); + if (self.isHTMLParser) { + htmlParseChunk(self.context, nil, 0, 1); + htmlFreeParserCtxt(self.context); + } else { + xmlParseChunk(self.context, nil, 0, 1); + xmlFreeParserCtxt(self.context); + } self.context = nil; self.characters = nil; } } -
+/// Will stop the sax parser from processing any further. @c saxParserDidReachEndOfDocument: will not be called. - (void)cancel { - @autoreleasepool { xmlStopParser(self.context); } } - +/** + Delegate can call from @c XMLStartElement. + Characters will be available in @c XMLEndElement as @c currentCharacters property. + Storing characters is stopped after each @c XMLEndElement. + */ - (void)beginStoringCharacters { self.storingCharacters = YES; self.characters = [NSMutableData new]; } - +/// Will be called after each closing tag and the document end. - (void)endStoringCharacters { self.storingCharacters = NO; self.characters = nil; } - +/// @return @c nil if not storing characters. UTF-8 encoded. - (NSData *)currentCharacters { - if (!self.storingCharacters) { return nil; } - return self.characters; } - +/// Convenience method to get string version of @c currentCharacters. - (NSString *)currentString { - NSData *d = self.currentCharacters; - if (RSXMLIsEmpty(d)) { + if (!d || d.length == 0) { return nil; } - return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding]; } - +/// Trim whitespace and newline characters from @c currentString. - (NSString *)currentStringWithTrimmedWhitespace { - return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; } #pragma mark - Attributes Dictionary + +/** + Delegate can call from within @c XMLStartElement. Returns @c nil if @c numberOfAttributes @c < @c 1. 
+ */ - (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes { if (numberOfAttributes < 1 || !attributes) { @@ -178,8 +202,7 @@ static xmlSAXHandler saxHandlerStruct; NSMutableDictionary *d = [NSMutableDictionary new]; @autoreleasepool { - NSInteger i = 0, j = 0; - for (i = 0, j = 0; i < numberOfAttributes; i++, j+=5) { + for (NSInteger i = 0, j = 0; i < numberOfAttributes; i++, j+=5) { NSUInteger lenValue = (NSUInteger)(attributes[j + 4] - attributes[j + 3]); NSString *value = nil; @@ -210,29 +233,48 @@ static xmlSAXHandler saxHandlerStruct; } } } + return d; +} +/** + Delegate can call from within the HTML variant of @c XMLStartElement. Reads a NULL-terminated array of alternating names and values (a missing value becomes an empty string). Returns @c nil if @c attributes is @c NULL. + */ +- (NSDictionary *)attributesDictionaryHTML:(const xmlChar **)attributes { + + if (!attributes) { + return nil; + } + + NSMutableDictionary *d = [NSMutableDictionary new]; + NSInteger ix = 0; + NSString *currentKey = nil; + while (true) { + + const xmlChar *oneAttribute = attributes[ix]; + ix++; + + if (!currentKey && !oneAttribute) { + break; + } + if (!currentKey) { + currentKey = [NSString stringWithUTF8String:(const char *)oneAttribute]; + } + else { + NSString *value = nil; + if (oneAttribute) { + value = [NSString stringWithUTF8String:(const char *)oneAttribute]; + } + d[currentKey] = (value ?
value : @""); + currentKey = nil; + } + } return d; } -#pragma mark - Equal Tags - -BOOL RSSAXEqualTags(const xmlChar *localName, const char *tag, NSInteger tagLength) { - - if (!localName) { - return NO; - } - return !strncmp((const char *)localName, tag, (size_t)tagLength); -} - -BOOL RSSAXEqualBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - - #pragma mark - Callbacks + - (void)xmlEndDocument { @autoreleasepool { @@ -261,50 +303,72 @@ BOOL RSSAXEqualBytes(const void *bytes1, const void *bytes2, NSUInteger length) - (void)xmlStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(int)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(int)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - @autoreleasepool { - if (self.delegateRespondsToStartElementMethod) { - + if (self.delegateRespondsToStartElementMethod) { + @autoreleasepool { [self.delegate saxParser:self XMLStartElement:localName prefix:prefix uri:uri numberOfNamespaces:numberOfNamespaces namespaces:namespaces numberOfAttributes:numberOfAttributes numberDefaulted:numberDefaulted attributes:attributes]; } } } +- (void)xmlStartHTMLElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { + + if (self.delegateRespondsToStartElementMethod) { + @autoreleasepool { + [self.delegate saxParser:self XMLStartElement:localName attributes:attributes]; + } + } +} + + - (void)xmlEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { @autoreleasepool { if (self.delegateRespondsToEndElementMethod) { [self.delegate saxParser:self XMLEndElement:localName prefix:prefix uri:uri]; } - [self endStoringCharacters]; } } +- (void)xmlEndHTMLElement:(const xmlChar *)localName { + + @autoreleasepool { + if (self.delegateRespondsToEndElementMethod) { + [self.delegate 
saxParser:self XMLEndElement:localName]; + } + [self endStoringCharacters]; + } +} + @end static void startElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) { - [(__bridge RSSAXParser *)context xmlStartElement:localname prefix:prefix uri:URI numberOfNamespaces:nb_namespaces namespaces:namespaces numberOfAttributes:nb_attributes numberDefaulted:nb_defaulted attributes:attributes]; } - static void endElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) { [(__bridge RSSAXParser *)context xmlEndElement:localname prefix:prefix uri:URI]; } - static void charactersFoundSAX(void *context, const xmlChar *ch, int len) { [(__bridge RSSAXParser *)context xmlCharactersFound:ch length:(NSUInteger)len]; } - static void endDocumentSAX(void *context) { [(__bridge RSSAXParser *)context xmlEndDocument]; } +static void startElementSAX_HTML(void *context, const xmlChar *localname, const xmlChar **attributes) { + [(__bridge RSSAXParser *)context xmlStartHTMLElement:localname attributes:attributes]; +} + +static void endElementSAX_HTML(void *context, const xmlChar *localname) { + [(__bridge RSSAXParser *)context xmlEndHTMLElement:localname]; +} + static xmlSAXHandler saxHandlerStruct = { nil, /* internalSubset */ @@ -321,8 +385,8 @@ static xmlSAXHandler saxHandlerStruct = { nil, /* setDocumentLocator */ nil, /* startDocument */ endDocumentSAX, /* endDocument */ - nil, /* startElement*/ - nil, /* endElement */ + startElementSAX_HTML, /* startElement*/ + endElementSAX_HTML, /* endElement */ nil, /* reference */ charactersFoundSAX, /* characters */ nil, /* ignorableWhitespace */ @@ -340,13 +404,3 @@ static xmlSAXHandler saxHandlerStruct = { endElementSAX, /* endElementNs */ nil /* serror */ }; - - -void RSSAXInitLibXMLParser(void) { - - static dispatch_once_t onceToken; - 
dispatch_once(&onceToken, ^{ - xmlInitParser(); - }); -} - diff --git a/RSXML/RSXML.h b/RSXML/RSXML.h index b3242d2..342a78d 100644 --- a/RSXML/RSXML.h +++ b/RSXML/RSXML.h @@ -1,36 +1,48 @@ // -// RSXML.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
@import Foundation; - -#import +// General +#import +#import +#import #import +#import +// RSS & Atom Feeds #import -#import #import #import #import #import +// OPML #import #import -#import - -#import -#import - // HTML - -#import - #import -#import #import +#import diff --git a/RSXML/RSXMLData.h b/RSXML/RSXMLData.h index 5004884..4129ce4 100644 --- a/RSXML/RSXMLData.h +++ b/RSXML/RSXMLData.h @@ -1,22 +1,41 @@ // -// RSXMLData.h -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 8/24/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
@import Foundation; +#import "RSXMLParser.h" -NS_ASSUME_NONNULL_BEGIN +@class RSXMLParser; -@interface RSXMLData : NSObject +@interface RSXMLData <__covariant T : RSXMLParser *> : NSObject +@property (nonatomic, readonly, nonnull) NSString *urlString; +@property (nonatomic, readonly, nullable) NSData *data; +@property (nonatomic, readonly, nullable) Class parserClass; +@property (nonatomic, readonly, nullable) NSError *parserError; -- (instancetype)initWithData:(NSData *)data urlString:(NSString *)urlString; +- (instancetype)initWithData:(NSData * _Nonnull)data urlString:(NSString * _Nonnull)urlString; -@property (nonatomic, readonly) NSData *data; -@property (nonatomic, readonly) NSString *urlString; +- (T _Nullable)getParser; +- (BOOL)canParseData; @end - -NS_ASSUME_NONNULL_END diff --git a/RSXML/RSXMLData.m b/RSXML/RSXMLData.m index d6a36f1..f7cd52c 100644 --- a/RSXML/RSXMLData.m +++ b/RSXML/RSXMLData.m @@ -1,28 +1,212 @@ // -// RSXMLData.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 8/24/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. #import "RSXMLData.h" +#import "RSXMLError.h" +// Parser classes +#import "RSRSSParser.h" +#import "RSAtomParser.h" +#import "RSOPMLParser.h" +#import "RSHTMLMetadataParser.h" @implementation RSXMLData +static const NSUInteger minNumberOfBytesToSearch = 20; +static const NSInteger numberOfCharactersToSearch = 4096; - (instancetype)initWithData:(NSData *)data urlString:(NSString *)urlString { - self = [super init]; - if (!self) { - return nil; + if (self) { + _data = data; + _urlString = urlString; + _parserError = nil; + _parserClass = [self determineParserClass]; // will set error + if (!_parserClass && _parserError) + _data = nil; } - - _data = data; - _urlString = urlString; - return self; } +/** + Get location of @c str in data. May be inaccurate since UTF8 uses multi-byte characters. + */ +- (NSInteger)findCString:(const char*)str { + char *foundStr = strnstr(_data.bytes, str, numberOfCharactersToSearch); + if (foundStr == NULL) { + return NSNotFound; + } + return foundStr - (char*)_data.bytes; +} + +/** + @return @c YES if any of the provided tags is found within the first 4096 bytes. + */ +- (BOOL)matchAny:(const char*[])tags count:(int)len { + for (int i = 0; i < len; i++) { + if ([self findCString:tags[i]] != NSNotFound) { + return YES; + } + } + return NO; +} + +/** + @return @c YES if all of the provided tags are found within the first 4096 bytes. + */ +- (BOOL)matchAll:(const char*[])tags count:(int)len { + for (int i = 0; i < len; i++) { + if ([self findCString:tags[i]] == NSNotFound) { + return NO; + } + } + return YES; +} + +/** + Do a fast @c strnstr() search on the @c char* data. + All strings must match exactly and in the same order provided. 
+ */ +- (BOOL)matchAllInCorrectOrder:(const char*[])tags count:(int)len { + NSInteger oldPos = 0; + for (int i = 0; i < len; i++) { + NSInteger newPos = [self findCString:tags[i]]; + if (newPos == NSNotFound || newPos < oldPos) { + return NO; + } + oldPos = newPos; + } + return YES; +} + + +#pragma mark - Determine XML Parser + + +/** + Try to find the correct parser for the underlying data. Will return @c nil and @c error if couldn't be determined. + + @return Parser class: @c RSRSSParser, @c RSAtomParser, @c RSOPMLParser or @c RSHTMLMetadataParser. + */ +- (nullable Class)determineParserClass { + // TODO: check for things like images and movies and return nil. + if (!_data || _data.length < minNumberOfBytesToSearch) { + // TODO: check size, type, etc. + _parserError = RSXMLMakeError(RSXMLErrorNoData); + return nil; + } + if (NSNotFound == [self findCString:"<"]) { + _parserError = RSXMLMakeError(RSXMLErrorMissingLeftCaret); + return nil; + } + if ([self matchAll:(const char*[]){"level == XML_ERR_FATAL) { int errCode = err->code; char * msg = err->message; //if (err->level == XML_ERR_FATAL) diff --git a/RSXML/RSXMLInternal.h b/RSXML/RSXMLInternal.h deleted file mode 100644 index cb719be..0000000 --- a/RSXML/RSXMLInternal.h +++ /dev/null @@ -1,31 +0,0 @@ -// -// RSXMLInternal.h -// RSXML -// -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -BOOL RSXMLIsEmpty(id _Nullable obj); -BOOL RSXMLStringIsEmpty(NSString * _Nullable s); - - -@interface NSString (RSXMLInternal) - -- (NSString *)rsxml_md5HashString; - -@end - - -@interface NSDictionary (RSXMLInternal) - -- (nullable id)rsxml_objectForCaseInsensitiveKey:(NSString *)key; - -@end - -NS_ASSUME_NONNULL_END - diff --git a/RSXML/RSXMLInternal.m b/RSXML/RSXMLInternal.m deleted file mode 100644 index 130b01d..0000000 --- a/RSXML/RSXMLInternal.m +++ /dev/null @@ -1,83 +0,0 @@ -// -// RSXMLInternal.m -// RSXML -// -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import -#import "RSXMLInternal.h" - - -static BOOL RSXMLIsNil(id obj) { - - return obj == nil || obj == [NSNull null]; -} - -BOOL RSXMLIsEmpty(id obj) { - - if (RSXMLIsNil(obj)) { - return YES; - } - - if ([obj respondsToSelector:@selector(count)]) { - return [obj count] < 1; - } - - if ([obj respondsToSelector:@selector(length)]) { - return [obj length] < 1; - } - - return NO; /*Shouldn't get here very often.*/ -} - -BOOL RSXMLStringIsEmpty(NSString *s) { - - return RSXMLIsNil(s) || s.length < 1; -} - - -@implementation NSString (RSXMLInternal) - -- (NSData *)rsxml_md5Hash { - - NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding]; - unsigned char hash[CC_MD5_DIGEST_LENGTH]; - CC_MD5(data.bytes, (CC_LONG)data.length, hash); - - return [NSData dataWithBytes:(const void *)hash length:CC_MD5_DIGEST_LENGTH]; -} - -- (NSString *)rsxml_md5HashString { - - NSData *md5Data = [self rsxml_md5Hash]; - const Byte *bytes = md5Data.bytes; - return [NSString stringWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]]; -} - -@end - - -@implementation NSDictionary (RSXMLInternal) - - -- (nullable 
id)rsxml_objectForCaseInsensitiveKey:(NSString *)key { - - id obj = self[key]; - if (obj) { - return obj; - } - - for (NSString *oneKey in self.allKeys) { - - if ([oneKey isKindOfClass:[NSString class]] && [key caseInsensitiveCompare:oneKey] == NSOrderedSame) { - return self[oneKey]; - } - } - - return nil; -} - - -@end diff --git a/RSXML/RSXMLParser.h b/RSXML/RSXMLParser.h new file mode 100644 index 0000000..eb18953 --- /dev/null +++ b/RSXML/RSXMLParser.h @@ -0,0 +1,69 @@ +// +// MIT License (MIT) +// +// Copyright (c) 2018 Oleg Geier +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +@import Foundation; +#import "RSSAXParser.h" + +#define EqualBytes(bytes1, bytes2, length) (memcmp(bytes1, bytes2, length) == 0) +//#define EqualBytes(bytes1, bytes2, length) (!strncmp(bytes1, bytes2, length)) + +@class RSXMLData; + + +@protocol RSXMLParserDelegate +@optional +/** + A subclass may return a list of tags that the data @c (RSXMLData) should include. + Only if all strings are found (in correct order) the parser will be selected. + + @note This method will only be called if the original data has some weird encoding. + @c RSXMLData will first try to convert the data to an @c UTF8 string, then @c UTF16. + If both conversions fail the parser will be deemed as not suitable for this data. + */ ++ (NSArray *)parserRequireOrderedTags; +/// @return Return @c NO to cancel parsing before it even started. E.g. check if parser is of correct type. +- (BOOL)xmlParserWillStartParsing; + +@required +/// @return @c YES if parser supports parsing feeds (RSS or Atom). ++ (BOOL)isFeedParser; +/// @return @c YES if parser supports parsing OPML files. ++ (BOOL)isOPMLParser; +/// @return @c YES if parser supports parsing HTML files. ++ (BOOL)isHTMLParser; +/// Keeps an internal pointer to the @c RSXMLData and initializes a new @c RSSAXParser. +- (instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData; +/// Will be called after the parsing is finished. @return Reference to parsed object. 
+- (id)xmlParserWillReturnDocument; +@end + + +@interface RSXMLParser<__covariant T> : NSObject +@property (nonatomic, readonly, nonnull, copy) NSString *documentURI; + +- (T _Nullable)parseSync:(NSError ** _Nullable)error; +- (void)parseAsync:(void(^)(T _Nullable parsedDocument, NSError * _Nullable error))block; +- (BOOL)canParse; + +@end + diff --git a/RSXML/RSXMLParser.m b/RSXML/RSXMLParser.m new file mode 100644 index 0000000..466ff1e --- /dev/null +++ b/RSXML/RSXMLParser.m @@ -0,0 +1,143 @@ +// +// MIT License (MIT) +// +// Copyright (c) 2018 Oleg Geier +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#import "RSXMLParser.h" +#import "RSXMLData.h" +#import "RSXMLError.h" + +@interface RSXMLParser() +@property (nonatomic) RSSAXParser *parser; +@property (nonatomic) NSData *xmlData; +@property (nonatomic, copy) NSError *xmlInputError; +@end + + +@implementation RSXMLParser + ++ (BOOL)isFeedParser { return NO; } // override ++ (BOOL)isOPMLParser { return NO; } // override ++ (BOOL)isHTMLParser { return NO; } // override +- (id)xmlParserWillReturnDocument { return nil; } // override + +/** + Designated initializer. Runs a check whether it matches the detected parser in @c RSXMLData. + */ +- (instancetype)initWithXMLData:(nonnull RSXMLData *)xmlData { + self = [super init]; + if (self) { + _documentURI = [xmlData.urlString copy]; + _xmlInputError = [xmlData.parserError copy]; + [self checkIfParserMatches:xmlData.parserClass]; + _xmlData = xmlData.data; + if (!_xmlData) { + _xmlInputError = RSXMLMakeError(RSXMLErrorNoData); + } + _parser = [[RSSAXParser alloc] initWithDelegate:self]; + } + return self; +} + +/** + Parse the XML data on whatever thread this method is called. + + @param error Sets @c error if parser gets unrecognized data or libxml runs into a parsing error. + @return The parsed object. The object type depends on the underlying data. @c RSParsedFeed, @c RSOPMLItem or @c RSHTMLMetadata. + */ +- (id _Nullable)parseSync:(NSError **)error { + if (_xmlInputError) { + if (error) *error = _xmlInputError; + return nil; + } + + if ([self respondsToSelector:@selector(xmlParserWillStartParsing)] && ![self xmlParserWillStartParsing]) + return nil; + + @autoreleasepool { + xmlResetLastError(); + [_parser parseBytes:_xmlData.bytes numberOfBytes:_xmlData.length]; + if (error) { + *error = RSXMLMakeErrorFromLIBXMLError(xmlGetLastError()); + xmlResetLastError(); + } + } + return [self xmlParserWillReturnDocument]; +} + +/** + Dispatch new background thread, parse the data synchroniously on the background thread and exec callback on the main thread. 
+ */ +- (void)parseAsync:(void(^)(id parsedDocument, NSError *error))block { + dispatch_async(dispatch_get_global_queue(QOS_CLASS_UTILITY, 0), ^{ // QOS_CLASS_DEFAULT + @autoreleasepool { + NSError *error; + id obj = [self parseSync:&error]; + dispatch_async(dispatch_get_main_queue(), ^{ + block(obj, error); + }); + } + }); +} + +/// @return @c YES if @c .xmlInputError is not @c nil. +- (BOOL)canParse { + return (self.xmlInputError != nil); +} + + +#pragma mark - Check Parser Type Matches + + +/** + @return Returns either @c ExpectingFeed, @c ExpectingOPML, @c ExpectingHTML. + @return @c RSXMLErrorNoData for an unexpected class (e.g., if @c RSXMLParser is used directly). + */ +- (RSXMLError)getExpectedErrorForClass:(Class)cls { + if ([cls isFeedParser]) + return RSXMLErrorExpectingFeed; + if ([cls isOPMLParser]) + return RSXMLErrorExpectingOPML; + if ([cls isHTMLParser]) + return RSXMLErrorExpectingHTML; + return RSXMLErrorNoData; // will result in 'Unknown format' +} + +/** + Check whether parsing class matches the expected parsing class. If not set @c .xmlInputError along the way. + + @return @c YES if @c parserClass matches, @c NO otherwise. If @c NO is returned, @c parserError is set also. 
+ */ +- (BOOL)checkIfParserMatches:(Class)xmlParserClass { + if (!xmlParserClass) + return NO; + if (xmlParserClass != [self class]) { // && !_xmlInputError + RSXMLError current = [self getExpectedErrorForClass:[self class]]; + RSXMLError expected = [self getExpectedErrorForClass:xmlParserClass]; + if (current != expected) { + _xmlInputError = RSXMLMakeErrorWrongParser(current, expected); + return NO; + } + } + return YES; // only if no error was set (not now, nor before) +} + +@end diff --git a/RSXMLTests/RSDateParserTests.m b/RSXMLTests/RSDateParserTests.m index b53a4cd..df346eb 100644 --- a/RSXMLTests/RSDateParserTests.m +++ b/RSXMLTests/RSDateParserTests.m @@ -1,10 +1,25 @@ // -// RSDateParserTests.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
#import @import RSXML; diff --git a/RSXMLTests/RSEntityTests.m b/RSXMLTests/RSEntityTests.m index 3429b43..e9c9068 100644 --- a/RSXMLTests/RSEntityTests.m +++ b/RSXMLTests/RSEntityTests.m @@ -1,10 +1,25 @@ // -// RSEntityTests.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. #import @import RSXML; @@ -15,7 +30,6 @@ @implementation RSEntityTests - - (void)testInnerAmpersand { NSString *expectedResult = @"A&P"; diff --git a/RSXMLTests/RSHTMLTests.m b/RSXMLTests/RSHTMLTests.m index f58b20c..0b6e31b 100644 --- a/RSXMLTests/RSHTMLTests.m +++ b/RSXMLTests/RSHTMLTests.m @@ -1,13 +1,29 @@ // -// RSHTMLTests.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 3/5/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
+// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
-@import RSXML; #import +@import RSXML; @interface RSHTMLTests : XCTestCase @@ -15,169 +31,109 @@ @implementation RSHTMLTests ++ (NSArray *)defaultPerformanceMetrics { + return @[XCTPerformanceMetric_WallClockTime, @"com.apple.XCTPerformanceMetric_TotalHeapAllocationsKilobytes"]; +} -+ (RSXMLData *)xmlData:(NSString *)title urlString:(NSString *)urlString { - +- (RSXMLData *)xmlData:(NSString *)title urlString:(NSString *)urlString { NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:title ofType:@"html" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - return [[RSXMLData alloc] initWithData:d urlString:urlString]; + return [[RSXMLData alloc] initWithData:[[NSData alloc] initWithContentsOfFile:s] urlString:urlString]; } - -+ (RSXMLData *)daringFireballData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - xmlData = [self xmlData:@"DaringFireball" urlString:@"http://daringfireball.net/"]; - }); - - return xmlData; -} - - -+ (RSXMLData *)furboData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - xmlData = [self xmlData:@"furbo" urlString:@"http://furbo.org/"]; - }); - - return xmlData; -} - - -+ (RSXMLData *)inessentialData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - xmlData = [self xmlData:@"inessential" urlString:@"http://inessential.com/"]; - }); - - return xmlData; -} - - -+ (RSXMLData *)sixcolorsData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - xmlData = [self xmlData:@"sixcolors" urlString:@"https://sixcolors.com/"]; - }); - - return xmlData; -} - - - (void)testDaringFireball { - RSXMLData *xmlData = [[self class] daringFireballData]; - RSHTMLMetadata *metadata = [RSHTMLMetadataParser HTMLMetadataWithXMLData:xmlData]; - + RSXMLData *xmlData = [self 
xmlData:@"DaringFireball" urlString:@"http://daringfireball.net/"]; + XCTAssertTrue([xmlData.parserClass isHTMLParser]); + RSHTMLMetadataParser *parser = [[RSHTMLMetadataParser alloc] initWithXMLData:xmlData]; + NSError *error; + RSHTMLMetadata *metadata = [parser parseSync:&error]; + XCTAssertNil(error); XCTAssertEqualObjects(metadata.faviconLink, @"http://daringfireball.net/graphics/favicon.ico?v=005"); XCTAssertTrue(metadata.feedLinks.count == 1); RSHTMLMetadataFeedLink *feedLink = metadata.feedLinks[0]; XCTAssertNil(feedLink.title); - XCTAssertEqualObjects(feedLink.type, @"application/atom+xml"); - XCTAssertEqualObjects(feedLink.urlString, @"http://daringfireball.net/feeds/main"); -} - - -- (void)testDaringFireballPerformance { - - RSXMLData *xmlData = [[self class] daringFireballData]; - + XCTAssertEqual(feedLink.type, RSFeedTypeAtom); + XCTAssertEqualObjects(feedLink.link, @"http://daringfireball.net/feeds/main"); + [self measureBlock:^{ - (void)[RSHTMLMetadataParser HTMLMetadataWithXMLData:xmlData]; + for (int i = 0; i < 10; i++) + [parser parseSync:nil]; }]; } - (void)testFurbo { - RSXMLData *xmlData = [[self class] furboData]; - RSHTMLMetadata *metadata = [RSHTMLMetadataParser HTMLMetadataWithXMLData:xmlData]; - + RSXMLData *xmlData = [self xmlData:@"furbo" urlString:@"http://furbo.org/"]; + XCTAssertTrue([xmlData.parserClass isHTMLParser]); + RSHTMLMetadataParser *parser = [[RSHTMLMetadataParser alloc] initWithXMLData:xmlData]; + NSError *error; + RSHTMLMetadata *metadata = [parser parseSync:&error]; + XCTAssertNil(error); XCTAssertEqualObjects(metadata.faviconLink, @"http://furbo.org/favicon.ico"); XCTAssertTrue(metadata.feedLinks.count == 1); RSHTMLMetadataFeedLink *feedLink = metadata.feedLinks[0]; XCTAssertEqualObjects(feedLink.title, @"Iconfactory News Feed"); - XCTAssertEqualObjects(feedLink.type, @"application/rss+xml"); -} - - -- (void)testFurboPerformance { - - RSXMLData *xmlData = [[self class] furboData]; - + XCTAssertEqual(feedLink.type, 
RSFeedTypeRSS); + [self measureBlock:^{ - (void)[RSHTMLMetadataParser HTMLMetadataWithXMLData:xmlData]; + for (int i = 0; i < 10; i++) + [parser parseSync:nil]; }]; } - - (void)testInessential { - RSXMLData *xmlData = [[self class] inessentialData]; - RSHTMLMetadata *metadata = [RSHTMLMetadataParser HTMLMetadataWithXMLData:xmlData]; - + RSXMLData *xmlData = [self xmlData:@"inessential" urlString:@"http://inessential.com/"]; + XCTAssertTrue([xmlData.parserClass isHTMLParser]); + RSHTMLMetadataParser *parser = [[RSHTMLMetadataParser alloc] initWithXMLData:xmlData]; + NSError *error; + RSHTMLMetadata *metadata = [parser parseSync:&error]; + XCTAssertNil(error); XCTAssertNil(metadata.faviconLink); XCTAssertTrue(metadata.feedLinks.count == 1); RSHTMLMetadataFeedLink *feedLink = metadata.feedLinks[0]; XCTAssertEqualObjects(feedLink.title, @"RSS"); - XCTAssertEqualObjects(feedLink.type, @"application/rss+xml"); - XCTAssertEqualObjects(feedLink.urlString, @"http://inessential.com/xml/rss.xml"); - - XCTAssertEqual(metadata.appleTouchIcons.count, 0u); -} - - -- (void)testInessentialPerformance { - - RSXMLData *xmlData = [[self class] inessentialData]; + XCTAssertEqual(feedLink.type, RSFeedTypeRSS); + XCTAssertEqualObjects(feedLink.link, @"http://inessential.com/xml/rss.xml"); + XCTAssertEqual(metadata.iconLinks.count, 0u); + [self measureBlock:^{ - (void)[RSHTMLMetadataParser HTMLMetadataWithXMLData:xmlData]; + for (int i = 0; i < 10; i++) + [parser parseSync:nil]; }]; } - - (void)testSixcolors { - RSXMLData *xmlData = [[self class] sixcolorsData]; - RSHTMLMetadata *metadata = [RSHTMLMetadataParser HTMLMetadataWithXMLData:xmlData]; + RSXMLData *xmlData = [self xmlData:@"sixcolors" urlString:@"https://sixcolors.com/"]; + XCTAssertTrue([xmlData.parserClass isHTMLParser]); + RSHTMLMetadataParser *parser = [[RSHTMLMetadataParser alloc] initWithXMLData:xmlData]; + NSError *error; + RSHTMLMetadata *metadata = [parser parseSync:&error]; + XCTAssertNil(error); 
XCTAssertEqualObjects(metadata.faviconLink, @"https://sixcolors.com/images/favicon.ico"); XCTAssertTrue(metadata.feedLinks.count == 1); RSHTMLMetadataFeedLink *feedLink = metadata.feedLinks[0]; XCTAssertEqualObjects(feedLink.title, @"RSS"); - XCTAssertEqualObjects(feedLink.type, @"application/rss+xml"); - XCTAssertEqualObjects(feedLink.urlString, @"http://feedpress.me/sixcolors"); + XCTAssertEqual(feedLink.type, RSFeedTypeRSS); + XCTAssertEqualObjects(feedLink.link, @"http://feedpress.me/sixcolors"); - XCTAssertEqual(metadata.appleTouchIcons.count, 6u); - RSHTMLMetadataAppleTouchIcon *icon = metadata.appleTouchIcons[3]; - XCTAssertEqualObjects(icon.rel, @"apple-touch-icon"); + XCTAssertEqual(metadata.iconLinks.count, 6u); + RSHTMLMetadataIconLink *icon = metadata.iconLinks[3]; + XCTAssertEqualObjects(icon.title, @"apple-touch-icon"); XCTAssertEqualObjects(icon.sizes, @"120x120"); - XCTAssertEqualObjects(icon.urlString, @"https://sixcolors.com/apple-touch-icon-120.png"); -} - - -- (void)testSixcolorsPerformance { - - RSXMLData *xmlData = [[self class] sixcolorsData]; - + XCTAssertEqual([icon getSize].width, 120); + XCTAssertEqualObjects(icon.link, @"https://sixcolors.com/apple-touch-icon-120.png"); + [self measureBlock:^{ - (void)[RSHTMLMetadataParser HTMLMetadataWithXMLData:xmlData]; + for (int i = 0; i < 10; i++) + [parser parseSync:nil]; }]; } @@ -185,32 +141,35 @@ - (void)testSixColorsLinks { - RSXMLData *xmlData = [[self class] sixcolorsData]; - NSArray *links = [RSHTMLLinkParser htmlLinksWithData:xmlData]; - - NSString *linkToFind = @"https://www.theincomparable.com/theincomparable/290/index.php"; - NSString *textToFind = @"this week’s episode of The Incomparable"; + RSXMLData *xmlData = [self xmlData:@"sixcolors" urlString:@"https://sixcolors.com/"]; + XCTAssertTrue([xmlData.parserClass isHTMLParser]); + RSHTMLLinkParser *parser = [[RSHTMLLinkParser alloc] initWithXMLData:xmlData]; + NSError *error; + NSArray *links = [parser parseSync:&error]; + 
XCTAssertNil(error); BOOL found = NO; - for (RSHTMLLink *oneLink in links) { - - if ([oneLink.urlString isEqualToString:linkToFind] && [oneLink.text isEqualToString:textToFind]) { + for (RSHTMLMetadataAnchor *oneLink in links) { + if ([oneLink.title isEqualToString:@"this week’s episode of The Incomparable"] && + [oneLink.link isEqualToString:@"https://www.theincomparable.com/theincomparable/290/index.php"]) + { found = YES; break; } } - + // item No 11 to ensure .text removes + XCTAssertEqualObjects(links[11].title, @"Podcasting"); + XCTAssertEqualObjects(links[11].link, @"https://sixcolors.com/topic/podcasting/"); + // item No. 18 & 19 to ensure 'Topics' is skipped + XCTAssertEqualObjects(links[18].title, @"Podcasts"); + XCTAssertEqualObjects(links[18].link, @"https://sixcolors.com/podcasts/"); + XCTAssertEqualObjects(links[19].title, @"Gift Guide"); + XCTAssertEqualObjects(links[19].link, @"https://sixcolors.com/topic/giftguide/"); XCTAssertTrue(found, @"Expected link should have been found."); - XCTAssertEqual(links.count, 131u, @"Expected 131 links."); -} - - -- (void)testSixColorsLinksPerformance { - - RSXMLData *xmlData = [[self class] sixcolorsData]; + XCTAssertEqual(links.count, 130u, @"Expected 130 links."); [self measureBlock:^{ - (void)[RSHTMLLinkParser htmlLinksWithData:xmlData]; + [parser parseSync:nil]; }]; } diff --git a/RSXMLTests/RSOPMLTests.m b/RSXMLTests/RSOPMLTests.m index 19a34d0..39582f7 100644 --- a/RSXMLTests/RSOPMLTests.m +++ b/RSXMLTests/RSOPMLTests.m @@ -1,10 +1,26 @@ // -// RSOPMLTests.m -// RSXML +// MIT License (MIT) // -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
+// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
#import @import RSXML; @@ -15,63 +31,62 @@ @implementation RSOPMLTests -+ (RSXMLData *)subsData { ++ (NSArray *)defaultPerformanceMetrics { + return @[XCTPerformanceMetric_WallClockTime, @"com.apple.XCTPerformanceMetric_TotalHeapAllocationsKilobytes"]; +} - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:@"Subs" ofType:@"opml" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - xmlData = [[RSXMLData alloc] initWithData:d urlString:@"http://example.org/"]; - }); - - return xmlData; +- (RSXMLData*)xmlFile:(NSString*)name extension:(NSString*)ext { + NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:name ofType:ext inDirectory:@"Resources"]; + if (s == nil) return nil; + NSData *d = [[NSData alloc] initWithContentsOfFile:s]; + return [[RSXMLData alloc] initWithData:d urlString:[NSString stringWithFormat:@"%@.%@", name, ext]]; } - (void)testNotOPML { - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:@"DaringFireball" ofType:@"rss" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - RSXMLData *xmlData = [[RSXMLData alloc] initWithData:d urlString:@"http://example.org/"]; + NSError *error; + RSXMLData *xmlData = [self xmlFile:@"DaringFireball" extension:@"atom"]; + XCTAssertNotEqualObjects(xmlData.parserClass, [RSOPMLParser class]); + XCTAssertNil(xmlData.parserError); + RSOPMLParser *parser = [[RSOPMLParser alloc] initWithXMLData:xmlData]; - XCTAssertNotNil(parser.error); - XCTAssert(parser.error.code == RSXMLErrorFileNotOPML); - XCTAssert([parser.error.domain isEqualTo:kRSXMLParserErrorDomain]); + RSOPMLItem *document = [parser parseSync:&error]; + XCTAssertNil(document); + XCTAssertNotNil(error); + XCTAssertEqual(error.code, RSXMLErrorExpectingOPML); + XCTAssertEqualObjects(error.domain, kRSXMLParserErrorDomain); - d = [[NSData alloc] 
initWithContentsOfFile:@"/System/Library/Kernels/kernel"]; - xmlData = [[RSXMLData alloc] initWithData:d urlString:@"/System/Library/Kernels/kernel"]; - parser = [[RSOPMLParser alloc] initWithXMLData:xmlData]; - XCTAssertNotNil(parser.error); + xmlData = [[RSXMLData alloc] initWithData:[[NSData alloc] initWithContentsOfFile:@"/System/Library/Kernels/kernel"] + urlString:@"/System/Library/Kernels/kernel"]; + XCTAssertNotNil(xmlData.parserError); + XCTAssert(xmlData.parserError.code == RSXMLErrorMissingLeftCaret); + RSXMLParser *parser2 = [xmlData getParser]; + XCTAssertNil(parser2); + XCTAssertNotNil(xmlData.parserError); + XCTAssert(xmlData.parserError.code == RSXMLErrorMissingLeftCaret); // error should not be overwritten + } - -- (void)testSubsPerformance { - - RSXMLData *xmlData = [[self class] subsData]; - - [self measureBlock:^{ - (void)[[RSOPMLParser alloc] initWithXMLData:xmlData]; - }]; -} - - - (void)testSubsStructure { - RSXMLData *xmlData = [[self class] subsData]; - - RSOPMLParser *parser = [[RSOPMLParser alloc] initWithXMLData:xmlData]; - XCTAssertNotNil(parser); - - RSOPMLItem *document = parser.opmlDocument; + RSXMLData *xmlData = [self xmlFile:@"Subs" extension:@"opml"]; + XCTAssertEqualObjects(xmlData.parserClass, [RSOPMLParser class]); + + NSError *error; + RSOPMLParser *parser = [xmlData getParser]; + RSOPMLItem *document = [parser parseSync:&error]; XCTAssertNotNil(document); - XCTAssert([document.displayName isEqualToString:@"Subs"]); - XCTAssert([document.children.firstObject.displayName isEqualToString:@"Daring Fireball"]); - XCTAssert([document.children.lastObject.displayName isEqualToString:@"Writers"]); - XCTAssert([document.children.lastObject.children.lastObject.displayName isEqualToString:@"Gerrold"]); + XCTAssertEqualObjects(document.displayName, @"Subs"); + XCTAssertEqualObjects(document.children.firstObject.displayName, @"Daring Fireball"); + XCTAssertEqualObjects(document.children.lastObject.displayName, @"Writers"); + 
XCTAssertEqualObjects(document.children.lastObject.children.lastObject.displayName, @"Gerrold"); [self checkStructureForOPMLItem:document isRoot:YES]; //NSLog(@"\n%@", [document recursiveDescription]); + + [self measureBlock:^{ + [parser parseSync:nil]; + }]; } - (void)checkStructureForOPMLItem:(RSOPMLItem *)item isRoot:(BOOL)root { @@ -98,5 +113,4 @@ } } - @end diff --git a/RSXMLTests/RSXMLTests.m b/RSXMLTests/RSXMLTests.m index b9702e5..9621dc8 100644 --- a/RSXMLTests/RSXMLTests.m +++ b/RSXMLTests/RSXMLTests.m @@ -1,10 +1,26 @@ // -// RSXMLTests.m -// RSXMLTests +// MIT License (MIT) // -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// Copyright (c) 2016 Brent Simmons +// Copyright (c) 2018 Oleg Geier // +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
#import @import RSXML; @@ -15,226 +31,253 @@ @implementation RSXMLTests -+ (RSXMLData *)oftData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:@"OneFootTsunami" ofType:@"atom" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - xmlData = [[RSXMLData alloc] initWithData:d urlString:@"http://onefoottsunami.com/"]; - }); - - return xmlData; +/** @see https://indiestack.com/2018/02/xcodes-secret-performance-tests/ + + "com.apple.XCTPerformanceMetric_WallClockTime" + "com.apple.XCTPerformanceMetric_UserTime" + "com.apple.XCTPerformanceMetric_RunTime" + "com.apple.XCTPerformanceMetric_SystemTime" + "com.apple.XCTPerformanceMetric_HighWaterMarkForHeapAllocations" + "com.apple.XCTPerformanceMetric_HighWaterMarkForVMAllocations" + "com.apple.XCTPerformanceMetric_PersistentHeapAllocations" + "com.apple.XCTPerformanceMetric_PersistentHeapAllocationsNodes" + "com.apple.XCTPerformanceMetric_PersistentVMAllocations" + "com.apple.XCTPerformanceMetric_TemporaryHeapAllocationsKilobytes" + "com.apple.XCTPerformanceMetric_TotalHeapAllocationsKilobytes" + "com.apple.XCTPerformanceMetric_TransientHeapAllocationsKilobytes" + "com.apple.XCTPerformanceMetric_TransientHeapAllocationsNodes" + "com.apple.XCTPerformanceMetric_TransientVMAllocationsKilobytes" + */ ++ (NSArray *)defaultPerformanceMetrics { + return @[XCTPerformanceMetric_WallClockTime, @"com.apple.XCTPerformanceMetric_TotalHeapAllocationsKilobytes"]; } +// http://onefoottsunami.com/ +// http://scripting.com/ +// http://manton.org/ +// http://daringfireball.net/ +// http://katiefloyd.com/ +// https://medium.com/@emarley -+ (RSXMLData *)scriptingNewsData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:@"scriptingNews" ofType:@"rss" 
inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - xmlData = [[RSXMLData alloc] initWithData:d urlString:@"http://scripting.com/"]; - }); - - return xmlData; +- (RSXMLData*)xmlFile:(NSString*)name extension:(NSString*)ext { + NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:name ofType:ext inDirectory:@"Resources"]; + if (s == nil) return nil; + NSData *d = [[NSData alloc] initWithContentsOfFile:s]; + return [[RSXMLData alloc] initWithData:d urlString:[NSString stringWithFormat:@"%@.%@", name, ext]]; } - -+ (RSXMLData *)mantonData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:@"manton" ofType:@"rss" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - xmlData = [[RSXMLData alloc] initWithData:d urlString:@"http://manton.org/"]; - }); - - return xmlData; +- (RSFeedParser*)parserForFile:(NSString*)name extension:(NSString*)ext expect:(Class)cls { + RSXMLData *xmlData = [self xmlFile:name extension:ext]; + XCTAssertEqual(xmlData.parserClass, cls); + return [xmlData getParser]; } +#pragma mark - Completeness Tests -+ (RSXMLData *)daringFireballData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:@"DaringFireball" ofType:@"rss" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - xmlData = [[RSXMLData alloc] initWithData:d urlString:@"http://daringfireball.net/"]; - }); - - return xmlData; +- (void)testAsync { + RSXMLData *xmlData = [self xmlFile:@"OneFootTsunami" extension:@"atom"]; + [[xmlData getParser] parseAsync:^(RSParsedFeed *parsedDocument, NSError *error) { + XCTAssertEqualObjects(parsedDocument.title, @"One Foot Tsunami"); + XCTAssertEqualObjects(parsedDocument.subtitle, 
@"Slightly less disappointing than it sounds"); + XCTAssertEqualObjects(parsedDocument.link, @"http://onefoottsunami.com"); + XCTAssertEqual(parsedDocument.articles.count, 25u); + + RSParsedArticle *a = parsedDocument.articles.firstObject; + XCTAssertEqualObjects(a.title, @"Link: Pillow Fight Leaves 24 Concussed"); + XCTAssertEqualObjects(a.link, @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&_r=1&pagewanted=all"); + XCTAssertEqualObjects(a.guid, @"http://onefoottsunami.com/?p=14863"); + XCTAssertEqual(a.datePublished, [NSDate dateWithTimeIntervalSince1970:1441722101]); // 2015-09-08T14:21:41Z + }]; } - -+ (RSXMLData *)katieFloydData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:@"KatieFloyd" ofType:@"rss" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - xmlData = [[RSXMLData alloc] initWithData:d urlString:@"http://katiefloyd.com/"]; - }); - - return xmlData; -} - - -+ (RSXMLData *)eMarleyData { - - static RSXMLData *xmlData = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:@"EMarley" ofType:@"rss" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - xmlData = [[RSXMLData alloc] initWithData:d urlString:@"https://medium.com/@emarley"]; - }); - - return xmlData; -} - - - (void)testOneFootTsunami { + RSXMLData *xmlData = [self xmlFile:@"OneFootTsunami" extension:@"atom"]; + XCTAssertEqual(xmlData.parserClass, [RSAtomParser class]); + NSError *error = nil; - RSXMLData *xmlData = [[self class] oftData]; - RSParsedFeed *parsedFeed = RSParseFeedSync(xmlData, &error); - NSLog(@"parsedFeed: %@", parsedFeed); -} - - -- (void)testOFTPerformance { - - RSXMLData *xmlData = [[self class] oftData]; - + RSParsedFeed 
*parsedFeed = [[xmlData getParser] parseSync:&error]; + XCTAssertEqualObjects(parsedFeed.title, @"One Foot Tsunami"); + XCTAssertEqualObjects(parsedFeed.subtitle, @"Slightly less disappointing than it sounds"); + XCTAssertEqualObjects(parsedFeed.link, @"http://onefoottsunami.com"); + XCTAssertEqual(parsedFeed.articles.count, 25u); + + RSParsedArticle *a = parsedFeed.articles.firstObject; + XCTAssertEqualObjects(a.title, @"Link: Pillow Fight Leaves 24 Concussed"); + XCTAssertEqualObjects(a.link, @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&_r=1&pagewanted=all"); + XCTAssertEqualObjects(a.guid, @"http://onefoottsunami.com/?p=14863"); + XCTAssertEqual(a.datePublished, [NSDate dateWithTimeIntervalSince1970:1441722101]); // 2015-09-08T14:21:41Z + [self measureBlock:^{ - NSError *error = nil; - RSParseFeedSync(xmlData, &error); + [[xmlData getParser] parseSync:nil]; }]; } - (void)testScriptingNews { + RSXMLData *xmlData = [self xmlFile:@"scriptingNews" extension:@"rss"]; + XCTAssertEqual(xmlData.parserClass, [RSRSSParser class]); + NSError *error = nil; - RSXMLData *xmlData = [[self class] scriptingNewsData]; - RSParsedFeed *parsedFeed = RSParseFeedSync(xmlData, &error); - NSLog(@"parsedFeed: %@", parsedFeed); + RSParsedFeed *parsedFeed = [[xmlData getParser] parseSync:&error]; + XCTAssertEqualObjects(parsedFeed.title, @"Scripting News"); + XCTAssertEqualObjects(parsedFeed.subtitle, @"Scripting News, the weblog started in 1997 that bootstrapped the blogging revolution..."); + XCTAssertEqualObjects(parsedFeed.link, @"http://scripting.com/"); + XCTAssertEqual(parsedFeed.articles.count, 25u); + + RSParsedArticle *a = parsedFeed.articles.firstObject; + XCTAssertEqualObjects(a.title, @"People don't click links, that's why the 140-char limit will cripple Twitter"); + XCTAssertEqualObjects(a.link, @"http://scripting.com/2015/09/08/peopleDontClickLinks.html"); + XCTAssertEqualObjects(a.guid, 
@"http://scripting.com/2015/09/08/peopleDontClickLinks.html"); + XCTAssertEqual(a.datePublished, [NSDate dateWithTimeIntervalSince1970:1441723501]); // Tue Sep 8 16:45:01 2015 + + [self measureBlock:^{ + [[xmlData getParser] parseSync:nil]; + }]; } - (void)testManton { + RSXMLData *xmlData = [self xmlFile:@"manton" extension:@"rss"]; + XCTAssertEqual(xmlData.parserClass, [RSRSSParser class]); + NSError *error = nil; - RSXMLData *xmlData = [[self class] mantonData]; - RSParsedFeed *parsedFeed = RSParseFeedSync(xmlData, &error); - NSLog(@"parsedFeed: %@", parsedFeed); + RSParsedFeed *parsedFeed = [[xmlData getParser] parseSync:&error]; + XCTAssertEqualObjects(parsedFeed.title, @"Manton Reece"); + XCTAssertNil(parsedFeed.subtitle); + XCTAssertEqualObjects(parsedFeed.link, @"http://www.manton.org"); + XCTAssertEqual(parsedFeed.articles.count, 10u); + + RSParsedArticle *a = parsedFeed.articles.firstObject; + XCTAssertNil(a.title); + XCTAssertEqualObjects(a.link, @"http://www.manton.org/2015/09/3071.html"); + XCTAssertEqualObjects(a.guid, @"http://www.manton.org/?p=3071"); + XCTAssertEqual(a.datePublished, [NSDate dateWithTimeIntervalSince1970:1443191200]); // Fri, 25 Sep 2015 14:26:40 +0000 + + [self measureBlock:^{ + [[xmlData getParser] parseSync:nil]; + }]; } - (void)testKatieFloyd { + RSXMLData *xmlData = [self xmlFile:@"KatieFloyd" extension:@"rss"]; + XCTAssertEqual(xmlData.parserClass, [RSRSSParser class]); + NSError *error = nil; - RSXMLData *xmlData = [[self class] katieFloydData]; - RSParsedFeed *parsedFeed = RSParseFeedSync(xmlData, &error); + RSParsedFeed *parsedFeed = [[xmlData getParser] parseSync:&error]; XCTAssertEqualObjects(parsedFeed.title, @"Katie Floyd"); + XCTAssertNil(parsedFeed.subtitle); + XCTAssertEqualObjects(parsedFeed.link, @"http://www.katiefloyd.com"); + XCTAssertEqual(parsedFeed.articles.count, 20u); + + RSParsedArticle *a = parsedFeed.articles.firstObject; + XCTAssertEqualObjects(a.title, @"Special Mac Power Users for Relay FM Members"); 
+ XCTAssertEqualObjects(a.link, @"http://tracking.feedpress.it/link/980/4243452"); + XCTAssertEqualObjects(a.guid, @"50c628b3e4b07b56461546c5:50c658a6e4b0cc9aa9ce4405:57bcbe83e4fcb567fdffc020"); + XCTAssertEqual(a.datePublished, [NSDate dateWithTimeIntervalSince1970:1472163600]); // Thu, 25 Aug 2016 22:20:00 +0000 + + [self measureBlock:^{ + [[xmlData getParser] parseSync:nil]; + }]; } - (void)testEMarley { - NSError *error = nil; - RSXMLData *xmlData = [[self class] eMarleyData]; - RSParsedFeed *parsedFeed = RSParseFeedSync(xmlData, &error); - XCTAssertEqualObjects(parsedFeed.title, @"Stories by Liz Marley on Medium"); - XCTAssertEqual(parsedFeed.articles.count, 10u); -} - - -- (void)testScriptingNewsPerformance { - - RSXMLData *xmlData = [[self class] scriptingNewsData]; - - [self measureBlock:^{ - NSError *error = nil; - RSParseFeedSync(xmlData, &error); - }]; - -} - - -- (void)testMantonPerformance { - - RSXMLData *xmlData = [[self class] mantonData]; - - [self measureBlock:^{ - NSError *error = nil; - RSParseFeedSync(xmlData, &error); - }]; - -} - - -- (void)testDaringFireballPerformance { - - RSXMLData *xmlData = [[self class] daringFireballData]; - - [self measureBlock:^{ - NSError *error = nil; - RSParseFeedSync(xmlData, &error); - }]; -} - - -- (void)testCanParseFeedPerformance { + RSXMLData *xmlData = [self xmlFile:@"EMarley" extension:@"rss"]; + XCTAssertEqual(xmlData.parserClass, [RSRSSParser class]); + + NSError *error = nil; + RSParsedFeed *parsedFeed = [[xmlData getParser] parseSync:&error]; + XCTAssertEqualObjects(parsedFeed.title, @"Stories by Liz Marley on Medium"); + XCTAssertEqualObjects(parsedFeed.subtitle, @"Stories by Liz Marley on Medium"); + XCTAssertEqualObjects(parsedFeed.link, @"https://medium.com/@emarley?source=rss-b4981c59ffa5------2"); + XCTAssertEqual(parsedFeed.articles.count, 10u); + + RSParsedArticle *a = parsedFeed.articles.firstObject; + XCTAssertEqualObjects(a.title, @"UI Automation & screenshots"); + 
XCTAssertEqualObjects(a.link, @"https://medium.com/@emarley/ui-automation-screenshots-c44a41af38d1?source=rss-b4981c59ffa5------2"); + XCTAssertEqualObjects(a.guid, @"https://medium.com/p/c44a41af38d1"); + XCTAssertEqual(a.datePublished, [NSDate dateWithTimeIntervalSince1970:1462665210]); // Sat, 07 May 2016 23:53:30 GMT - RSXMLData *xmlData = [[self class] daringFireballData]; - // 0.379 [self measureBlock:^{ - for (NSInteger i = 0; i < 100; i++) { - RSCanParseFeed(xmlData); - } + [[xmlData getParser] parseSync:nil]; }]; } +- (void)testDaringFireball { + + RSXMLData *xmlData = [self xmlFile:@"DaringFireball" extension:@"atom"]; + XCTAssertEqual(xmlData.parserClass, [RSAtomParser class]); + + NSError *error = nil; + RSParsedFeed *parsedFeed = [[xmlData getParser] parseSync:&error]; + XCTAssertEqualObjects(parsedFeed.title, @"Daring Fireball"); + XCTAssertEqualObjects(parsedFeed.subtitle, @"By John Gruber"); + XCTAssertEqualObjects(parsedFeed.link, @"http://daringfireball.net/"); + XCTAssertEqual(parsedFeed.articles.count, 47u); + + RSParsedArticle *a = parsedFeed.articles.firstObject; + XCTAssertEqualObjects(a.title, @"Apple Product Event: Monday March 21"); + XCTAssertEqualObjects(a.link, @"http://recode.net/2016/02/27/remark-your-calendars-apples-product-event-will-week-of-march-21/"); + XCTAssertEqualObjects(a.guid, @"tag:daringfireball.net,2016:/linked//6.32173"); + XCTAssertEqual(a.datePublished, [NSDate dateWithTimeIntervalSince1970:1456610387]); // 2016-02-27T21:59:47Z + + [self measureBlock:^{ + [[xmlData getParser] parseSync:nil]; + }]; +} + + +#pragma mark - Variety Test & Other + + - (void)testDownloadedFeeds { NSError *error = nil; int i = 0; while (true) { ++i; - NSString *pth = [NSString stringWithFormat:@"feed_%d", i]; - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:pth ofType:@"rss" inDirectory:@"Resources"]; - if (s == nil) { - break; - } - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - RSXMLData *xmlData = 
[[RSXMLData alloc] initWithData:d urlString:pth]; - RSParsedFeed *parsedFeed = RSParseFeedSync(xmlData, &error); - printf("\n\nparsing: %s\n%s\n", pth.UTF8String, parsedFeed.description.UTF8String); + RSXMLData *xmlData = [self xmlFile:[NSString stringWithFormat:@"feed_%d", i] extension:@"rss"]; + if (!xmlData) break; + RSParsedFeed *parsedFeed = [[xmlData getParser] parseSync:&error]; XCTAssertNil(error); + XCTAssert(parsedFeed); + XCTAssert(parsedFeed.title); + XCTAssert(parsedFeed.link); + XCTAssert(parsedFeed.articles.count > 0); + //printf("\n\nparsing: %s\n%s\n", xmlData.urlString.UTF8String, parsedFeed.description.UTF8String); } } +- (void)testDownloadedFeedsPerformance { + [self measureBlock:^{ + [self testDownloadedFeeds]; + }]; +} + - (void)testSingle { NSError *error = nil; - NSString *filename = @"feed_1"; - NSString *s = [[NSBundle bundleForClass:[self class]] pathForResource:filename ofType:@"rss" inDirectory:@"Resources"]; - NSData *d = [[NSData alloc] initWithContentsOfFile:s]; - RSXMLData *xmlData = [[RSXMLData alloc] initWithData:d urlString:@"single-feed"]; - RSParsedFeed *parsedFeed = RSParseFeedSync(xmlData, &error); - printf("\n\nparsing: %s\n%s\n", filename.UTF8String, parsedFeed.description.UTF8String); + RSXMLData *xmlData = [self xmlFile:@"feed_1" extension:@"rss"]; + RSParsedFeed *parsedFeed = [[xmlData getParser] parseSync:&error]; + printf("\n\nparsing: %s\n%s\n", xmlData.urlString.UTF8String, parsedFeed.description.UTF8String); XCTAssertNil(error); } +- (void)testDetermineParserClassPerformance { + + RSXMLData *xmlData = [self xmlFile:@"DaringFireball" extension:@"atom"]; +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wundeclared-selector" + [self measureBlock:^{ + for (NSInteger i = 0; i < 100; i++) { + [xmlData performSelector:@selector(determineParserClass)]; + } + }]; +#pragma clang diagnostic pop +} + @end diff --git a/RSXMLTests/Resources/DaringFireball.rss b/RSXMLTests/Resources/DaringFireball.atom similarity 
index 100% rename from RSXMLTests/Resources/DaringFireball.rss rename to RSXMLTests/Resources/DaringFireball.atom diff --git a/RSXMLTests/Resources/TimerSearch.txt b/RSXMLTests/Resources/TimerSearch.txt deleted file mode 100644 index 17702e2..0000000 --- a/RSXMLTests/Resources/TimerSearch.txt +++ /dev/null @@ -1,31 +0,0 @@ -class SomeViewController: NSViewController { - - @IBOutlet weak var textField: NSTextField - private var NSTimer: fetchDataTimer? - private var currentText: String? { - didSet { - invalidateTimer() - if currentText.length > 3 { - restartTimer() - } - } - } - - func textDidChange(notification: NSNotification) { - - currentText = textField.stringValue - } - - func invalidateTimer() { - - if let timer = timer { - if timer.isValid { - timer.invalidate() - } - self.timer = nil - } - } - - - -} \ No newline at end of file