Added all bugfixes from the current state of RSParser, except that HTML entities are still decoded for title, abstract and body.
This commit is contained in:
@@ -26,7 +26,7 @@
|
|||||||
|
|
||||||
@interface NSString (RSXML)
|
@interface NSString (RSXML)
|
||||||
|
|
||||||
- (NSString *)rs_stringByDecodingHTMLEntities;
|
- (NSString *)rsxml_stringByDecodingHTMLEntities;
|
||||||
- (nonnull NSString *)rsxml_md5HashString;
|
- (nonnull NSString *)rsxml_md5HashString;
|
||||||
- (nullable NSString *)absoluteURLWithBase:(nonnull NSURL *)baseURL;
|
- (nullable NSString *)absoluteURLWithBase:(nonnull NSURL *)baseURL;
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,7 @@
|
|||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
- (NSString *)rs_stringByDecodingHTMLEntities {
|
- (NSString *)rsxml_stringByDecodingHTMLEntities {
|
||||||
|
|
||||||
@autoreleasepool {
|
@autoreleasepool {
|
||||||
|
|
||||||
@@ -315,7 +315,7 @@ static NSDictionary *RSEntitiesDictionary(void) {
|
|||||||
@"#255": @"ÿ",
|
@"#255": @"ÿ",
|
||||||
@"#32": @" ",
|
@"#32": @" ",
|
||||||
@"#34": @"\"",
|
@"#34": @"\"",
|
||||||
@"#39": @"",
|
@"#39": @"'",
|
||||||
@"#8194": @" ",
|
@"#8194": @" ",
|
||||||
@"#8195": @" ",
|
@"#8195": @" ",
|
||||||
@"#8211": @"-",
|
@"#8211": @"-",
|
||||||
|
|||||||
@@ -223,12 +223,8 @@ static NSString *kRelatedValue = @"related";
|
|||||||
else if (EqualBytes(localName, "source", 6)) {
|
else if (EqualBytes(localName, "source", 6)) {
|
||||||
self.parsingSource = NO;
|
self.parsingSource = NO;
|
||||||
}
|
}
|
||||||
return;
|
else if (isArticle && EqualBytes(localName, "issued", 6)) { // Atom 0.3 date
|
||||||
case 8:
|
self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters];;
|
||||||
if (!self.parsingArticle && !self.parsingSource && self.parsedFeed.subtitle.length == 0) {
|
|
||||||
if (EqualBytes(localName, "subtitle", 8)) {
|
|
||||||
self.parsedFeed.subtitle = SAXParser.currentStringWithTrimmedWhitespace;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
case 7:
|
case 7:
|
||||||
@@ -244,6 +240,16 @@ static NSString *kRelatedValue = @"related";
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
case 8:
|
||||||
|
if (!self.parsingArticle && !self.parsingSource && self.parsedFeed.subtitle.length == 0) {
|
||||||
|
if (EqualBytes(localName, "subtitle", 8)) {
|
||||||
|
self.parsedFeed.subtitle = SAXParser.currentStringWithTrimmedWhitespace;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (isArticle && EqualBytes(localName, "modified", 8)) { // Atom 0.3 date
|
||||||
|
self.currentArticle.dateModified = [self dateFromCharacters:SAXParser.currentCharacters];;
|
||||||
|
}
|
||||||
|
return;
|
||||||
case 9:
|
case 9:
|
||||||
if (isArticle && EqualBytes(localName, "published", 9)) {
|
if (isArticle && EqualBytes(localName, "published", 9)) {
|
||||||
self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters];
|
self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters];
|
||||||
|
|||||||
@@ -52,7 +52,7 @@
|
|||||||
|
|
||||||
/// @return currentString by removing HTML encoded entities.
|
/// @return currentString by removing HTML encoded entities.
|
||||||
- (NSString *)decodeHTMLEntities:(NSString *)str {
|
- (NSString *)decodeHTMLEntities:(NSString *)str {
|
||||||
return [str rs_stringByDecodingHTMLEntities];
|
return [str rsxml_stringByDecodingHTMLEntities];
|
||||||
}
|
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|||||||
@@ -58,37 +58,32 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
- (void)testNotEntities {
|
- (void)testNotEntities {
|
||||||
|
|
||||||
NSString *s = @"&&\t\nFoo & Bar &0; Baz & 1238 4948 More things &foobar;&";
|
NSString *s = @"&&\t\nFoo & Bar &0; Baz & 1238 4948 More things &foobar;&";
|
||||||
NSString *result = [s rs_stringByDecodingHTMLEntities];
|
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], s);
|
||||||
XCTAssertEqualObjects(result, s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void)testURLs {
|
- (void)testURLs {
|
||||||
|
NSString *s = @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&amp;_r=1&amp;pagewanted=all";
|
||||||
NSString *urlString = @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&amp;_r=1&amp;pagewanted=all";
|
|
||||||
NSString *expectedResult = @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&_r=1&pagewanted=all";
|
NSString *expectedResult = @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&_r=1&pagewanted=all";
|
||||||
|
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], expectedResult);
|
||||||
NSString *result = [urlString rs_stringByDecodingHTMLEntities];
|
|
||||||
XCTAssertEqualObjects(result, expectedResult);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void)testEntityPlusWhitespace {
|
- (void)testEntityPlusWhitespace {
|
||||||
|
|
||||||
NSString *s = @"&infin; Permalink";
|
NSString *s = @"&infin; Permalink";
|
||||||
NSString *expectedResult = @"∞ Permalink";
|
NSString *expectedResult = @"∞ Permalink";
|
||||||
|
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], expectedResult);
|
||||||
NSString *result = [s rs_stringByDecodingHTMLEntities];
|
|
||||||
XCTAssertEqualObjects(result, expectedResult);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void)testNonBreakingSpace {
|
- (void)testNonBreakingSpace {
|
||||||
|
|
||||||
NSString *s = @"&nbsp;&nbsp; -- just some spaces";
|
NSString *s = @"&nbsp;&nbsp; -- just some spaces";
|
||||||
NSString *expectedResult = [NSString stringWithFormat:@"%C%C -- just some spaces", 160, 160];
|
NSString *expectedResult = [NSString stringWithFormat:@"%C%C -- just some spaces", 160, 160];
|
||||||
|
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], expectedResult);
|
||||||
|
}
|
||||||
|
|
||||||
NSString *result = [s rs_stringByDecodingHTMLEntities];
|
- (void)test39encoding {
|
||||||
XCTAssertEqualObjects(result, expectedResult);
|
NSString *s = @"These are the times that try men&#39;s souls.";
|
||||||
|
NSString *expectedResult = @"These are the times that try men's souls.";
|
||||||
|
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], expectedResult);
|
||||||
}
|
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|||||||
Reference in New Issue
Block a user