Added all bugfixes from the current state of RSParser, except that HTML entities are still decoded for title, abstract, and body.

This commit is contained in:
relikd
2019-01-12 23:28:48 +01:00
parent 2732573003
commit ca9452bc18
5 changed files with 27 additions and 26 deletions

View File

@@ -26,7 +26,7 @@
@interface NSString (RSXML) @interface NSString (RSXML)
- (NSString *)rs_stringByDecodingHTMLEntities; - (NSString *)rsxml_stringByDecodingHTMLEntities;
- (nonnull NSString *)rsxml_md5HashString; - (nonnull NSString *)rsxml_md5HashString;
- (nullable NSString *)absoluteURLWithBase:(nonnull NSURL *)baseURL; - (nullable NSString *)absoluteURLWithBase:(nonnull NSURL *)baseURL;

View File

@@ -61,7 +61,7 @@
return self; return self;
} }
- (NSString *)rs_stringByDecodingHTMLEntities { - (NSString *)rsxml_stringByDecodingHTMLEntities {
@autoreleasepool { @autoreleasepool {
@@ -315,7 +315,7 @@ static NSDictionary *RSEntitiesDictionary(void) {
@"#255": @"ÿ", @"#255": @"ÿ",
@"#32": @" ", @"#32": @" ",
@"#34": @"\"", @"#34": @"\"",
@"#39": @"", @"#39": @"'",
@"#8194": @" ", @"#8194": @" ",
@"#8195": @" ", @"#8195": @" ",
@"#8211": @"-", @"#8211": @"-",

View File

@@ -223,12 +223,8 @@ static NSString *kRelatedValue = @"related";
else if (EqualBytes(localName, "source", 6)) { else if (EqualBytes(localName, "source", 6)) {
self.parsingSource = NO; self.parsingSource = NO;
} }
return; else if (isArticle && EqualBytes(localName, "issued", 6)) { // Atom 0.3 date
case 8: self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters];;
if (!self.parsingArticle && !self.parsingSource && self.parsedFeed.subtitle.length == 0) {
if (EqualBytes(localName, "subtitle", 8)) {
self.parsedFeed.subtitle = SAXParser.currentStringWithTrimmedWhitespace;
}
} }
return; return;
case 7: case 7:
@@ -244,6 +240,16 @@ static NSString *kRelatedValue = @"related";
} }
} }
return; return;
case 8:
if (!self.parsingArticle && !self.parsingSource && self.parsedFeed.subtitle.length == 0) {
if (EqualBytes(localName, "subtitle", 8)) {
self.parsedFeed.subtitle = SAXParser.currentStringWithTrimmedWhitespace;
}
}
else if (isArticle && EqualBytes(localName, "modified", 8)) { // Atom 0.3 date
self.currentArticle.dateModified = [self dateFromCharacters:SAXParser.currentCharacters];;
}
return;
case 9: case 9:
if (isArticle && EqualBytes(localName, "published", 9)) { if (isArticle && EqualBytes(localName, "published", 9)) {
self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters]; self.currentArticle.datePublished = [self dateFromCharacters:SAXParser.currentCharacters];

View File

@@ -52,7 +52,7 @@
/// @return currentString by removing HTML encoded entities. /// @return currentString by removing HTML encoded entities.
- (NSString *)decodeHTMLEntities:(NSString *)str { - (NSString *)decodeHTMLEntities:(NSString *)str {
return [str rs_stringByDecodingHTMLEntities]; return [str rsxml_stringByDecodingHTMLEntities];
} }
@end @end

View File

@@ -58,37 +58,32 @@
} }
- (void)testNotEntities { - (void)testNotEntities {
NSString *s = @"&&\t\nFoo & Bar &0; Baz & 1238 4948 More things &foobar;&"; NSString *s = @"&&\t\nFoo & Bar &0; Baz & 1238 4948 More things &foobar;&";
NSString *result = [s rs_stringByDecodingHTMLEntities]; XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], s);
XCTAssertEqualObjects(result, s);
} }
- (void)testURLs { - (void)testURLs {
NSString *s = @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&amp;_r=1&amp;pagewanted=all";
NSString *urlString = @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&amp;_r=1&amp;pagewanted=all";
NSString *expectedResult = @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&_r=1&pagewanted=all"; NSString *expectedResult = @"http://www.nytimes.com/2015/09/05/us/at-west-point-annual-pillow-fight-becomes-weaponized.html?mwrsm=Email&_r=1&pagewanted=all";
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], expectedResult);
NSString *result = [urlString rs_stringByDecodingHTMLEntities];
XCTAssertEqualObjects(result, expectedResult);
} }
- (void)testEntityPlusWhitespace { - (void)testEntityPlusWhitespace {
NSString *s = @"&infin; Permalink"; NSString *s = @"&infin; Permalink";
NSString *expectedResult = @"∞ Permalink"; NSString *expectedResult = @"∞ Permalink";
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], expectedResult);
NSString *result = [s rs_stringByDecodingHTMLEntities];
XCTAssertEqualObjects(result, expectedResult);
} }
- (void)testNonBreakingSpace { - (void)testNonBreakingSpace {
NSString *s = @"&nbsp;&nbsp; -- just some spaces"; NSString *s = @"&nbsp;&nbsp; -- just some spaces";
NSString *expectedResult = [NSString stringWithFormat:@"%C%C -- just some spaces", 160, 160]; NSString *expectedResult = [NSString stringWithFormat:@"%C%C -- just some spaces", 160, 160];
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], expectedResult);
NSString *result = [s rs_stringByDecodingHTMLEntities]; }
XCTAssertEqualObjects(result, expectedResult);
- (void)test39encoding {
NSString *s = @"These are the times that try men&#39;s souls.";
NSString *expectedResult = @"These are the times that try men's souls.";
XCTAssertEqualObjects([s rs_stringByDecodingHTMLEntities], expectedResult);
} }
@end @end