Files
baRSS/baRSS/NSCategories/NSString+Ext.m
2019-09-26 16:24:51 +02:00

163 lines
5.6 KiB
Objective-C

//
// The MIT License (MIT)
// Copyright (c) 2019 Oleg Geier
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#import "NSString+Ext.h"
@implementation NSString (PlainHTML)

#pragma mark - Helper methods

/**
 Test whether the content of a tag (the text between @c < and @c > ) opens the element @c match.

 The element name is compared case-insensitively. It has to be either the complete tag content
 or be followed by whitespace (attributes follow) or by a slash (self-closing tag like @c <br/> ).

 @param tag Raw tag content without the angle brackets.
 @param match Element name to test for, e.g. @c br
 @return @c YES if @c tag starts the element @c match
 */
static inline BOOL OPEN(NSString *tag, NSString *match) {
    if (!tag || !match)
        return NO;
    NSUInteger nameLength = match.length;
    if (tag.length < nameLength)
        return NO;
    NSComparisonResult prefix = [tag compare:match
                                     options:(NSCaseInsensitiveSearch | NSLiteralSearch)
                                       range:NSMakeRange(0, nameLength)];
    if (prefix != NSOrderedSame)
        return NO;
    if (tag.length == nameLength)
        return YES; // exact match, tag without attributes
    // The name must end here, otherwise "a" would also match "abbr", "article", ...
    unichar following = [tag characterAtIndex:nameLength];
    return (following == '/'
            || [NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:following]);
}

/**
 Test whether the content of a tag is exactly the closing tag @c match (case-insensitive).

 @param tag Raw tag content without the angle brackets.
 @param match Closing tag including the leading slash, e.g. @c /p
 @return @c YES if both strings are equal ignoring case
 */
static inline BOOL CLOSE(NSString *tag, NSString *match) {
    // Guard against nil: a message to nil returns 0, which equals NSOrderedSame.
    if (!tag || !match)
        return NO;
    return [tag compare:match options:(NSCaseInsensitiveSearch | NSLiteralSearch)] == NSOrderedSame;
}

#pragma mark - HTML to plain text

/// Init string with @c NSUTF8StringEncoding and call @c htmlToPlainText
/// @return @c nil if @c data is @c nil or if no string could be created from it.
+ (NSString*)plainTextFromHTMLData:(NSData*)data {
    if (!data) return nil;
    NSString *html = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
    return [html htmlToPlainText]; // message to nil returns nil if decoding failed
}

/**
 Simple HTML parser to extract TEXT elements and semi-structured elements like list items.
 Ignores @c <head> , @c <style> and @c <script> tags.

 A line break is inserted for @c </p> , @c <label> , @c <br> and for opening and closing
 @c <h1> to @c <h6> tags. Each @c <li> starts on a new line; items following an @c <ol> are
 numbered. An @c <a> tag inserts a space. All other tags are dropped. Text between tags is
 copied verbatim (character entities are not decoded). Finally, runs of horizontal whitespace
 are collapsed into one character and the result is trimmed.

 @return Plain text representation, never @c nil
 */
- (nonnull NSString*)htmlToPlainText {
    NSScanner *scanner = [NSScanner scannerWithString:self];
    scanner.charactersToBeSkipped = NSCharacterSet.newlineCharacterSet; // ! else, some spaces are dropped
    NSCharacterSet *angleBrackets = [NSCharacterSet characterSetWithCharactersInString:@"<>"];

    unichar prev = '>';   // last consumed delimiter; '<' means the next chunk is tag content
    int order = 0;        // ul & ol; > 0 is the number of the next ordered list item
    NSString *skip = nil; // head, style, script; closing tag that ends the ignored section
    // Starts with a space so that 'result' is never empty (see characterAtIndex: below).
    NSMutableString *result = [NSMutableString stringWithString:@" "];

    while (![scanner isAtEnd]) {
        NSString *tag = nil;
        if ([scanner scanUpToCharactersFromSet:angleBrackets intoString:&tag]) {
            if (prev == '<') {
                // parse html tag depending on type
                if (skip) {
                    // skip everything between <head>, <style>, and <script> tags
                    if (CLOSE(tag, skip))
                        skip = nil;
                    // The bracket at the current location is consumed in the next iteration.
                    continue;
                }
                if (OPEN(tag, @"a")) [result appendString:@" "];
                else if (OPEN(tag, @"head")) skip = @"/head";
                else if (OPEN(tag, @"style")) skip = @"/style";
                else if (OPEN(tag, @"script")) skip = @"/script";
                else if (CLOSE(tag, @"/p") || OPEN(tag, @"label") || OPEN(tag, @"br"))
                    [result appendString:@"\n"];
                else if (OPEN(tag, @"h1") || OPEN(tag, @"h2") || OPEN(tag, @"h3") ||
                         OPEN(tag, @"h4") || OPEN(tag, @"h5") || OPEN(tag, @"h6") ||
                         CLOSE(tag, @"/h1") || CLOSE(tag, @"/h2") || CLOSE(tag, @"/h3") ||
                         CLOSE(tag, @"/h4") || CLOSE(tag, @"/h5") || CLOSE(tag, @"/h6"))
                    [result appendString:@"\n"];
                else if (OPEN(tag, @"ol")) order = 1;
                else if (OPEN(tag, @"ul")) order = 0;
                else if (OPEN(tag, @"li")) {
                    // ordered and unordered list items, each on its own line
                    unichar last = [result characterAtIndex:result.length - 1];
                    if (last != '\n') {
                        [result appendString:@"\n"];
                    }
                    if (order > 0) [result appendFormat:@" %d. ", order++];
                    // NOTE(review): unordered items get no marker, an empty string is appended.
                    // A bullet prefix may have been intended here -- confirm.
                    else [result appendString:@""];
                }
            } else {
                // append text inbetween tags
                if (!skip) {
                    [result appendString:tag];
                }
            }
        }
        if (![scanner isAtEnd]) {
            // Consume the single character the scanner stopped at.
            unichar next = [self characterAtIndex:scanner.scanLocation];
            if (prev == next) {
                // The same delimiter twice in a row can not be markup (e.g. the second '>' in
                // "<p>a > b"), so keep it as literal text. %C is the unichar format specifier.
                if (!skip)
                    [result appendFormat:@"%C", prev];
            }
            prev = next;
            ++scanner.scanLocation;
        }
    }
    // collapsing multiple horizontal whitespaces (\h) into one (the first one)
    [[NSRegularExpression regularExpressionWithPattern:@"(\\h)[\\h]+" options:0 error:nil]
     replaceMatchesInString:result options:0 range:NSMakeRange(0, result.length) withTemplate:@"$1"];
    return [result stringByTrimmingCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet];
}

@end
@implementation NSString (HexColor)

/**
 Color from hex string with format: @c #[0x|0X]([A]RGB|[AA]RRGGBB)

 Alpha digits are accepted but ignored for now, the returned color is always fully opaque.
 Characters following the hex digits are not inspected.

 @return @c nil if string is not properly formatted.
 */
- (nullable NSColor*)hexColor {
    // must start with '#' (the length check also protects characterAtIndex: on an empty string)
    if (self.length < 2 || [self characterAtIndex:0] != '#')
        return nil;

    NSScanner *scanner = [NSScanner scannerWithString:self];
    // Do not skip whitespace, otherwise skipped characters would be counted as hex digits below.
    scanner.charactersToBeSkipped = nil;
    scanner.scanLocation = 1;
    unsigned int value = 0;
    if (![scanner scanHexInt:&value]) // also consumes an optional '0x' / '0X' prefix
        return nil;

    NSUInteger len = scanner.scanLocation - 1; // -'#'
    if (len > 1) {
        // At least two characters were consumed, so index 2 is in bounds. An 'x' can only have
        // been consumed as part of the prefix.
        unichar marker = [self characterAtIndex:2];
        if (marker == 'x' || marker == 'X')
            len -= 2; // ignore '0x'RRGGBB
    }

    unsigned int r = 0, g = 0, b = 0, a = 255;
    switch (len) {
        case 4: // #ARGB
            // ignore alpha for now
            // a = (value >> 8) & 0xF0; a = a | (a >> 4);
            // fall through
        case 3: // #RGB, each nibble is duplicated: 0xA -> 0xAA
            r = (value >> 4) & 0xF0; r = r | (r >> 4);
            g = (value) & 0xF0; g = g | (g >> 4);
            b = (value) & 0x0F; b = b | (b << 4);
            break;
        case 8: // #AARRGGBB
            // a = (value >> 24) & 0xFF;
            // fall through
        case 6: // #RRGGBB
            r = (value >> 16) & 0xFF;
            g = (value >> 8) & 0xFF;
            b = (value) & 0xFF;
            break;
        default: // unsupported number of digits
            return nil;
    }
    return [NSColor colorWithCalibratedRed:r/255.f green:g/255.f blue:b/255.f alpha:a/255.f];
}

@end