#!/usr/bin/swift
// Expected input format:
//
// id-number<TAB>kanji<TAB>reading、reading、reading
//
// Readings may have brackets, which can be ignored, and hyphens,
// which separate the kanji pronunciation from kana suffixes
// (usually seen in verbs and adjectives, e.g.「くだ-さる」).
import Foundation
// MARK: Parse the input.
/// The bracket characters that may surround a reading in the input.
let brackets = CharacterSet(charactersIn: "[]")

/// Returns `reading` with bracket characters removed from both ends.
///
/// Only leading and trailing brackets are stripped; a bracket in the middle
/// of the reading is left untouched.
///
/// - Parameter reading: A single reading taken from the input line.
/// - Returns: The reading without surrounding `[` / `]` characters.
func normalizeReading(_ reading: String) -> String {
    let unbracketed = reading.trimmingCharacters(in: brackets)
    return unbracketed
}
// Every data row read from standard input: a kanji and its normalized readings.
var allReadings: [(kanji: String, readings: [String])] = []
while let inputLine = readLine() {
    let columns = inputLine.components(separatedBy: "\t")
    // A data row has exactly three tab-separated columns and a numeric id in
    // the first one; anything else (blank lines, comments, headers) is skipped.
    guard columns.count == 3, Int(columns[0]) != nil else { continue }
    let kanji = columns[1]
    // Readings are separated by the ideographic comma.
    let cleanedReadings = columns[2]
        .components(separatedBy: "、")
        .map { normalizeReading($0) }
    allReadings.append((kanji: kanji, readings: cleanedReadings))
}
// MARK: Get the longest reading for each kanji in the list.
/// Returns the number of characters in the kanji-pronunciation part of `reading`.
///
/// Only the characters before the first hyphen are counted; the hyphen and
/// everything after it are ignored. A reading without a hyphen is counted
/// in full.
///
/// - Parameter reading: A reading, optionally containing a `-`.
/// - Returns: The character count of the part before the first hyphen, or of
///   the whole reading when there is no hyphen.
func readingLength(of reading: String) -> Int {
    guard let dashRange = reading.range(of: "-", options: .literal) else {
        return reading.count
    }
    // Slice with a range and count directly: `String.characters` and
    // `substring(to:)` are unavailable/deprecated on current Swift toolchains.
    return reading[..<dashRange.lowerBound].count
}
// Pair each kanji with the one reading that has the greatest `readingLength`.
var longestReadingsMapping: [(kanji: String, reading: String)] = []
for entry in allReadings {
    // `max(by:)` yields nil only for an empty list; the force-unwrap traps
    // if an entry ever has no readings at all.
    let longest = entry.readings.max(by: { lhs, rhs in
        readingLength(of: lhs) < readingLength(of: rhs)
    })!
    longestReadingsMapping.append((kanji: entry.kanji, reading: longest))
}
// MARK: Finally, get the kanji with the longest readings.
// Sort in place so the entries with the longest readings come first.
longestReadingsMapping.sort { lhs, rhs in
    readingLength(of: lhs.reading) > readingLength(of: rhs.reading)
}
// Print at most the first ten entries, one "kanji: reading" pair per line.
for entry in longestReadingsMapping.prefix(10) {
    let (kanji, reading) = entry
    print("\(kanji): \(reading)")
}
// This entry was posted on March 03, 2017.