longest.swift // Chigaijin

longest.swift

#!/usr/bin/swift

// Expected input format:
//
// id-number<TAB>kanji<TAB>reading、reading、reading
//
// Readings may have brackets, which can be ignored, and hyphens,
// which separate the kanji pronunciation from kana suffixes
// (usually seen in verbs and adjectives, e.g.「くだ-さる」).

import Foundation

// MARK: Parse the input.

/// The square-bracket characters that may wrap a reading in the input.
let brackets = CharacterSet(charactersIn: "[]")

/// Returns `reading` with any square brackets removed from its start and end.
///
/// Only leading and trailing brackets are stripped; a bracket in the middle
/// of the string is left untouched.
///
/// - Parameter reading: One raw reading taken from the input.
/// - Returns: The reading without surrounding "[" / "]" characters.
func normalizeReading(_ reading: String) -> String {
  let unbracketed = reading.trimmingCharacters(in: brackets)
  return unbracketed
}

// Every kanji parsed from standard input, paired with its normalized readings.
var allReadings: [(kanji: String, readings: [String])] = []

// Consume standard input one line at a time until it is exhausted.
while let line = readLine() {
  let columns = line.components(separatedBy: "\t")

  // Skip blank lines, comments, headers: a data row has exactly three
  // tab-separated columns and an integer in the first one.
  guard columns.count == 3, Int(columns[0]) != nil else { continue }

  // The third column lists the readings, separated by "、".
  let normalized = columns[2]
    .components(separatedBy: "、")
    .map(normalizeReading)
  allReadings.append((kanji: columns[1], readings: normalized))
}

// MARK: Get the longest reading for each kanji in the list.

/// Returns the number of characters in the kanji-pronunciation part of
/// `reading`.
///
/// A hyphen separates the kanji pronunciation from a kana suffix
/// (e.g.「くだ-さる」), so only the characters before the first hyphen are
/// counted. A reading without a hyphen is counted in full.
///
/// - Parameter reading: A single reading, optionally containing a hyphen.
/// - Returns: The count of characters before the first "-", or the count of
///   the whole string when it contains no hyphen.
func readingLength(of reading: String) -> Int {
  // `String.characters` is obsoleted in Swift 5 and `substring(to:)` is
  // deprecated, so slice and count the string directly instead.
  if let dashRange = reading.range(of: "-", options: .literal) {
    return reading[..<dashRange.lowerBound].count
  }
  return reading.count
}

// Pairs each kanji with the one reading that `readingLength(of:)` ranks
// longest. Declared `var` because it is sorted in place afterwards.
var longestReadingsMapping: [(kanji: String, reading: String)] =
    allReadings.map { entry in
  // The force unwrap relies on every parsed entry carrying at least one
  // reading; `max` only returns nil for an empty collection.
  let longest = entry.readings.max { lhs, rhs in
    readingLength(of: lhs) < readingLength(of: rhs)
  }!
  return (kanji: entry.kanji, reading: longest)
}

// MARK: Finally, get the kanji with the longest readings.

// Order the entries from longest to shortest reading, as measured by
// `readingLength(of:)`.
longestReadingsMapping.sort { lhs, rhs in
  readingLength(of: lhs.reading) > readingLength(of: rhs.reading)
}

// Print at most the first ten entries of the sorted list, one per line.
for (kanji, reading) in longestReadingsMapping.prefix(10) {
  print("\(kanji): \(reading)")
}
This entry was posted on March 03, 2017.
Life and Thoughts

longest.swift