RFC 4180対応版 CSVレコードの分解

エロと風俗情報満載 どう抜く?より。Parsecの練習のため、このプログラムを写経しました。写経というにはずい分変わってしまったように思いますが。

module Main (main) where

import Text.ParserCombinators.Parsec 
  (Parser, parse, char, string, many, try, (<|>), sepBy, noneOf, between, eof)

-- | Split one CSV record into its fields.
--
-- A field is either unquoted (every character up to the next comma) or
-- enclosed in double quotes. Inside a quoted field a doubled quote (@""@)
-- stands for one literal quote, and commas and newlines are kept verbatim.
--
-- The whole input must be consumed. Malformed input, such as an unterminated
-- quoted field or stray characters after a closing quote, aborts via 'error'
-- with the Parsec error message instead of silently dropping the remainder.
splitCVS :: String -> [String]
splitCVS = either (error . show) id . parse (record <* eof) ""
  where
    -- Fields separated by commas; the quoted form is tried first because it
    -- fails without consuming input when the field does not start with '"'.
    record :: Parser [String]
    record = sepBy (quotedField <|> field) $ char ','

    -- Unquoted field: any run of characters (possibly empty) without a comma.
    field :: Parser String
    field = many $ noneOf ","

    -- Quoted field: anything but a quote, or a doubled quote collapsed to a
    -- single one. 'try' lets the closing quote be left for 'between' when
    -- the quote is not followed by a second one.
    quotedField :: Parser String
    quotedField =
      between (char '"') (char '"') $
        many $ noneOf "\"" <|> try (string "\"\"" >> return '"')

-- | Split a sample record with 'splitCVS' and print every field on its own
-- line, prefixed with its 1-based position.
main :: IO ()
main = mapM_ (putStrLn . render) $ zip [1 :: Integer ..] fields
  where
    -- Sample record exercising quoted fields, an embedded newline and
    -- doubled quotes.
    fields = splitCVS "\"aaa\",\"b\nbb\",\"ccc\",zzz,\"y\"\"Y\"\"y\",xxx"

    -- Format one numbered field as "<position> => <field>".
    render (position, text) = show position ++ " => " ++ text
-- Expected output:
--    1 => aaa
--    2 => b
--    bb
--    3 => ccc
--    4 => zzz
--    5 => y"Y"y
--    6 => xxx