-- | Helpers for working with CSV documents held in lazy ByteStrings:
-- reading a single column into a vector of parsed values, and dropping or
-- keeping columns selected either by position or by header name.
module LightGBM.Utils.Csv (
readColumn
, dropColumns
, keepColumns
, dropNamedColumns
, keepNamedColumns) where
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Lazy.Char8 as BSL8
import qualified Data.Csv as CSV
import qualified Data.Foldable as F
import Data.Maybe (isNothing, catMaybes)
import qualified Data.Vector as V
import Text.Read (readMaybe)
-- | Extract a single column from CSV data and parse every cell of it with
-- the cell type's 'Read' instance.
--
-- * @index@ is the zero-based position of the column within each record.
-- * @headerStatus@ is forwarded unchanged to 'CSV.decode'.
-- * @csvData@ is the raw CSV document.
--
-- The result holds one parsed value per decoded record, in record order.
--
-- Calls 'error' (with a message naming the offending record) when the
-- document cannot be decoded, when a record has no field at @index@, or
-- when a cell's text cannot be parsed as the requested type.  Record
-- numbers in those messages are zero-based positions within the decoded
-- records.
readColumn :: Read a => Int -> CSV.HasHeader -> BSL.ByteString -> V.Vector a
readColumn index headerStatus csvData =
  let decoded =
        CSV.decode headerStatus csvData
          :: Either String (V.Vector (V.Vector BSL.ByteString))
   in case decoded of
        Left err -> error ("readColumn: CSV decode failed: " ++ err)
        Right rows -> V.imap extractCell rows
  where
    -- Look the cell up safely and parse it, reporting exactly which record
    -- and which text was at fault instead of failing with a bare
    -- "index out of bounds" / "no parse".
    extractCell :: Read c => Int -> V.Vector BSL.ByteString -> c
    extractCell rowNo row =
      case row V.!? index of
        Nothing ->
          error $
            "readColumn: record " ++ show rowNo
              ++ " has no column " ++ show index
              ++ " (it has " ++ show (V.length row) ++ " fields)"
        Just cell ->
          let raw = BSL8.unpack cell
           in case readMaybe raw of
                Just value -> value
                Nothing ->
                  error $
                    "readColumn: cannot parse " ++ show raw
                      ++ " in record " ++ show rowNo
                      ++ ", column " ++ show index
-- | An undecoded CSV document: a vector of records, each record being a
-- vector of raw (lazy ByteString) cells.
type RawCSV = V.Vector (V.Vector BSL.ByteString)
-- | Keep, in every record of a CSV document, only the cells whose
-- zero-based position satisfies @colPred position indices@, then re-encode
-- the result as CSV.
--
-- The document is decoded with 'CSV.NoHeader', so every record (including
-- a header row, if the data has one) is filtered in the same way.
--
-- Calls 'error' with the decoder's message when @csvdata@ cannot be decoded.
filterColumns ::
     Foldable t
  => (Int -> t Int -> Bool)
  -> t Int
  -> BSL.ByteString
  -> BSL.ByteString
filterColumns colPred indices csvdata =
  either error (CSV.encode . V.toList . V.map pruneRecord) decoded
  where
    decoded :: Either String RawCSV
    decoded = CSV.decode CSV.NoHeader csvdata

    -- Drop the cells of one record whose position fails the predicate.
    pruneRecord = V.ifilter keepCell

    -- Only the cell's position matters; its content is ignored.
    keepCell position _ = colPred position indices
-- | Resolve column names to zero-based positions using the first line of
-- the input, then delegate to 'filterColumns' with those positions.
--
-- * @colPred@ decides, from a cell position and the resolved positions,
--   whether a cell is kept.
-- * @names@ are the header names to resolve.
-- * @csvdata@ is the raw CSV document whose first line is the header.
--
-- Input with no first line, or whose first line decodes to no records, is
-- treated as having a header with no columns rather than crashing on a
-- partial 'head' / index lookup.
--
-- Calls 'error' with the decoder's message when the header line cannot be
-- decoded, and with @"Bad header name!!"@ when any of @names@ is not
-- present in the header.
filterNamedColumns ::
     (Foldable t, Functor t)
  => (Int -> [Int] -> Bool)
  -> t BSL.ByteString
  -> BSL.ByteString
  -> BSL.ByteString
filterNamedColumns colPred names csvdata =
  case decodedHeader of
    Left err -> error err
    Right headerRows ->
      let headers =
            case headerRows V.!? 0 of
              Just row -> row
              Nothing -> V.empty
          filterIndices = fmap (`V.elemIndex` headers) names
       in if any isNothing filterIndices
            then error "Bad header name!!"
            else
              filterColumns
                colPred
                (catMaybes . F.toList $ filterIndices)
                csvdata
  where
    -- First line of the input, or the empty string when there is none.
    headerLine :: BSL.ByteString
    headerLine =
      case BSL8.lines csvdata of
        firstLine : _ -> firstLine
        [] -> BSL.empty

    decodedHeader :: Either String RawCSV
    decodedHeader = CSV.decode CSV.NoHeader headerLine
-- | Remove from a CSV document every column whose header name appears in
-- @names@.  Names are resolved against the first line of the input by
-- 'filterNamedColumns'; a cell is kept when its position is not among the
-- resolved positions.
dropNamedColumns ::
     (Foldable t, Functor t)
  => t BSL8.ByteString
  -> BSL8.ByteString
  -> BSL8.ByteString
dropNamedColumns names csvdata = filterNamedColumns notElem names csvdata
-- | Keep in a CSV document only the columns whose header name appears in
-- @names@.  Names are resolved against the first line of the input by
-- 'filterNamedColumns'; a cell is kept when its position is among the
-- resolved positions.
keepNamedColumns ::
     (Foldable t, Functor t)
  => t BSL8.ByteString
  -> BSL8.ByteString
  -> BSL8.ByteString
keepNamedColumns names csvdata = filterNamedColumns elem names csvdata
-- | Remove from every record of a CSV document the cells whose zero-based
-- position is contained in @indices@.
dropColumns :: Foldable t => t Int -> BSL.ByteString -> BSL.ByteString
dropColumns indices csvdata = filterColumns notElem indices csvdata
-- | Keep in every record of a CSV document only the cells whose zero-based
-- position is contained in @indices@.
keepColumns :: Foldable t => t Int -> BSL.ByteString -> BSL.ByteString
keepColumns indices csvdata = filterColumns elem indices csvdata