{-# LANGUAGE StrictData #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
module Citeproc.CaseTransform
( CaseTransformState(..)
, CaseTransformer(..)
, withUppercaseAll
, withLowercaseAll
, withCapitalizeWords
, withCapitalizeFirst
, withSentenceCase
, withTitleCase
)
where
import Data.Ord ()
import Data.Char (isUpper, isLower)
import Data.Text (Text)
import qualified Data.Text as T
import Citeproc.Types (Lang(..))
import qualified Citeproc.Unicode as Unicode
newtype CaseTransformer =
CaseTransformer
{ CaseTransformer -> Maybe Lang -> CaseTransformState -> Text -> Text
unCaseTransformer :: Maybe Lang -> CaseTransformState -> Text -> Text }
data CaseTransformState =
Start
| StartSentence
| AfterWordEnd
| AfterWordChar
| AfterSentenceEndingPunctuation
| BeforeLastWord
deriving (Int -> CaseTransformState -> ShowS
[CaseTransformState] -> ShowS
CaseTransformState -> String
(Int -> CaseTransformState -> ShowS)
-> (CaseTransformState -> String)
-> ([CaseTransformState] -> ShowS)
-> Show CaseTransformState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CaseTransformState] -> ShowS
$cshowList :: [CaseTransformState] -> ShowS
show :: CaseTransformState -> String
$cshow :: CaseTransformState -> String
showsPrec :: Int -> CaseTransformState -> ShowS
$cshowsPrec :: Int -> CaseTransformState -> ShowS
Show, CaseTransformState -> CaseTransformState -> Bool
(CaseTransformState -> CaseTransformState -> Bool)
-> (CaseTransformState -> CaseTransformState -> Bool)
-> Eq CaseTransformState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CaseTransformState -> CaseTransformState -> Bool
$c/= :: CaseTransformState -> CaseTransformState -> Bool
== :: CaseTransformState -> CaseTransformState -> Bool
$c== :: CaseTransformState -> CaseTransformState -> Bool
Eq)
withUppercaseAll :: CaseTransformer
withUppercaseAll :: CaseTransformer
withUppercaseAll =
(Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer (\mblang :: Maybe Lang
mblang _ -> Maybe Lang -> Text -> Text
Unicode.toUpper Maybe Lang
mblang)
withLowercaseAll :: CaseTransformer
withLowercaseAll :: CaseTransformer
withLowercaseAll =
(Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer (\mblang :: Maybe Lang
mblang _ -> Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang)
withCapitalizeWords :: CaseTransformer
withCapitalizeWords :: CaseTransformer
withCapitalizeWords = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
where
go :: Maybe Lang -> CaseTransformState -> Text -> Text
go mblang :: Maybe Lang
mblang st :: CaseTransformState
st chunk :: Text
chunk
| Text -> Bool
isMixedCase Text
chunk = Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
AfterWordEnd Bool -> Bool -> Bool
||
CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
BeforeLastWord
= if (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
then Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang Text
chunk
else Text
chunk
| Bool
otherwise = Text
chunk
withCapitalizeFirst :: CaseTransformer
withCapitalizeFirst :: CaseTransformer
withCapitalizeFirst = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
where
go :: Maybe Lang -> CaseTransformState -> Text -> Text
go mblang :: Maybe Lang
mblang st :: CaseTransformState
st chunk :: Text
chunk
| Text -> Bool
isMixedCase Text
chunk = Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start
= if (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
then Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang Text
chunk
else Text
chunk
| Bool
otherwise = Text
chunk
withSentenceCase :: CaseTransformer
withSentenceCase :: CaseTransformer
withSentenceCase = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
where
go :: Maybe Lang -> CaseTransformState -> Text -> Text
go mblang :: Maybe Lang
mblang st :: CaseTransformState
st chunk :: Text
chunk
| Text -> Bool
isCapitalized Text
chunk
, Bool -> Bool
not (CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence)
= Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| Text -> Bool
isCapitalized Text
chunk Bool -> Bool -> Bool
|| (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
chunk
, CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence
= Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| Bool
otherwise = Text
chunk
withTitleCase :: CaseTransformer
withTitleCase :: CaseTransformer
withTitleCase = (Maybe Lang -> CaseTransformState -> Text -> Text)
-> CaseTransformer
CaseTransformer Maybe Lang -> CaseTransformState -> Text -> Text
go
where
go :: Maybe Lang -> CaseTransformState -> Text -> Text
go mblang :: Maybe Lang
mblang st :: CaseTransformState
st chunk :: Text
chunk
| Text -> Bool
isMixedCase Text
chunk = Text
chunk
| (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isUpper Text
chunk = Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
StartSentence Bool -> Bool -> Bool
|| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
Start =
Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
AfterWordEnd
, Bool -> Bool
not (Text -> Bool
isStopWord Text
chunk)
, Text -> Int -> Ordering
T.compareLength Text
chunk 1 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
GT =
Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| CaseTransformState
st CaseTransformState -> CaseTransformState -> Bool
forall a. Eq a => a -> a -> Bool
== CaseTransformState
BeforeLastWord
, Text -> Int -> Ordering
T.compareLength Text
chunk 1 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
GT =
Maybe Lang -> Text -> Text
capitalizeText Maybe Lang
mblang (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Maybe Lang -> Text -> Text
Unicode.toLower Maybe Lang
mblang Text
chunk
| Bool
otherwise = Text
chunk
isCapitalized :: Text -> Bool
isCapitalized :: Text -> Bool
isCapitalized t :: Text
t =
case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just (c :: Char
c, t' :: Text
t') -> Char -> Bool
isUpper Char
c Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isLower Text
t'
_ -> Bool
False
isMixedCase :: Text -> Bool
isMixedCase :: Text -> Bool
isMixedCase t :: Text
t = (Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isUpper Text
t Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.any Char -> Bool
isLower Text
t
capitalizeText :: Maybe Lang -> Text -> Text
capitalizeText :: Maybe Lang -> Text -> Text
capitalizeText mblang :: Maybe Lang
mblang x :: Text
x =
case Text -> Maybe (Char, Text)
T.uncons Text
x of
Just (c :: Char
c,x' :: Text
x') -> Maybe Lang -> Text -> Text
Unicode.toUpper Maybe Lang
mblang (Char -> Text
T.singleton Char
c) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
x'
Nothing -> Text
x
isStopWord :: Text -> Bool
isStopWord :: Text -> Bool
isStopWord "a" = Bool
True
isStopWord "an" = Bool
True
isStopWord "and" = Bool
True
isStopWord "as" = Bool
True
isStopWord "at" = Bool
True
isStopWord "but" = Bool
True
isStopWord "by" = Bool
True
isStopWord "down" = Bool
True
isStopWord "for" = Bool
True
isStopWord "from" = Bool
True
isStopWord "in" = Bool
True
isStopWord "into" = Bool
True
isStopWord "nor" = Bool
True
isStopWord "of" = Bool
True
isStopWord "on" = Bool
True
isStopWord "onto" = Bool
True
isStopWord "or" = Bool
True
isStopWord "over" = Bool
True
isStopWord "so" = Bool
True
isStopWord "the" = Bool
True
isStopWord "till" = Bool
True
isStopWord "to" = Bool
True
isStopWord "up" = Bool
True
isStopWord "via" = Bool
True
isStopWord "with" = Bool
True
isStopWord "yet" = Bool
True
isStopWord "about" = Bool
True
isStopWord "van" = Bool
True
isStopWord "von" = Bool
True
isStopWord "de" = Bool
True
isStopWord "d" = Bool
True
isStopWord "l" = Bool
True
isStopWord _ = Bool
False