{-# OPTIONS #-}
module Language.Python.Common.LexerUtils where
import Control.Monad (liftM)
import Data.List (foldl')
import Data.Word (Word8)
import Language.Python.Common.Token as Token
import Language.Python.Common.ParserMonad hiding (location)
import Language.Python.Common.SrcLocation
import Codec.Binary.UTF8.String as UTF8 (encode)
type Byte = Word8
data BO = BOF | BOL
type StartCode = Int
type Action = SrcSpan -> Int -> String -> P Token
lineJoin :: Action
lineJoin :: Action
lineJoin SrcSpan
span Int
_len String
_str =
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> Token
LineJoinToken (SrcSpan -> Token) -> SrcSpan -> Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> SrcSpan
spanStartPoint SrcSpan
span
endOfLine :: P Token -> Action
endOfLine :: StateT ParseState (Either ParseError) Token -> Action
endOfLine StateT ParseState (Either ParseError) Token
lexToken SrcSpan
span Int
_len String
_str = do
SrcSpan -> P ()
setLastEOL (SrcSpan -> P ()) -> SrcSpan -> P ()
forall a b. (a -> b) -> a -> b
$ SrcSpan -> SrcSpan
spanStartPoint SrcSpan
span
StateT ParseState (Either ParseError) Token
lexToken
bolEndOfLine :: P Token -> Int -> Action
bolEndOfLine :: StateT ParseState (Either ParseError) Token -> Int -> Action
bolEndOfLine StateT ParseState (Either ParseError) Token
lexToken Int
bol SrcSpan
span Int
len String
inp = do
Int -> P ()
pushStartCode Int
bol
StateT ParseState (Either ParseError) Token -> Action
endOfLine StateT ParseState (Either ParseError) Token
lexToken SrcSpan
span Int
len String
inp
dedentation :: P Token -> Action
dedentation :: StateT ParseState (Either ParseError) Token -> Action
dedentation StateT ParseState (Either ParseError) Token
lexToken SrcSpan
span Int
_len String
_str = do
Int
topIndent <- P Int
getIndent
case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (SrcSpan -> Int
startCol SrcSpan
span) Int
topIndent of
Ordering
EQ -> do P ()
popStartCode
StateT ParseState (Either ParseError) Token
lexToken
Ordering
LT -> do P ()
popIndent
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
dedentToken
Ordering
GT -> SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
span String
"indentation error"
indentation :: P Token -> Int -> BO -> Action
indentation :: StateT ParseState (Either ParseError) Token -> Int -> BO -> Action
indentation StateT ParseState (Either ParseError) Token
lexToken Int
_dedentCode BO
bo SrcSpan
_loc Int
_len [] = do
P ()
popStartCode
case BO
bo of
BO
BOF -> StateT ParseState (Either ParseError) Token
lexToken
BO
BOL -> StateT ParseState (Either ParseError) Token
newlineToken
indentation StateT ParseState (Either ParseError) Token
lexToken Int
dedentCode BO
bo SrcSpan
span Int
_len String
_str = do
P ()
popStartCode
Int
parenDepth <- P Int
getParenStackDepth
if Int
parenDepth Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
then StateT ParseState (Either ParseError) Token
lexToken
else do
Int
topIndent <- P Int
getIndent
case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (SrcSpan -> Int
startCol SrcSpan
span) Int
topIndent of
Ordering
EQ -> case BO
bo of
BO
BOF -> StateT ParseState (Either ParseError) Token
lexToken
BO
BOL -> StateT ParseState (Either ParseError) Token
newlineToken
Ordering
LT -> do Int -> P ()
pushStartCode Int
dedentCode
StateT ParseState (Either ParseError) Token
newlineToken
Ordering
GT -> do Int -> P ()
pushIndent (SrcSpan -> Int
startCol SrcSpan
span)
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
indentToken
where
indentToken :: Token
indentToken = SrcSpan -> Token
IndentToken SrcSpan
span
symbolToken :: (SrcSpan -> Token) -> Action
symbolToken :: (SrcSpan -> Token) -> Action
symbolToken SrcSpan -> Token
mkToken SrcSpan
location Int
_ String
_ = Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (SrcSpan -> Token
mkToken SrcSpan
location)
token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token :: forall a.
(SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token SrcSpan -> String -> a -> Token
mkToken String -> a
read SrcSpan
location Int
len String
str
= Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> String -> a -> Token
mkToken SrcSpan
location String
literal (String -> a
read String
literal)
where
literal :: String
literal = Int -> String -> String
forall a. Int -> [a] -> [a]
take Int
len String
str
endOfFileToken :: Token
endOfFileToken :: Token
endOfFileToken = SrcSpan -> Token
EOFToken SrcSpan
SpanEmpty
dedentToken :: Token
dedentToken = SrcSpan -> Token
DedentToken SrcSpan
SpanEmpty
newlineToken :: P Token
newlineToken :: StateT ParseState (Either ParseError) Token
newlineToken = do
SrcSpan
loc <- P SrcSpan
getLastEOL
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> Token
NewlineToken SrcSpan
loc
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF :: forall a. a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF a
_user AlexInput
_inputBeforeToken Int
_tokenLength (SrcLocation
_loc, [Byte]
_bs, String
inputAfterToken)
= String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
inputAfterToken Bool -> Bool -> Bool
|| Char
nextChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' Bool -> Bool -> Bool
|| Char
nextChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\r'
where
nextChar :: Char
nextChar = String -> Char
forall a. [a] -> a
head String
inputAfterToken
notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF :: forall a. a -> AlexInput -> Int -> AlexInput -> Bool
notEOF a
_user AlexInput
_inputBeforeToken Int
_tokenLength (SrcLocation
_loc, [Byte]
_bs, String
inputAfterToken)
= Bool -> Bool
not (String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
inputAfterToken)
delUnderscores :: String -> String
delUnderscores :: String -> String
delUnderscores [] = []
delUnderscores (Char
'_':String
xs) = String -> String
delUnderscores String
xs
delUnderscores (Char
x :String
xs) = Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
delUnderscores String
xs
readBinary :: String -> Integer
readBinary :: String -> Integer
readBinary
= String -> Integer
toBinary (String -> Integer) -> (String -> String) -> String -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> String
forall a. Int -> [a] -> [a]
drop Int
2
where
toBinary :: String -> Integer
toBinary = (Integer -> Char -> Integer) -> Integer -> String -> Integer
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Integer -> Char -> Integer
forall {a}. Num a => a -> Char -> a
acc Integer
0
acc :: a -> Char -> a
acc a
b Char
'0' = a
2 a -> a -> a
forall a. Num a => a -> a -> a
* a
b
acc a
b Char
'1' = a
2 a -> a -> a
forall a. Num a => a -> a -> a
* a
b a -> a -> a
forall a. Num a => a -> a -> a
+ a
1
acc a
_ Char
_ = String -> a
forall a. HasCallStack => String -> a
error String
"Lexer ensures all digits passed to readBinary are 0 or 1."
readFloat :: String -> Double
readFloat :: String -> Double
readFloat str :: String
str@(Char
'.':String
cs) = String -> Double
forall a. Read a => String -> a
read (Char
'0'Char -> String -> String
forall a. a -> [a] -> [a]
:String -> String
readFloatRest String
str)
readFloat String
str = String -> Double
forall a. Read a => String -> a
read (String -> String
readFloatRest String
str)
readFloatRest :: String -> String
readFloatRest :: String -> String
readFloatRest [] = []
readFloatRest [Char
'.'] = String
".0"
readFloatRest (Char
c:String
cs) = Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
readFloatRest String
cs
mkString :: (SrcSpan -> String -> Token) -> Action
mkString :: (SrcSpan -> String -> Token) -> Action
mkString SrcSpan -> String -> Token
toToken SrcSpan
loc Int
len String
str = do
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> String -> Token
toToken SrcSpan
loc (Int -> String -> String
forall a. Int -> [a] -> [a]
take Int
len String
str)
stringToken :: SrcSpan -> String -> Token
stringToken :: SrcSpan -> String -> Token
stringToken = SrcSpan -> String -> Token
StringToken
rawStringToken :: SrcSpan -> String -> Token
rawStringToken :: SrcSpan -> String -> Token
rawStringToken = SrcSpan -> String -> Token
StringToken
byteStringToken :: SrcSpan -> String -> Token
byteStringToken :: SrcSpan -> String -> Token
byteStringToken = SrcSpan -> String -> Token
ByteStringToken
formatStringToken :: SrcSpan -> String -> Token
formatStringToken :: SrcSpan -> String -> Token
formatStringToken = SrcSpan -> String -> Token
StringToken
formatRawStringToken :: SrcSpan -> String -> Token
formatRawStringToken :: SrcSpan -> String -> Token
formatRawStringToken = SrcSpan -> String -> Token
StringToken
unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken = SrcSpan -> String -> Token
UnicodeStringToken
rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken = SrcSpan -> String -> Token
ByteStringToken
openParen :: (SrcSpan -> Token) -> Action
openParen :: (SrcSpan -> Token) -> Action
openParen SrcSpan -> Token
mkToken SrcSpan
loc Int
_len String
_str = do
let token :: Token
token = SrcSpan -> Token
mkToken SrcSpan
loc
Token -> P ()
pushParen Token
token
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
token
closeParen :: (SrcSpan -> Token) -> Action
closeParen :: (SrcSpan -> Token) -> Action
closeParen SrcSpan -> Token
mkToken SrcSpan
loc Int
_len String
_str = do
let token :: Token
token = SrcSpan -> Token
mkToken SrcSpan
loc
Maybe Token
topParen <- P (Maybe Token)
getParen
case Maybe Token
topParen of
Maybe Token
Nothing -> SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
loc String
err1
Just Token
open -> if Token -> Token -> Bool
matchParen Token
open Token
token
then P ()
popParen P ()
-> StateT ParseState (Either ParseError) Token
-> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
token
else SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
loc String
err2
where
err1 :: String
err1 = String
"Lexical error ! unmatched closing paren"
err2 :: String
err2 = String
"Lexical error ! unmatched closing paren"
matchParen :: Token -> Token -> Bool
matchParen :: Token -> Token -> Bool
matchParen (LeftRoundBracketToken {}) (RightRoundBracketToken {}) = Bool
True
matchParen (LeftBraceToken {}) (RightBraceToken {}) = Bool
True
matchParen (LeftSquareBracketToken {}) (RightSquareBracketToken {}) = Bool
True
matchParen Token
_ Token
_ = Bool
False
type AlexInput = (SrcLocation,
[Byte],
String)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar AlexInput
_ = String -> Char
forall a. HasCallStack => String -> a
error String
"alexInputPrevChar not used"
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (SrcLocation
loc, [], String
input)
| String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
input = Maybe (Char, AlexInput)
forall a. Maybe a
Nothing
| Bool
otherwise = SrcLocation -> Maybe (Char, AlexInput) -> Maybe (Char, AlexInput)
seq SrcLocation
nextLoc ((Char, AlexInput) -> Maybe (Char, AlexInput)
forall a. a -> Maybe a
Just (Char
nextChar, (SrcLocation
nextLoc, [], String
rest)))
where
nextChar :: Char
nextChar = String -> Char
forall a. [a] -> a
head String
input
rest :: String
rest = String -> String
forall a. [a] -> [a]
tail String
input
nextLoc :: SrcLocation
nextLoc = Char -> SrcLocation -> SrcLocation
moveChar Char
nextChar SrcLocation
loc
alexGetChar (SrcLocation
loc, Byte
_:[Byte]
_, String
_) = String -> Maybe (Char, AlexInput)
forall a. HasCallStack => String -> a
error String
"alexGetChar called with non-empty byte buffer"
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (SrcLocation
loc, Byte
b:[Byte]
bs, String
input) = (Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a. a -> Maybe a
Just (Byte
b, (SrcLocation
loc, [Byte]
bs, String
input))
alexGetByte (SrcLocation
loc, [], []) = Maybe (Byte, AlexInput)
forall a. Maybe a
Nothing
alexGetByte (SrcLocation
loc, [], Char
nextChar:String
rest) =
SrcLocation -> Maybe (Byte, AlexInput) -> Maybe (Byte, AlexInput)
seq SrcLocation
nextLoc ((Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a. a -> Maybe a
Just (Byte
byte, (SrcLocation
nextLoc, [Byte]
restBytes, String
rest)))
where
nextLoc :: SrcLocation
nextLoc = Char -> SrcLocation -> SrcLocation
moveChar Char
nextChar SrcLocation
loc
Byte
byte:[Byte]
restBytes = String -> [Byte]
UTF8.encode [Char
nextChar]
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar Char
'\n' = Int -> SrcLocation -> SrcLocation
incLine Int
1
moveChar Char
'\t' = SrcLocation -> SrcLocation
incTab
moveChar Char
'\r' = SrcLocation -> SrcLocation
forall a. a -> a
id
moveChar Char
_ = Int -> SrcLocation -> SrcLocation
incColumn Int
1
lexicalError :: P a
lexicalError :: forall a. P a
lexicalError = do
SrcLocation
location <- P SrcLocation
getLocation
Char
c <- (String -> Char)
-> StateT ParseState (Either ParseError) String
-> StateT ParseState (Either ParseError) Char
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM String -> Char
forall a. [a] -> a
head StateT ParseState (Either ParseError) String
getInput
ParseError -> P a
forall a. ParseError -> P a
throwError (ParseError -> P a) -> ParseError -> P a
forall a b. (a -> b) -> a -> b
$ Char -> SrcLocation -> ParseError
UnexpectedChar Char
c SrcLocation
location
readOctNoO :: String -> Integer
readOctNoO :: String -> Integer
readOctNoO (Char
zero:String
rest) = String -> Integer
forall a. Read a => String -> a
read (Char
zeroChar -> String -> String
forall a. a -> [a] -> [a]
:Char
'O'Char -> String -> String
forall a. a -> [a] -> [a]
:String
rest)
readOctNoO [] = String -> Integer
forall a. HasCallStack => String -> a
error String
"Lexer ensures readOctNoO is never called on an empty string"