Ticket #20: Haddock-Unicode.patch

File Haddock-Unicode.patch, 4.9 kB (added by batterseapower, 20 months ago)
  • haddock.cabal

    From f03a5f7646896ae6883493410c3a4050a9eafe08 Mon Sep 17 00:00:00 2001
    From: Max Bolingbroke <batterseapower@hotmail.com>
    Date: Sun, 3 Feb 2013 18:56:18 +0000
    Subject: [PATCH] Use Alex 3's Unicode support to properly lex source files as
     UTF-8
    
    ---
     haddock.cabal     |  4 ++--
     src/Haddock/Lex.x | 65 +++++++++++++++++++++++++++++++++++--------------------
     2 files changed, 44 insertions(+), 25 deletions(-)
    
    diff --git a/haddock.cabal b/haddock.cabal
    index ddf0ce6..779e302 100644
    a b  
    8181  -- In a GHC tree - in particular, in a source tarball - we don't 
    8282  -- require alex or happy 
    8383  if !flag(in-ghc-tree) 
    84     build-tools: alex >= 2.3, happy >= 1.18 
     84    build-tools: alex >= 3, happy >= 1.18 
    8585  build-depends: 
    8686    base >= 4.3 && < 4.8, 
    8787    filepath, 
     
    144144  -- In a GHC tree - in particular, in a source tarball - we don't 
    145145  -- require alex or happy 
    146146  if !flag(in-ghc-tree) 
    147     build-tools: alex >= 2.3, happy >= 1.18 
     147    build-tools: alex >= 3, happy >= 1.18 
    148148  build-depends: 
    149149    base >= 4.3 && < 4.8, 
    150150    filepath, 
  • src/Haddock/Lex.x

    diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x
    index b9ebe68..5b351b6 100644
    a b  
    3030import DynFlags 
    3131import FastString 
    3232 
     33import qualified Data.Bits 
    3334import Data.Char 
    3435import Data.Word (Word8) 
    3536import Numeric 
     
    145146-- Unicode/UTF-8 support in Alex 3.x, and Unicode documentation will 
    146147-- probably get mangled. 
    147148 
     149-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. 
     150utf8Encode :: Char -> [Word8] 
     151utf8Encode = map fromIntegral . go . ord 
     152 where 
     153  go oc 
     154   | oc <= 0x7f       = [oc] 
     155 
     156   | oc <= 0x7ff      = [ 0xc0 + (oc `Data.Bits.shiftR` 6) 
     157                        , 0x80 + oc Data.Bits..&. 0x3f 
     158                        ] 
     159 
     160   | oc <= 0xffff     = [ 0xe0 + (oc `Data.Bits.shiftR` 12) 
     161                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) 
     162                        , 0x80 + oc Data.Bits..&. 0x3f 
     163                        ] 
     164   | otherwise        = [ 0xf0 + (oc `Data.Bits.shiftR` 18) 
     165                        , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) 
     166                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) 
     167                        , 0x80 + oc Data.Bits..&. 0x3f 
     168                        ] 
     169 
     170type Byte = Word8 
     171 
    148172type AlexInput = (AlexPosn,     -- current position, 
    149173                  Char,         -- previous char 
     174                  [Byte],       -- pending bytes on current char 
    150175                  String)       -- current input string 
    151176 
    152177alexInputPrevChar :: AlexInput -> Char 
    153 alexInputPrevChar (p,c,s) = c 
     178alexInputPrevChar (p,c,bs,s) = c 
    154179 
    155 alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) 
    156 alexGetByte (p,c,[]) = Nothing 
    157 alexGetByte (p,_,(c:s))  = let p' = alexMove p c 
    158                               in p' `seq`  Just (fromIntegral (ord c), (p', c, s)) 
     180alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) 
     181alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) 
     182alexGetByte (p,c,[],[]) = Nothing 
     183alexGetByte (p,_,[],(c:s))  = let p' = alexMove p c  
     184                                  (b:bs) = utf8Encode c 
     185                              in p' `seq`  Just (b, (p', c, bs, s)) 
    159186 
    160 -- for compat with Alex 2.x: 
    161 alexGetChar :: AlexInput -> Maybe (Char,AlexInput) 
    162 alexGetChar i = case alexGetByte i of 
    163                   Nothing     -> Nothing 
    164                   Just (b,i') -> Just (chr (fromIntegral b), i') 
     187data AlexPosn = AlexPn !Int !Int !Int 
     188        deriving (Eq,Show) 
    165189 
    166190alexMove :: AlexPosn -> Char -> AlexPosn 
    167191alexMove (AlexPn a l c) '\t' = AlexPn (a+1)  l     (((c+7) `div` 8)*8+1) 
    168192alexMove (AlexPn a l c) '\n' = AlexPn (a+1) (l+1)   1 
    169193alexMove (AlexPn a l c) _    = AlexPn (a+1)  l     (c+1) 
    170194 
    171 data AlexPosn = AlexPn !Int !Int !Int 
    172         deriving (Eq,Show) 
    173  
    174195type StartCode = Int 
    175196type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> DynFlags -> [LToken] 
    176197 
    177198tokenise :: DynFlags -> String -> (Int, Int) -> [LToken] 
    178 tokenise dflags str (line, col) = let toks = go (posn, '\n', eofHack str) para in {-trace (show toks)-} toks 
    179   where 
    180     posn = AlexPn 0 line col 
    181  
    182     go inp@(pos, _, str) sc = 
    183           case alexScan inp sc of 
    184                 AlexEOF -> [] 
    185                 AlexError _ -> [] 
    186                 AlexSkip  inp' _       -> go inp' sc 
    187                 AlexToken inp'@(pos',_,_) len act -> act pos (take len str) sc (\sc -> go inp' sc) dflags 
     199tokenise dflags str (line, col) = go (posn,'\n',[],eofHack str) para 
     200  where posn = AlexPn 0 line col 
     201        go inp@(pos,_,_,str) sc = 
     202          case alexScan inp sc of 
     203                AlexEOF -> [] 
     204                AlexError _ -> [] 
     205                AlexSkip  inp' len     -> go inp' sc 
     206                AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) dflags 
    188207 
    189208-- NB. we add a final \n to the string, (see comment in the beginning of line 
    190209-- production above).