Ticket #157: haddock-index.hs

File haddock-index.hs, 9.4 kB (added by claus, 4 years ago)

generate text index of haddock urls, from .haddock files and ghc-pkg database

Line 
1{-
2 - Read .haddock files, pkg database, build documentation index;
3 - for each documented ID, output list of modules it appears in,
4 -  with the URLs into the Haddock HTML pages
5 -
6 - Dependencies: haddock, cabal, ghc, ghc-paths, ..
7 -
8 - Usage:
9 -  haddock-index [--installed] [--withDocs] {[x/html,]x.haddock}*
10 -
11 - Options:
12 -  --installed (process all installed .haddock files)
13 -  --withDocs  (add documentation from .haddock files)
14 -
15 - this is similar to haddock's --gen-index, as invoked from cabal,
16 - but we need control over the output format (with a little more
17 - tinkering, Vim will be able to read this in as one of its nested
18 - maps/hashtables, so haskellmode can use it for documentation lookup,
19 - identifier completion, and so on; other IDEs will need slightly
20 - different formats; most users will still want the HTML index in
21 - addition to what the IDEs are using), so the haddock library should
22 - just expose the index in Haskell, for further processing;
23 -
24 - the same index, exposed by haddock library, could also serve to
25 - implement commandline doc lookup, as requested in
26 - http://hackage.haskell.org/trac/ghc/ticket/2168
27 -}
28module Main where
29
30import System.Environment
31import Data.List
32import Data.Maybe(catMaybes)
33import Data.Char(toUpper,isAlpha,isAscii,isAlphaNum,ord)
34import qualified Data.Map as Map
35
36import qualified OccName
37import qualified Name
38import qualified Module
39import qualified Packages
40import qualified DynFlags
41import MonadUtils(liftIO)
42import qualified GHC
43
44import qualified GHC.Paths as Paths
45import qualified Distribution.InstalledPackageInfo as PI
46import Documentation.Haddock(freshNameCache
47                            ,ifInstalledIfaces
48                            ,readInterfaceFile
49                            ,InstalledInterface
50                            ,instVisibleExports
51                            ,instExports
52                            ,instDocMap
53                            ,instMod
54                            ,Doc(..)
55                            ,exampleExpression
56                            ,exampleResult)
57
58instance Show Name.Name         where show name  = Name.getOccString name
59instance Show Module.PackageId  where show pid   = Module.packageIdString pid
60instance Show Module.ModuleName where show mname = Module.moduleNameString mname
61
62main = do
63  flagArgs <- getArgs
64  (withDocs,args) <- getOptsArgs (False,flagArgs)
65  iffs <- readInterfaceFiles freshNameCache
66            [ parseIfaceOption interfaceFile
67            | interfaceFile <- args ]
68  (pathMap,ifaces) <- processFile Map.empty [] iffs
69  let id_index = index ifaces
70  mapM_ (printElt pathMap withDocs . indexElt) $ id_index
71
72-- file parameters come in htmldir,interfacefile format for each
73-- package; extract Map from package id to htmldir, pass on plain
74-- interfacefile list
75processFile m ifs []                     
76  = return (m,concat ifs)
77processFile m ifs (((path,_),file):files)
78  = processFile (Map.insert package path m) (ifaces:ifs) files
79  where ifaces@(iface:_) = ifInstalledIfaces file
80        package = Module.modulePackageId $ instMod iface
81
82-- process options, optionally fetch ghc-pkg info
83getOptsArgs (withDocs,("--installed":args)) = do
84  installedHaddocks <- haddockPaths
85  getOptsArgs (withDocs,args++installedHaddocks)
86getOptsArgs (withDocs,("--withDocs":args)) =
87  getOptsArgs (True,args)
88getOptsArgs optargs                        =
89  return optargs
90
91-- using GHC API to access ghc-pkg database for ghc installed packages,
92-- get the paths to their .haddock interface files, prefixed with their
93-- haddock HTML dirs: package-htmldir,package.haddock
94haddockPaths = GHC.runGhc (Just Paths.libdir) $ do
95  dflags <- GHC.getSessionDynFlags
96  (dflags,pkgids) <- liftIO $ Packages.initPackages dflags
97  case DynFlags.pkgDatabase dflags of
98    Nothing    -> return []
99    Just pkgDB -> return $ concatMap package pkgDB
100  where
101  package pkg = case (PI.haddockInterfaces pkg,PI.haddockHTMLs pkg) of
102                  ([iface],[html]) -> [html++","++iface]
103                  _                -> [] -- TODO: any other useable cases?
104
105-- construct list of Module, Url, Doc comments for all exposed entries
106indexElt (nm,entities) = (nm,map links $ Map.toAscList entities)
107links (nm,entries) = [ ( mdl
108                       , moduleNameUrl mdl (Name.nameOccName nm)
109                       , doc
110                       )
111                     | (mdl,doc,True) <- entries ]
112
113-- simple text format:
114--
115-- name
116--  module1 : url1
117--  module2 : url2
118--  ..
119printElt pathMap withDoc (nm,modules) = do
120  putStrLn nm 
121  mapM_ (printModule pathMap withDoc) $ concat modules
122
123printModule pathMap withDoc (mdl,url,doc) =
124  putStrLn $ "  "++mdl_name++" : "++normalize ((pathMap Map.! package)++"/"++url)
125           ++if withDoc
126             then "\n"++maybe "none" id doc++"\n"
127             else ""
128  where mdl_name = Module.moduleNameString $ Module.moduleName mdl
129        package  = Module.modulePackageId mdl
130        -- lets not get confused by Windows backslashes
131        normalize ('\\':'\\':rest) = '/':normalize rest
132        normalize ('\\':rest)      = '/':normalize rest
133        normalize (c:rest)         = c:normalize rest
134        normalize []               = []
135
136-- TODO: proper formatting, perhaps the --hoogle pretty-print code?
137-- comments are also in .haddock - interesting possibilities..
138-- for now, just dump them out somehow
139showDoc (DocEmpty)            = ""
140showDoc (DocAppend a b)       = showDoc a ++ showDoc b
141showDoc (DocString s)         = s
142showDoc (DocParagraph p)      = "\n"++showDoc p++"\n"
143showDoc (DocIdentifier ids)   = concat [show id++" "|id<-ids]
144showDoc (DocModule s)         = s
145showDoc (DocEmphasis d)       = showDoc d
146showDoc (DocMonospaced d)     = showDoc d
147showDoc (DocUnorderedList ds) = concatMap showDoc ds
148showDoc (DocOrderedList ds)   = concatMap showDoc ds
149showDoc (DocDefList dds)      = concat [showDoc a++showDoc b|(a,b)<-dds]
150showDoc (DocCodeBlock d)      = showDoc d
151showDoc (DocURL s)            = s
152showDoc (DocPic s)            = s
153showDoc (DocAName s)          = s
154showDoc (DocExamples es)      = concatMap showExample es
155
156showExample e = exampleExpression e++concat (exampleResult e)
157
158---------------------------------------------------------------------
159-- the rest is cannibalized from haddock,
160-- where it should be exported in some form or other
161--
162-- roughly, we'd need:
163-- - build haddock index (similar to most backends, --gen-index)
164-- - construct URLs (hidden in XHtml backend)
165-- - pretty-print comments to text (similar to Hoogle backend)
166--
167-- the hidden global xref variables aren't helping, either
168---------------------------------------------------------------------
169
170parseIfaceOption str =
171  case break (==',') str of
172    (fpath, ',':rest) ->
173      case break (==',') rest of
174        (src, ',':file) -> ((fpath, Just src), file)
175        (file, _) -> ((fpath, Nothing), file)
176    (file, _) -> (("", Nothing), file)
177
178readInterfaceFiles name_cache_accessor pairs = do
179  mbPackages <- mapM tryReadIface pairs
180  return (catMaybes mbPackages)
181  where
182    -- try to read an interface, warn if we can't
183    tryReadIface (paths, file) = do
184      eIface <- readInterfaceFile name_cache_accessor file
185      case eIface of
186        Left err -> do
187          putStrLn ("Warning: Cannot read " ++ file ++ ":")
188          putStrLn ("   " ++ err)
189          putStrLn "Skipping this interface."
190          return Nothing
191        Right f -> return $ Just (paths, f)
192
193-- for each name (a plain string), we have a number of original HsNames that
194-- it can refer to, and for each of those we have a list of modules
195-- that export that entity.  Each of the modules exports the entity
196-- in a visible or invisible way (hence the Bool).
197full_index :: [InstalledInterface]
198           -> Map.Map String (Map.Map Name.Name [(Module.Module,Maybe String,Bool)])
199full_index ifaces = Map.fromListWith (flip (Map.unionWith (++)))
200                     (concat (map getIfaceIndex ifaces))
201
202index :: [InstalledInterface]
203      -> [(String, Map.Map Name.Name [(Module.Module,Maybe String,Bool)])]
204index ifaces = sortBy cmp $ Map.toAscList $ full_index ifaces
205  where cmp (n1,_) (n2,_) = map toUpper n1 `compare` map toUpper n2
206
207getIfaceIndex iface =
208  [ (Name.getOccString name
209     , Map.fromList [(name, [(mdl, fmap showD $ name `Map.lookup` docMap
210                                 , name `elem` instVisibleExports iface)])])
211     | name <- instExports iface
212     , let docMap = instDocMap iface ]
213  where mdl = instMod iface
214        showD (mbDoc,fnargsDoc) = maybe "none" showDoc mbDoc
215
216moduleNameUrl mdl nm = moduleHtmlFile mdl ++ '#' : nameAnchorId nm
217moduleHtmlFile mdl = mdl' ++ ".html"
218  where
219  mdl' = map (\c -> if c == '.' then '-' else c)
220             (Module.moduleNameString (Module.moduleName mdl))
221
222nameAnchorId name = makeAnchorId (prefix : ':' : OccName.occNameString name)
223 where prefix | OccName.isValOcc name = 'v'
224              | otherwise             = 't'
225
226makeAnchorId :: String -> String
227makeAnchorId [] = []
228makeAnchorId (f:r) = escape isAlpha f ++ concatMap (escape isLegal) r
229  where
230    escape p c | p c = [c]
231               | otherwise = '-' : (show (ord c)) ++ "-"
232    isLegal ':' = True
233    isLegal '_' = True
234    isLegal '.' = True
235    isLegal c = isAscii c && isAlphaNum c
236       -- NB: '-' is legal in IDs, but we use it as the escape char