Fix wrong hyphenation pattern
[ccbib.git] / pandocTools / extractByTag.hs
bloba8af8b1aa7330e6ff784b2c631eb20a1080b8212
-- extractByTag.hs
--
-- Extract any text that has been marked up with a given tag.
import System.Environment (getArgs)
import System.Exit (exitFailure)
import System.IO

import Data.List.HT (breakAfter)
import Text.HTML.TagSoup
-- | Decide whether the command line must be rejected.
--
-- Returns 'True' (meaning "bad arguments") when no arguments were given
-- at all, or when at least one argument contains a @\'-\'@ character
-- anywhere in it.  Returns 'False' when the arguments are acceptable.
checkArgs :: [String] -> Bool
checkArgs args = null args || any ('-' `elem`) args
-- | Message written to stderr when the command line is rejected.
helpstring :: String
helpstring = "Bad Arguments!\n"
-- | Program entry point.
--
-- The command-line arguments are the tag names to extract.  When
-- 'checkArgs' rejects them, 'helpstring' is written to stderr and the
-- process terminates with a failure exit status so that calling scripts
-- can detect the error.  Otherwise stdin is passed through
-- 'extractByTag' and every extracted fragment is written to stdout on
-- its own line.
main :: IO ()
main = do
  args <- getArgs
  if checkArgs args
    then do
      hPutStr stderr helpstring
      -- Report the usage error through the exit status as well, not
      -- only through the message on stderr.
      exitFailure
    else interact (unlines . extractByTag args)
-- | Extract the fragments of a document that are wrapped in any of the
-- given tags.
--
-- The input text is parsed with 'parseTags', the matching tag runs are
-- selected with 'filterTags', and each run is turned back into text
-- with 'renderTags'.  One output string is produced per match.
extractByTag :: [String] -> String -> [String]
extractByTag tags = map renderTags . filterTags tags . parseTags
-- | Collect every element whose name is listed in @tags@.
--
-- Each result is the run of tags from a matching open tag up to and
-- including the close tag that balances it.  Elements of the same name
-- nested inside a match stay part of that match instead of ending it
-- at the first inner close tag.  If the balancing close tag never
-- appears, the match extends to the end of the input.  Scanning resumes
-- after the end of each match, so matches never overlap.
filterTags :: [String] -> [Tag String] -> [[Tag String]]
filterTags _ [] = []
filterTags tags (t:ts)
  | isStartTag tags t =
      let (body, rest) = spanElement (tagStr t) ts
      in (t : body) : filterTags tags rest
  | otherwise = filterTags tags ts

-- | Split a tag stream just after the close tag that balances an open
-- tag named @name@ which the caller has already consumed.
--
-- The first component holds everything up to and including that close
-- tag; the second component is the remaining input.
spanElement :: String -> [Tag String] -> ([Tag String], [Tag String])
spanElement name = go (0 :: Int)
  where
    -- @depth@ counts the same-named elements currently open inside the
    -- element being collected.
    go _ [] = ([], [])
    go depth (x:xs)
      | isStopTag name x && depth == 0 = ([x], xs)
      | otherwise =
          let depth' = adjust depth x
              (inside, rest) = go depth' xs
          in depth' `seq` (x : inside, rest)

    adjust depth (TagOpen n _) | n == name = depth + 1
    adjust depth (TagClose n)  | n == name = depth - 1
    adjust depth _ = depth
-- | 'True' exactly when the tag is an open tag whose name is one of
-- the given tag names; every other kind of tag yields 'False'.
isStartTag :: [String] -> Tag String -> Bool
isStartTag tags tag = case tag of
  TagOpen name _ -> name `elem` tags
  _              -> False
-- | Name of an open or close tag; the empty string for any other kind
-- of tag.
tagStr :: Tag String -> String
tagStr tag = case tag of
  TagOpen name _ -> name
  TagClose name  -> name
  _              -> ""
-- | 'True' exactly when the tag is a close tag carrying the given
-- name; every other tag yields 'False'.
isStopTag :: String -> Tag String -> Bool
isStopTag wanted tag
  | TagClose name <- tag = wanted == name
  | otherwise            = False