From b12d4d57b1562ecb65a949b7a012230ce2461201 Mon Sep 17 00:00:00 2001 From: Alex Washburn Date: Sat, 12 May 2018 14:11:54 -0400 Subject: [PATCH] Updating changelog, Adding test case for issue #5309 --- Cabal/ChangeLog.md | 5 + Cabal/Distribution/Simple/GHC.hs | 69 +++-- Cabal/Distribution/Types/InstalledPackageInfo.hs | 1 + Cabal/tests/CheckTests.hs | 2 + Cabal/tests/ParserTests/ipi/Includes2.expr | 1 + .../ipi/internal-preprocessor-test.expr | 1 + .../tests/ParserTests/ipi/issue-2276-ghc-9885.expr | 1 + Cabal/tests/ParserTests/ipi/transformers.expr | 1 + .../regressions/cc-options-with-optimization.cabal | 15 ++ .../regressions/cc-options-with-optimization.check | 1 + .../cxx-options-with-optimization.cabal | 15 ++ .../cxx-options-with-optimization.check | 1 + .../PackageTests/Regression/T5309/T5309.cabal | 149 +++++++++++ .../PackageTests/Regression/T5309/app/Main.hs | 9 + .../PackageTests/Regression/T5309/cabal.out | 57 +++++ .../PackageTests/Regression/T5309/cabal.project | 2 + .../PackageTests/Regression/T5309/cabal.test.hs | 5 + .../T5309/lib/Bio/Character/Exportable/Class.hs | 56 +++++ .../Regression/T5309/lib/Data/TCM/Memoized.hs | 45 ++++ .../Regression/T5309/lib/Data/TCM/Memoized/FFI.hsc | 280 +++++++++++++++++++++ .../Regression/T5309/memoized-tcm/costMatrix.cpp | 93 +++++++ .../Regression/T5309/memoized-tcm/costMatrix.h | 201 +++++++++++++++ .../T5309/memoized-tcm/costMatrixWrapper.c | 28 +++ .../T5309/memoized-tcm/costMatrixWrapper.h | 24 ++ .../memoized-tcm/dynamicCharacterOperations.c | 35 +++ .../memoized-tcm/dynamicCharacterOperations.h | 36 +++ 26 files changed, 1117 insertions(+), 16 deletions(-) create mode 100644 Cabal/tests/ParserTests/regressions/cc-options-with-optimization.cabal create mode 100644 Cabal/tests/ParserTests/regressions/cc-options-with-optimization.check create mode 100644 Cabal/tests/ParserTests/regressions/cxx-options-with-optimization.cabal create mode 100644 Cabal/tests/ParserTests/regressions/cxx-options-with-optimization.check create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/T5309.cabal create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/app/Main.hs create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/cabal.out create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/cabal.project create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/cabal.test.hs create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/lib/Bio/Character/Exportable/Class.hs create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/lib/Data/TCM/Memoized.hs create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/lib/Data/TCM/Memoized/FFI.hsc create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrix.cpp create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrix.h create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrixWrapper.c create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrixWrapper.h create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/dynamicCharacterOperations.c create mode 100644 cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/dynamicCharacterOperations.h diff --git a/Cabal/ChangeLog.md b/Cabal/ChangeLog.md index 334c6f365..783ffb6fd 100644 --- a/Cabal/ChangeLog.md +++ b/Cabal/ChangeLog.md @@ -20,6 +20,11 @@ `Distribution.Simple.Glob` and `FileGlob` has been made abstract. (#5284, #3178, et al.) + * Fixed `cxx-options` and `cxx-sources` buildinfo fields for + separate compilation of C++ source files to correctly build and link + non-library components (#5309). + * Reduced warnings generated by hsc2hs and c2hs when `cxx-options` field + is present in a component. ---- diff --git a/Cabal/Distribution/Simple/GHC.hs b/Cabal/Distribution/Simple/GHC.hs index c446327d8..3a68595ad 100644 --- a/Cabal/Distribution/Simple/GHC.hs +++ b/Cabal/Distribution/Simple/GHC.hs @@ -666,10 +666,10 @@ buildOrReplLib forRepl verbosity numJobs pkg_descr lbi lib clbi = do ghcOptObjSuffix = toFlag "p_o" } sharedCxxOpts = vanillaCxxOpts `mappend` mempty { - ghcOptFPic = toFlag True, - ghcOptDynLinkMode = toFlag GhcDynamicOnly, - ghcOptObjSuffix = toFlag "dyn_o" - } + ghcOptFPic = toFlag True, + ghcOptDynLinkMode = toFlag GhcDynamicOnly, + ghcOptObjSuffix = toFlag "dyn_o" + } odir = fromFlag (ghcOptObjDir vanillaCxxOpts) createDirectoryIfMissingVerbose verbosity True odir let runGhcProgIfNeeded cxxOpts = do @@ -1083,12 +1083,27 @@ decodeMainIsArg arg -- 'tail' drops the char satisfying 'pred' where (r_suf, r_pre) = break pred' (reverse str) --- | Return C sources, GHC input files and GHC input modules + +-- | A collection of: +-- * C input files +-- * C++ input files +-- * GHC input files +-- * GHC input modules +-- +-- Used to correctly build and link sources. +data BuildSources = BuildSources { + cSourcesFiles :: [FilePath], + cxxSourceFiles :: [FilePath], + inputSourceFiles :: [FilePath], + inputSourceModules :: [ModuleName] + } + +-- | Locate and return the 'BuildSources' required to build and link. gbuildSources :: Verbosity -> Version -- ^ specVersion -> FilePath -> GBuildMode - -> IO ([FilePath], [FilePath], [FilePath], [ModuleName]) + -> IO BuildSources gbuildSources verbosity specVer tmpDir bm = case bm of GBuildExe exe -> exeSources exe @@ -1096,7 +1111,7 @@ gbuildSources verbosity specVer tmpDir bm = GBuildFLib flib -> return $ flibSources flib GReplFLib flib -> return $ flibSources flib where - exeSources :: Executable -> IO ([FilePath], [FilePath], [FilePath], [ModuleName]) + exeSources :: Executable -> IO BuildSources exeSources exe@Executable{buildInfo = bnfo, modulePath = modPath} = do main <- findFile (tmpDir : hsSourceDirs bnfo) modPath let mainModName = fromMaybe ModuleName.main $ exeMainModuleName exe @@ -1121,15 +1136,34 @@ gbuildSources verbosity specVer tmpDir bm = ++ display mainModName ++ "' listed in 'other-modules' illegally!" - return (cSources bnfo, cxxSources bnfo, [main], - filter (/= mainModName) (exeModules exe)) + return BuildSources { + cSourcesFiles = cSources bnfo, + cxxSourceFiles = cxxSources bnfo, + inputSourceFiles = [main], + inputSourceModules = filter (/= mainModName) $ exeModules exe + } - else return (cSources bnfo, cxxSources bnfo, [main], exeModules exe) - else return (main : cSources bnfo, main : cxxSources bnfo, [], exeModules exe) + else return BuildSources { + cSourcesFiles = cSources bnfo, + cxxSourceFiles = cxxSources bnfo, + inputSourceFiles = [main], + inputSourceModules = exeModules exe + } + else return BuildSources { + cSourcesFiles = main : cSources bnfo, + cxxSourceFiles = main : cxxSources bnfo, + inputSourceFiles = [], + inputSourceModules = exeModules exe + } - flibSources :: ForeignLib -> ([FilePath], [FilePath], [FilePath], [ModuleName]) + flibSources :: ForeignLib -> BuildSources flibSources flib@ForeignLib{foreignLibBuildInfo = bnfo} = - (cSources bnfo, cxxSources bnfo, [], foreignLibModules flib) + BuildSources { + cSourcesFiles = cSources bnfo, + cxxSourceFiles = cxxSources bnfo, + inputSourceFiles = [], + inputSourceModules = foreignLibModules flib + } isHaskell :: FilePath -> Bool isHaskell fp = elem (takeExtension fp) [".hs", ".lhs"] @@ -1168,10 +1202,13 @@ gbuild verbosity numJobs pkg_descr lbi bm clbi = do | otherwise = mempty rpaths <- getRPaths lbi clbi - (cSrcs, cxxSrcs, inputFiles, inputModules) <- gbuildSources verbosity - (specVersion pkg_descr) tmpDir bm + buildSources <- gbuildSources verbosity (specVersion pkg_descr) tmpDir bm - let isGhcDynamic = isDynamic comp + let cSrcs = cSourcesFiles buildSources + cxxSrcs = cxxSourceFiles buildSources + inputFiles = inputSourceFiles buildSources + inputModules = inputSourceModules buildSources + isGhcDynamic = isDynamic comp dynamicTooSupported = supportsDynamicToo comp cObjs = map (`replaceExtension` objExtension) cSrcs cxxObjs = map (`replaceExtension` objExtension) cxxSrcs diff --git a/Cabal/Distribution/Types/InstalledPackageInfo.hs b/Cabal/Distribution/Types/InstalledPackageInfo.hs index 6e4e247a6..d95d86fdf 100644 --- a/Cabal/Distribution/Types/InstalledPackageInfo.hs +++ b/Cabal/Distribution/Types/InstalledPackageInfo.hs @@ -160,6 +160,7 @@ emptyInstalledPackageInfo depends = [], abiDepends = [], ccOptions = [], + cxxOptions = [], ldOptions = [], frameworkDirs = [], frameworks = [], diff --git a/Cabal/tests/CheckTests.hs b/Cabal/tests/CheckTests.hs index 86a94d4d4..a4dabd5f8 100644 --- a/Cabal/tests/CheckTests.hs +++ b/Cabal/tests/CheckTests.hs @@ -33,6 +33,8 @@ checkTests = testGroup "regressions" , checkTest "pre-1.6-glob.cabal" , checkTest "pre-3.0-globstar.cabal" , checkTest "bad-glob-syntax.cabal" + , checkTest "cc-options-with-optimization.cabal" + , checkTest "cxx-options-with-optimization.cabal" ] checkTest :: FilePath -> TestTree diff --git a/Cabal/tests/ParserTests/ipi/Includes2.expr b/Cabal/tests/ParserTests/ipi/Includes2.expr index 6b76b3f56..0d3a02d8d 100644 --- a/Cabal/tests/ParserTests/ipi/Includes2.expr +++ b/Cabal/tests/ParserTests/ipi/Includes2.expr @@ -7,6 +7,7 @@ InstalledPackageInfo ccOptions = [], compatPackageKey = "Includes2-0.1.0.0-inplace-mylib+3gY9SyjX86dBypHcOaev1n", copyright = "", + cxxOptions = [], dataDir = "/home/travis/build/haskell/cabal/cabal-testsuite/PackageTests/Backpack/Includes2", depends = [`UnitId "base-4.10.1.0"`, `UnitId "Includes2-0.1.0.0-inplace-mysql"`], diff --git a/Cabal/tests/ParserTests/ipi/internal-preprocessor-test.expr b/Cabal/tests/ParserTests/ipi/internal-preprocessor-test.expr index 6f9ecf21b..bc5ae4258 100644 --- a/Cabal/tests/ParserTests/ipi/internal-preprocessor-test.expr +++ b/Cabal/tests/ParserTests/ipi/internal-preprocessor-test.expr @@ -6,6 +6,7 @@ InstalledPackageInfo ccOptions = [], compatPackageKey = "internal-preprocessor-test-0.1.0.0", copyright = "", + cxxOptions = [], dataDir = "/home/ogre/Documents/other-haskell/cabal/cabal-testsuite/PackageTests/CustomPreProcess", depends = [`UnitId "base-4.8.2.0-0d6d1084fbc041e1cded9228e80e264d"`], description = "See https://github.com/haskell/cabal/issues/1541#issuecomment-30155513", diff --git a/Cabal/tests/ParserTests/ipi/issue-2276-ghc-9885.expr b/Cabal/tests/ParserTests/ipi/issue-2276-ghc-9885.expr index 0a867b53c..f39d1e71d 100644 --- a/Cabal/tests/ParserTests/ipi/issue-2276-ghc-9885.expr +++ b/Cabal/tests/ParserTests/ipi/issue-2276-ghc-9885.expr @@ -6,6 +6,7 @@ InstalledPackageInfo ccOptions = [], compatPackageKey = "transformers-0.5.2.0", copyright = "", + cxxOptions = [], dataDir = "/opt/ghc/8.2.2/share/x86_64-linux-ghc-8.2.2/transformers-0.5.2.0", depends = [`UnitId "base-4.10.1.0"`], description = concat diff --git a/Cabal/tests/ParserTests/ipi/transformers.expr b/Cabal/tests/ParserTests/ipi/transformers.expr index ba0830b14..429883f3d 100644 --- a/Cabal/tests/ParserTests/ipi/transformers.expr +++ b/Cabal/tests/ParserTests/ipi/transformers.expr @@ -4,6 +4,7 @@ InstalledPackageInfo author = "Andy Gill, Ross Paterson", category = "Control", ccOptions = [], + cxxOptions = [], compatPackageKey = "transformers-0.5.2.0", copyright = "", dataDir = "/opt/ghc/8.2.2/share/x86_64-linux-ghc-8.2.2/transformers-0.5.2.0", diff --git a/Cabal/tests/ParserTests/regressions/cc-options-with-optimization.cabal b/Cabal/tests/ParserTests/regressions/cc-options-with-optimization.cabal new file mode 100644 index 000000000..6eb8cec68 --- /dev/null +++ b/Cabal/tests/ParserTests/regressions/cc-options-with-optimization.cabal @@ -0,0 +1,15 @@ +cabal-version: 2.2 +category: test +description: test a build check involving C++-options field +license: BSD-3-Clause +maintainer: me@example.com +name: cxx-options-with-optimization +synopsis: test a build check +version: 1 + +library + build-depends: base >= 4.9 && <4.10 + cc-options: -O2 + default-language: Haskell2010 + exposed-modules: Prelude + hs-source-dirs: . diff --git a/Cabal/tests/ParserTests/regressions/cc-options-with-optimization.check b/Cabal/tests/ParserTests/regressions/cc-options-with-optimization.check new file mode 100644 index 000000000..16cfdb255 --- /dev/null +++ b/Cabal/tests/ParserTests/regressions/cc-options-with-optimization.check @@ -0,0 +1 @@ +'cc-options: -O[n]' is generally not needed. When building with optimisations Cabal automatically adds '-O2' for C code. Setting it yourself interferes with the --disable-optimization flag. diff --git a/Cabal/tests/ParserTests/regressions/cxx-options-with-optimization.cabal b/Cabal/tests/ParserTests/regressions/cxx-options-with-optimization.cabal new file mode 100644 index 000000000..d081a5dd6 --- /dev/null +++ b/Cabal/tests/ParserTests/regressions/cxx-options-with-optimization.cabal @@ -0,0 +1,15 @@ +cabal-version: 2.2 +category: test +description: test a build check involving C++-options field +license: BSD-3-Clause +maintainer: me@example.com +name: cxx-options-with-optimization +synopsis: test a build check +version: 1 + +library + build-depends: base >= 4.9 && <4.10 + cxx-options: -O2 + default-language: Haskell2010 + exposed-modules: Prelude + hs-source-dirs: . diff --git a/Cabal/tests/ParserTests/regressions/cxx-options-with-optimization.check b/Cabal/tests/ParserTests/regressions/cxx-options-with-optimization.check new file mode 100644 index 000000000..822bea388 --- /dev/null +++ b/Cabal/tests/ParserTests/regressions/cxx-options-with-optimization.check @@ -0,0 +1 @@ +'cxx-options: -O[n]' is generally not needed. When building with optimisations Cabal automatically adds '-O2' for C++ code. Setting it yourself interferes with the --disable-optimization flag. diff --git a/cabal-testsuite/PackageTests/Regression/T5309/T5309.cabal b/cabal-testsuite/PackageTests/Regression/T5309/T5309.cabal new file mode 100644 index 000000000..a19f4360c --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/T5309.cabal @@ -0,0 +1,149 @@ +cabal-version: 2.2 +category: Example +build-type: Simple + +name: T5309 +version: 1.0.0.0 + +author: Alex Washburn +maintainer: github@recursion.ninja +copyright: 2018 Alex Washburn (recursion.ninja) + +synopsis: A binding to a C++ hashtable for thread-safe memoization. + +description: This package is designed to provide a "minimal working example" + to test the cxx-sources and the cxx-options buildinfo flags. + The code was pulled out PCG, https://github.com/amnh/pcg + + +common ffi-build-info + + -- We must provide the full relative path to every C file that the project depends on. + c-sources: memoized-tcm/costMatrixWrapper.c + memoized-tcm/dynamicCharacterOperations.c + + cc-options: --std=c11 + + cxx-sources: memoized-tcm/costMatrix.cpp + + cxx-options: --std=c++11 + + default-language: Haskell2010 + + -- This library is required for the C++ standard template library. + extra-libraries: stdc++ + + -- Here we list all directories that contain C header files that the FFI tools will need + -- to locate when preprocessing the C files. Without listing the directories containing + -- the C header files here, the FFI preprocession (hsc2hs, c2hs,etc.) will fail to locate + -- the requisite files. + -- Note also, that the parent directory of the nessicary C header files must be specified. + -- The preprocesser will not recursively look in subdirectories for C header files! + include-dirs: memoized-tcm + + +common language-spec + + build-depends: base >=4.5.1 +-- , lens + + default-language: Haskell2010 + + ghc-options: -O2 -Wall + + +common lib-build-info + + hs-source-dirs: lib + + -- Modules exported by the library. + other-modules: Bio.Character.Exportable.Class + Data.TCM.Memoized + Data.TCM.Memoized.FFI + + +library + + import: ffi-build-info + , language-spec + + -- Modules exported by the library. + exposed-modules: Bio.Character.Exportable.Class + Data.TCM.Memoized + Data.TCM.Memoized.FFI + + hs-source-dirs: lib + + +executable exe-no-lib + + import: ffi-build-info + , language-spec + , lib-build-info + + main-is: Main.hs + + hs-source-dirs: app + + +executable exe-with-lib + + import: language-spec + + main-is: Main.hs + + build-depends: T5309 + + hs-source-dirs: app + + +benchmark bench-no-lib + + import: ffi-build-info + , language-spec + , lib-build-info + + main-is: Main.hs + + type: exitcode-stdio-1.0 + + hs-source-dirs: app + + +benchmark bench-with-lib + + import: language-spec + + main-is: Main.hs + + type: exitcode-stdio-1.0 + + build-depends: T5309 + + hs-source-dirs: app + + +test-suite test-no-lib + + import: ffi-build-info + , language-spec + , lib-build-info + + main-is: Main.hs + + type: exitcode-stdio-1.0 + + hs-source-dirs: app + + +test-suite test-with-lib + + import: language-spec + + main-is: Main.hs + + type: exitcode-stdio-1.0 + + build-depends: T5309 + + hs-source-dirs: app diff --git a/cabal-testsuite/PackageTests/Regression/T5309/app/Main.hs b/cabal-testsuite/PackageTests/Regression/T5309/app/Main.hs new file mode 100644 index 000000000..076ddb806 --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/app/Main.hs @@ -0,0 +1,9 @@ +{-# LANGUAGE FlexibleContexts, TypeFamilies #-} + +module Main (main) where + +import Data.TCM.Memoized + + +main :: IO () +main = generateMemoizedTransitionCostMatrix 5 (const (const 1)) `seq` return () diff --git a/cabal-testsuite/PackageTests/Regression/T5309/cabal.out b/cabal-testsuite/PackageTests/Regression/T5309/cabal.out new file mode 100644 index 000000000..0606b7218 --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/cabal.out @@ -0,0 +1,57 @@ +# cabal new-build +Resolving dependencies... +Build profile: -w ghc- -O1 +In order, the following will be built: + - T5309-1.0.0.0 (lib) (first run) + - T5309-1.0.0.0 (exe:exe-no-lib) (first run) + - T5309-1.0.0.0 (exe:exe-with-lib) (first run) +Configuring library for T5309-1.0.0.0.. +Preprocessing library for T5309-1.0.0.0.. +Building library for T5309-1.0.0.0.. +Configuring executable 'exe-no-lib' for T5309-1.0.0.0.. +Preprocessing executable 'exe-no-lib' for T5309-1.0.0.0.. +Building executable 'exe-no-lib' for T5309-1.0.0.0.. +Configuring executable 'exe-with-lib' for T5309-1.0.0.0.. +Warning: The package has an extraneous version range for a dependency on an internal library: T5309 -any && ==1.0.0.0, T5309 -any && ==1.0.0.0, T5309 -any && ==1.0.0.0. This version range includes the current package but isn't needed as the current package's library will always be used. +Preprocessing executable 'exe-with-lib' for T5309-1.0.0.0.. +Building executable 'exe-with-lib' for T5309-1.0.0.0.. +# cabal new-test +Build profile: -w ghc- -O1 +In order, the following will be built: + - T5309-1.0.0.0 (test:test-no-lib) (first run) + - T5309-1.0.0.0 (test:test-with-lib) (first run) +Configuring test suite 'test-no-lib' for T5309-1.0.0.0.. +Preprocessing test suite 'test-no-lib' for T5309-1.0.0.0.. +Building test suite 'test-no-lib' for T5309-1.0.0.0.. +Running 1 test suites... +Test suite test-no-lib: RUNNING... +Test suite test-no-lib: PASS +Test suite logged to: /cabal.dist/work/./dist/build//ghc-/T5309-1.0.0.0/t/test-no-lib/test/T5309-1.0.0.0-test-no-lib.log +1 of 1 test suites (1 of 1 test cases) passed. +Configuring test suite 'test-with-lib' for T5309-1.0.0.0.. +Warning: The package has an extraneous version range for a dependency on an internal library: T5309 -any && ==1.0.0.0, T5309 -any && ==1.0.0.0, T5309 -any && ==1.0.0.0. This version range includes the current package but isn't needed as the current package's library will always be used. +Preprocessing test suite 'test-with-lib' for T5309-1.0.0.0.. +Building test suite 'test-with-lib' for T5309-1.0.0.0.. +Running 1 test suites... +Test suite test-with-lib: RUNNING... +Test suite test-with-lib: PASS +Test suite logged to: /cabal.dist/work/./dist/build//ghc-/T5309-1.0.0.0/t/test-with-lib/test/T5309-1.0.0.0-test-with-lib.log +1 of 1 test suites (1 of 1 test cases) passed. +# cabal new-bench +Build profile: -w ghc- -O1 +In order, the following will be built: + - T5309-1.0.0.0 (bench:bench-no-lib) (first run) + - T5309-1.0.0.0 (bench:bench-with-lib) (first run) +Configuring benchmark 'bench-no-lib' for T5309-1.0.0.0.. +Preprocessing benchmark 'bench-no-lib' for T5309-1.0.0.0.. +Building benchmark 'bench-no-lib' for T5309-1.0.0.0.. +Running 1 benchmarks... +Benchmark bench-no-lib: RUNNING... +Benchmark bench-no-lib: FINISH +Configuring benchmark 'bench-with-lib' for T5309-1.0.0.0.. +Warning: The package has an extraneous version range for a dependency on an internal library: T5309 -any && ==1.0.0.0, T5309 -any && ==1.0.0.0, T5309 -any && ==1.0.0.0. This version range includes the current package but isn't needed as the current package's library will always be used. +Preprocessing benchmark 'bench-with-lib' for T5309-1.0.0.0.. +Building benchmark 'bench-with-lib' for T5309-1.0.0.0.. +Running 1 benchmarks... +Benchmark bench-with-lib: RUNNING... +Benchmark bench-with-lib: FINISH diff --git a/cabal-testsuite/PackageTests/Regression/T5309/cabal.project b/cabal-testsuite/PackageTests/Regression/T5309/cabal.project new file mode 100644 index 000000000..8834d0440 --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/cabal.project @@ -0,0 +1,2 @@ +packages: + ./ diff --git a/cabal-testsuite/PackageTests/Regression/T5309/cabal.test.hs b/cabal-testsuite/PackageTests/Regression/T5309/cabal.test.hs new file mode 100644 index 000000000..24b7ebfed --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/cabal.test.hs @@ -0,0 +1,5 @@ +import Test.Cabal.Prelude +main = cabalTest $ do + cabal "new-build" ["all"] + cabal "new-test" ["all"] + cabal "new-bench" ["all"] diff --git a/cabal-testsuite/PackageTests/Regression/T5309/lib/Bio/Character/Exportable/Class.hs b/cabal-testsuite/PackageTests/Regression/T5309/lib/Bio/Character/Exportable/Class.hs new file mode 100644 index 000000000..91186b433 --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/lib/Bio/Character/Exportable/Class.hs @@ -0,0 +1,56 @@ +----------------------------------------------------------------------------- +-- | +-- Module : Bio.Character.Exportable.Class +-- Copyright : (c) 2015-2015 Ward Wheeler +-- License : BSD-style +-- +-- Maintainer : wheeler@amnh.org +-- Stability : provisional +-- Portability : portable +-- +-- Class for needed operations of coded sequences and characters +-- +-- +----------------------------------------------------------------------------- + +{-# LANGUAGE FlexibleContexts, FlexibleInstances, FunctionalDependencies, MultiParamTypeClasses #-} + +module Bio.Character.Exportable.Class where + + +import Foreign.C.Types + + +-- | +-- Represents a sequence of fixed width characters packed into a bitwise form +-- consumable by lower level functions. +class Exportable c where + + toExportableBuffer :: c -> ExportableCharacterSequence + fromExportableBuffer :: ExportableCharacterSequence -> c + + toExportableElements :: c -> Maybe ExportableCharacterElements + fromExportableElements :: ExportableCharacterElements -> c + + +-- | +-- A structure used for FFI calls. +-- +-- 'bufferChunks' contains the bit-packed representation of the character sequence. +data ExportableCharacterSequence + = ExportableCharacterSequence + { exportedElementCountSequence :: Int + , exportedElementWidthSequence :: Int + , exportedBufferChunks :: [CULong] + } deriving (Eq, Show) + + +-- | +-- A structure used for FFI calls-- +-- 'characterElements' contains the integral value for each character element. +data ExportableCharacterElements + = ExportableCharacterElements + { exportedElementCountElements :: Int + , exportedElementWidthElements :: Int + , exportedCharacterElements :: [CUInt] + } deriving (Eq, Show) diff --git a/cabal-testsuite/PackageTests/Regression/T5309/lib/Data/TCM/Memoized.hs b/cabal-testsuite/PackageTests/Regression/T5309/lib/Data/TCM/Memoized.hs new file mode 100644 index 000000000..fbe69a52d --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/lib/Data/TCM/Memoized.hs @@ -0,0 +1,45 @@ +----------------------------------------------------------------------------- +-- | +-- Module : Data.TCM.Memoized +-- Copyright : (c) 2015-2015 Ward Wheeler +-- License : BSD-style +-- +-- Maintainer : wheeler@amnh.org +-- Stability : provisional +-- Portability : portable +-- +----------------------------------------------------------------------------- + +module Data.TCM.Memoized + ( FFI.MemoizedCostMatrix + , generateMemoizedTransitionCostMatrix + , FFI.getMedianAndCost + ) where + +import qualified Data.TCM.Memoized.FFI as FFI + + +-- | +-- /O(n^2)/ where @n@ is the alphabet size. +-- +-- Generate a memoized TCM by supplying the size of the symbol alphabet and the +-- generating function for unambiguous symbol change cost to produce a memoized +-- TCM. A memoized TCM computes all the costs and medians of unambiguous, +-- singleton symbol set transitions strictly when this function is invoked. A +-- memoized TCM calculates the cost and medians of ambiguous symbol sets in a +-- lazy, memoized manner. +-- +-- *Note:* The collection of ambiguous symbols set transitions is the powerset of +-- the collection of unambiguous, singleton symbol sets. The lazy, memoization is +-- a requisite for efficient computation on any non-trivial alphabet size. +generateMemoizedTransitionCostMatrix + :: Word -- ^ Alphabet size + -> (Word -> Word -> Word) -- ^ Generating function + -> FFI.MemoizedCostMatrix +generateMemoizedTransitionCostMatrix = FFI.getMemoizedCostMatrix + +{- +-- Causes ambiguity with Data.TCM.(!) +(!) :: Exportable s => FFI.MemoizedCostMatrix -> (s, s) -> (s, Word) +(!) memo (x,y) = FFI.getMedianAndCost memo x y +-} diff --git a/cabal-testsuite/PackageTests/Regression/T5309/lib/Data/TCM/Memoized/FFI.hsc b/cabal-testsuite/PackageTests/Regression/T5309/lib/Data/TCM/Memoized/FFI.hsc new file mode 100644 index 000000000..2a96ba785 --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/lib/Data/TCM/Memoized/FFI.hsc @@ -0,0 +1,280 @@ +----------------------------------------------------------------------------- +-- | +-- TODO: Document module. +-- +-- Exports C types for dynamic characters and their constructors allong with +-- an FFI binding for the memoizing TCM structure. +----------------------------------------------------------------------------- + +{-# LANGUAGE BangPatterns, DeriveGeneric, FlexibleInstances, ForeignFunctionInterface, TypeSynonymInstances #-} + +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Data.TCM.Memoized.FFI + ( CBufferUnit + , CDynamicChar(..) + , DCElement(..) + , ForeignVoid() + , MemoizedCostMatrix(costMatrix) + , getMemoizedCostMatrix + , getMedianAndCost + -- * Utility functions + , calculateBufferLength + , coerceEnum + , constructCharacterFromExportable + , constructElementFromExportable + , constructEmptyElement + ) where + +import Bio.Character.Exportable.Class +import Data.Bits +import Foreign hiding (alignPtr) +import Foreign.C.Types +import GHC.Generics (Generic) +import System.IO.Unsafe + +-- import Debug.Trace + +#include "costMatrixWrapper.h" +#include "dynamicCharacterOperations.h" + + +-- | +-- A convient type alias for improved clairity of use. +type CBufferUnit = CULong -- This will be compatible with uint64_t + + +-- | +-- Type of a dynamic character to pass back and forth across the FFI interface. +data CDynamicChar + = CDynamicChar + { alphabetSizeChar :: CSize + , numElements :: CSize + , dynCharLen :: CSize + , dynChar :: Ptr CBufferUnit + } + + +-- | +-- Represents a single element in a dynamic character in an exportable form. +data DCElement = DCElement + { alphabetSizeElem :: CSize + , characterElement :: Ptr CBufferUnit + } deriving (Show) + + +-- | +-- A closed type wrapping a void pointer in C to the C++ memoized TCM. +data ForeignVoid deriving (Generic) + + +-- | +-- A type-safe wrapper for the mutable, memoized TCm. +newtype MemoizedCostMatrix + = MemoizedCostMatrix + { costMatrix :: StablePtr ForeignVoid + } deriving (Eq, Generic) + + +{- +-- | (✔) +instance Show CDynamicChar where + show (CDynamicChar alphSize dcLen numElems dChar) = + mconcat + ["alphabetSize: " + , show intAlphSize + , "\ndynCharLen: " + , show intLen + , "\nbuffer length: " + , show bufferLength + , "\ndynChar: " + , show $ unsafePerformIO printedArr + ] + where + bufferLength = fromEnum numElems + intAlphSize = fromEnum alphSize + intLen = fromEnum dcLen + printedArr = show <$> peekArray bufferLength dChar + +-} + + +instance Storable CDynamicChar where + + sizeOf _ = (#size struct dynChar_t) -- #size is a built-in that works with arrays, as are #peek and #poke, below + + alignment _ = alignment (undefined :: CBufferUnit) + + peek ptr = do -- to get values from the C app + alphLen <- (#peek struct dynChar_t, alphSize ) ptr + nElems <- (#peek struct dynChar_t, numElems ) ptr + seqLen <- (#peek struct dynChar_t, dynCharLen) ptr + seqVal <- (#peek struct dynChar_t, dynChar ) ptr + pure CDynamicChar + { alphabetSizeChar = alphLen + , numElements = nElems + , dynCharLen = seqLen + , dynChar = seqVal + } + + poke ptr (CDynamicChar alphLen nElems seqLen seqVal) = do -- to modify values in the C app + (#poke struct dynChar_t, alphSize ) ptr alphLen + (#poke struct dynChar_t, numElems ) ptr nElems + (#poke struct dynChar_t, dynCharLen) ptr seqLen + (#poke struct dynChar_t, dynChar ) ptr seqVal + + +-- | (✔) +instance Storable DCElement where + + sizeOf _ = (#size struct dcElement_t) + + alignment _ = alignment (undefined :: CBufferUnit) + + peek ptr = do + alphLen <- (#peek struct dcElement_t, alphSize) ptr + element <- (#peek struct dcElement_t, element ) ptr + pure DCElement + { alphabetSizeElem = alphLen + , characterElement = element + } + + poke ptr (DCElement alphLen element) = do + (#poke struct dcElement_t, alphSize) ptr alphLen + (#poke struct dcElement_t, element ) ptr element + + + +-- TODO: For now we only allocate 2d matrices. 3d will come later. +-- | +-- Create and allocate cost matrix. +-- The first argument, TCM, is only for non-ambiguous nucleotides, and it used to +-- generate the entire cost matrix, which includes ambiguous elements. TCM is +-- row-major, with each row being the left character element. It is therefore +-- indexed not by powers of two, but by cardinal integer. +foreign import ccall unsafe "costMatrixWrapper matrixInit" + initializeMemoizedCMfn_c :: CSize + -> Ptr CInt + -> IO (StablePtr ForeignVoid) + + +foreign import ccall unsafe "costMatrix getCostAndMedian" + getCostAndMedianFn_c :: Ptr DCElement + -> Ptr DCElement + -> Ptr DCElement +-- -> CSize + -> StablePtr ForeignVoid + -> IO CInt + + +-- | +-- Set up and return a cost matrix. +-- +-- The cost matrix is allocated strictly. +getMemoizedCostMatrix :: Word + -> (Word -> Word -> Word) + -> MemoizedCostMatrix +getMemoizedCostMatrix alphabetSize costFn = unsafePerformIO . withArray rowMajorList $ \allocedTCM -> do + !resultPtr <- initializeMemoizedCMfn_c (coerceEnum alphabetSize) allocedTCM + pure $ MemoizedCostMatrix resultPtr + where + rowMajorList = [ coerceEnum $ costFn i j | i <- range, j <- range ] + range = [0 .. alphabetSize - 1] + + +-- | +-- /O(1)/ amortized. +-- +-- Calculate the median symbol set and transition cost between the two input +-- symbol sets. +-- +-- *Note:* This operation is lazily evaluated and memoized for future calls. +getMedianAndCost :: Exportable s => MemoizedCostMatrix -> s -> s -> (s, Word) +getMedianAndCost memo lhs rhs = unsafePerformIO $ do + medianPtr <- constructEmptyElement alphabetSize + lhs' <- constructElementFromExportable lhs + rhs' <- constructElementFromExportable rhs + !cost <- getCostAndMedianFn_c lhs' rhs' medianPtr (costMatrix memo) + medianElement <- peek medianPtr + medianValue <- fmap buildExportable . peekArray bufferLength $ characterElement medianElement + pure (medianValue, coerceEnum cost) + where + alphabetSize = exportedElementWidthSequence $ toExportableBuffer lhs + buildExportable = fromExportableBuffer . ExportableCharacterSequence 1 alphabetSize + bufferLength = calculateBufferLength alphabetSize 1 + + +-- | +-- /O(1)/ +-- +-- Calculate the buffer length based on the element count and element bit width. +calculateBufferLength :: Enum b + => Int -- ^ Element count + -> Int -- ^ Element bit width + -> b +calculateBufferLength count width = coerceEnum $ q + if r == 0 then 0 else 1 + where + (q,r) = (count * width) `divMod` finiteBitSize (undefined :: CULong) + + +-- | +-- Coerce one 'Enum' value to another through the type's corresponding 'Int' +-- values. +coerceEnum :: (Enum a, Enum b) => a -> b +coerceEnum = toEnum . fromEnum + + +-- | +-- /O(n)/ where @n@ is the length of the dynamic character. +-- +-- Malloc and populate a pointer to an exportable representation of the +-- 'Exportable' value. The supplied value is assumed to be a dynamic character +-- and the result is a pointer to a C representation of a dynamic character. +constructCharacterFromExportable :: Exportable s => s -> IO (Ptr CDynamicChar) +constructCharacterFromExportable exChar = do + valueBuffer <- newArray $ exportedBufferChunks exportableBuffer + charPointer <- malloc :: IO (Ptr CDynamicChar) + let charValue = CDynamicChar (coerceEnum width) (coerceEnum count) bufLen valueBuffer + !_ <- poke charPointer charValue + pure charPointer + where + count = exportedElementCountSequence exportableBuffer + width = exportedElementWidthSequence exportableBuffer + bufLen = calculateBufferLength count width + exportableBuffer = toExportableBuffer exChar + + +-- | +-- /O(1)/ +-- +-- Malloc and populate a pointer to an exportable representation of the +-- 'Exportable' value. The supplied value is assumed to be a dynamic character +-- element and the result is a pointer to a C representation of a dynamic +-- character element. +constructElementFromExportable :: Exportable s => s -> IO (Ptr DCElement) +constructElementFromExportable exChar = do + valueBuffer <- newArray $ exportedBufferChunks exportableBuffer + elementPointer <- malloc :: IO (Ptr DCElement) + let elementValue = DCElement (coerceEnum width) valueBuffer + !_ <- poke elementPointer elementValue + pure elementPointer + where + width = exportedElementWidthSequence exportableBuffer + exportableBuffer = toExportableBuffer exChar + + +-- | +-- /O(1)/ +-- +-- Malloc and populate a pointer to a C representation of a dynamic character. +-- The buffer of the resulting value is intentially zeroed out. +constructEmptyElement :: Int -- ^ Bit width of a dynamic character element. + -> IO (Ptr DCElement) +constructEmptyElement alphabetSize = do + elementPointer <- malloc :: IO (Ptr DCElement) + valueBuffer <- mallocArray bufferLength + let elementValue = DCElement (coerceEnum alphabetSize) valueBuffer + !_ <- poke elementPointer elementValue + pure elementPointer + where + bufferLength = calculateBufferLength alphabetSize 1 diff --git a/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrix.cpp b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrix.cpp new file mode 100644 index 000000000..c85fa58c8 --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrix.cpp @@ -0,0 +1,93 @@ +#include + +/* +#include +#include +#include +*/ + +#include "costMatrix.h" +#include "dynamicCharacterOperations.h" +#include //for memcpy; + +#define __STDC_FORMAT_MACROS + +// TODO: I'll need this for the Haskell side of things: https://hackage.haskell.org/package/base-4.9.0.0/docs/Foreign-StablePtr.html + +costMatrix_p construct_CostMatrix_C(size_t alphSize, int* tcm) { + return new CostMatrix(alphSize, tcm); +} + +void destruct_CostMatrix_C(costMatrix_p untyped_self) { + delete static_cast (untyped_self); +} + +int call_getSetCost_C(costMatrix_p untyped_self, dcElement_t* left, dcElement_t* right, dcElement_t* retMedian) { + + CostMatrix* thisMtx = static_cast (untyped_self); + return thisMtx->getSetCostMedian(left, right, retMedian); +} + + +void freeCostMedian_t (costMedian_t* toFree) { + free(toFree->second); +} + +CostMatrix::CostMatrix(size_t alphSize, int* inTcm) { + alphabetSize = alphSize; + size_t space = alphabetSize * alphabetSize * sizeof(int); + tcm = (int*) malloc(space); + memcpy(tcm, inTcm, space); + initializeMatrix(); +} + +CostMatrix::~CostMatrix() { + for ( auto& thing: myMatrix ) { + freeCostMedian_t(&thing.second); + } + myMatrix.clear(); + hasher.clear(); + +} + +int CostMatrix::getCostMedian(dcElement_t* left, dcElement_t* right, dcElement_t* retMedian) { + keys_t toLookup; + toLookup.first = *left; + toLookup.second = *right; + mapIterator found; + int foundCost; + + found = myMatrix.find(toLookup); + + if ( found == myMatrix.end() ) { + return -1; + } else { + foundCost = found->second.first; + retMedian->element = found->second.second; + } + + return foundCost; +} + +int CostMatrix::getSetCostMedian(dcElement_t* left, dcElement_t* right, dcElement_t* retMedian) { + keys_t* toLookup = (keys_t*) malloc( sizeof(keys_t) ); + toLookup->first = *left; + toLookup->second = *right; + mapIterator found; + int foundCost; + + found = myMatrix.find(*toLookup); + + if ( found == myMatrix.end() ) { + foundCost = 0; + } else { + foundCost = found->second.first; + } + return foundCost; +} + +void CostMatrix::initializeMatrix () { ; } + +void CostMatrix::setValue(keys_t* key, costMedian_t* median) { + myMatrix.insert(std::make_pair(*key, *median)); +} diff --git a/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrix.h b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrix.h new file mode 100644 index 000000000..a7f1bc25e --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrix.h @@ -0,0 +1,201 @@ +/** costMatrix object to provide for a memoizable cost lookup table. Table is indexed by two + * dcElement values, and returns an int, for the cost. In addition, an additional dcElement + * is passed in by reference, and the median value of the two input elements is placed there. + * The getCost function is designed to interface directly with C. + * + * The key lookup is an ordered pair, so when looking up transition a -> b, a must go in as + * first in pair + * + * WARNING: In the interest of speed this code does no "type checking" to make sure that the + * two passed deElements are of the same type, i.e. that they have the same alphabet length. + * Any such checks should be done exterior to this library. + */ + +#ifndef _COSTMATRIX_H +#define _COSTMATRIX_H + +#define DEBUG 0 + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "dynamicCharacterOperations.h" + +/** Next three fns defined here to use on C side. */ +costMatrix_p construct_CostMatrix_C (size_t alphSize, int* tcm); + +void destruct_CostMatrix_C (costMatrix_p mytype); + +int call_getSetCost_C (costMatrix_p untyped_self, dcElement_t* left, dcElement_t* right, dcElement_t* retMedian); + +#ifdef __cplusplus +} +#endif + +typedef std::pair keys_t; +typedef std::pair costMedian_t; +typedef std::pair mapAccessPair_t; + +typedef void* costMatrix_p; + +/** Allocate room for a costMedian_t. Assumes alphabetSize is already initialized. */ +costMedian_t* allocCostMedian_t (size_t alphabetSize); + +/** dealloc costMedian_t. */ +void freeCostMedian_t (costMedian_t* toFree); + +/** Allocate room for a keys_t. */ +keys_t* allocKeys_t (size_t alphSize); + +/** dealloc keys_t. Calls various other free fns. */ +void freeKeys_t (const keys_t* toFree); + +/** Allocate space for Pair, calling allocators for both types. */ +mapAccessPair_t* allocateMapAccessPair (size_t alphSize); + +/** Hashes two `dcElement`s, and returns an order-dependent hash value. In this case + * "order dependent" means that the order of the arrays within the `dcElement`s matter, + * and the order that the `dcElement`s are sent in also matters, as is necessary for a + * non-symmetric tcm. + * + * First loops through each `dcElement` and combines all of the element values (recall that a + * `dcElement` has two fields, the second of which is the element, and is an array of `uint64_t`s) + * using two different seeds, then combines the two resulting values. + */ +struct KeyHash { + /** Following hash_combine code modified from here (seems to be based on Boost): + * http://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x + */ + std::size_t hash_combine (const dcElement_t lhs, const dcElement_t rhs) const { + std::size_t left_seed = 3141592653; // PI used as arbitrarily random seed + std::size_t right_seed = 2718281828; // E used as arbitrarily random seed + + std::hash hasher; + size_t elemArrCount = dcElemSize(lhs.alphSize); + for (size_t i = 0; i < elemArrCount; i++) { + left_seed ^= hasher(lhs.element[i]) + 0x9e3779b9 + (left_seed << 6) + (left_seed >> 2); + right_seed ^= hasher(rhs.element[i]) + 0x9e3779b9 + (right_seed << 6) + (right_seed >> 2); + } + left_seed ^= hasher(right_seed) + 0x9e3779b9 + (left_seed << 6) + (left_seed >> 2); + return left_seed; + } + + std::size_t operator()(const keys_t& k) const + { + return hash_combine (k.first, k.second); + } +}; + +struct KeyEqual { + // Return true if every `uint64_t` in lhs->element and rhs->element is equal, else false. + bool operator()(const keys_t& lhs, const keys_t& rhs) const + { + // Assert that all key components share the same alphSize value + if ( lhs.first.alphSize != rhs.first.alphSize + || lhs.first.alphSize != lhs.second.alphSize + || lhs.second.alphSize != rhs.second.alphSize) { + return false; + } + + //Assert that the left key elements match the right key elements + size_t elemArrWidth = dcElemSize(lhs.first.alphSize); + for (size_t i = 0; i < elemArrWidth; i++) { + if (lhs.first.element[i] != rhs.first.element[i]) { + return false; + } + if (lhs.second.element[i] != rhs.second.element[i]) { + return false; + } + } + return true; + } +}; + +typedef std::unordered_map::const_iterator mapIterator; + + +class CostMatrix +{ + public: +// CostMatrix(); + + CostMatrix(size_t alphSize, int* tcm); + + ~CostMatrix(); + + /** Getter only for cost. Necessary for testing, to insure that particular + * key pair has, in fact, already been inserted into lookup table. + */ + int getCostMedian(dcElement_t* left, dcElement_t* right, dcElement_t* retMedian); + + /** Acts as both a setter and getter, mutating myMap. + * + * Receives two dcElements and computes the transformation cost as well as + * the median for the two. Puts the median and alphabet size into retMedian, + * which must therefore by necessity be allocated elsewhere. + * + * This functin allocates _if necessary_. So freeing inputs after a call will not + * cause invalid reads from the cost matrix. + */ + int getSetCostMedian(dcElement_t* left, dcElement_t* right, dcElement_t* retMedian); + + private: + std::unordered_map myMatrix; + + std::unordered_map hasher; + + size_t alphabetSize; + + /** Stored unambiguous tcm, necessary to do first calls to findDistance() without having to rewrite findDistance() + * and computeCostMedian() + */ + int *tcm; + + /** Takes in a `keys_t` and a `costMedian_t` and updates myMap to store the new values, + * with @key as a key, and @median as the value. + */ + void setValue(keys_t* key, costMedian_t* median); + + /** Takes in a pair of keys_t (each of which is a single `dcElement`) and computes their lowest-cost median. + * Uses a Sankoff-like algorithm, where all bases are considered, and the lowest cost bases are included in the + * cost and median calculations. That means a base might appear in the median that is not present in either of + * the two elements being compared. + */ + costMedian_t* computeCostMedian(keys_t key); + + /** Find distance between an ambiguous nucleotide and an unambiguous ambElem. Return that value and the median. + * @param ambElem is ambiguous input. + * @param nucleotide is unambiguous. + * @param median is used to return the calculated median value. + * + * This fn is necessary because there isn't yet a cost matrix set up, so it's not possible to + * look up ambElems, therefore we must loop over possible values of the ambElem + * and find the lowest cost median. + * + * Nota bene: Requires symmetric, if not metric, matrix. TODO: Is this true? If so fix it? + */ + int findDistance (keys_t* searchKey, dcElement_t* ambElem); + + /** Takes in an initial TCM, which is actually just a row-major array, creates hash table of costs + * where cost is least cost between two elements, and medians, where median is union of characters. + * + * Nota bene: + * Can only be called once this.alphabetSize has been set. + */ + void initializeMatrix (); + + // DEPRECATED!!! + /** Takes in a pair of keys_t (each of which is a single `dcElement`) and computes their lowest-cost median. + * Contrast with computeCostMedian(). In this algorithm only bases which are present in at least one of + * the two elements being compared are considered. + */ + /* costMedian_t* computeCostMedianFitchy(keys_t keys); */ + +}; + +#endif // COSTMATRIX_H diff --git a/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrixWrapper.c b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrixWrapper.c new file mode 100644 index 000000000..72fe9bd73 --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrixWrapper.c @@ -0,0 +1,28 @@ +#include +#include + +#include "costMatrixWrapper.h" +#include "dynamicCharacterOperations.h" + +costMatrix_p matrixInit(size_t alphSize, int *tcm) { + return (costMatrix_p) construct_CostMatrix_C(alphSize, tcm); +} + +void matrixDestroy(costMatrix_p untyped_ptr) { + destruct_CostMatrix_C(untyped_ptr); +} + +int getCostAndMedian(dcElement_t *elem1, dcElement_t *elem2, dcElement_t *retElem, costMatrix_p tcm) { + size_t alphSize = elem1->alphSize; + dcElement_t *elem1copy = (dcElement_t *) malloc(sizeof(dcElement_t)); + elem1copy->alphSize = alphSize; + dcElement_t *elem2copy = (dcElement_t *) malloc(sizeof(dcElement_t)); + elem2copy->alphSize = alphSize; + + elem1copy->element = makePackedCharCopy( elem1->element, alphSize, 1 ); + elem2copy->element = makePackedCharCopy( elem2->element, alphSize, 1 ); + + int cost = call_getSetCost_C(tcm, elem1copy, elem2copy, retElem); + + return cost; +} diff --git a/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrixWrapper.h b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrixWrapper.h new file mode 100644 index 000000000..f592347dd --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/costMatrixWrapper.h @@ -0,0 +1,24 @@ +#ifndef _COST_MATRIX_WRAPPER_H +#define _COST_MATRIX_WRAPPER_H + +#include + +#include "dynamicCharacterOperations.h" + +/** Initialize a matrix (fill in all values for non-ambiguous chracter transition costs) using a TCM sent in from an outside source. */ +costMatrix_p matrixInit(size_t alphSize, int *tcm); + +/** C wrapper for cpp destructor */ +void matrixDestroy(costMatrix_p untyped_ptr); + +/** Like getCost, but also returns a pointer to a median value. */ +int getCostAndMedian(dcElement_t *elem1, dcElement_t *elem2, dcElement_t *retElem, costMatrix_p tcm); + +/** Following three fns are C references to cpp functions found in costMatrix.cpp */ +costMatrix_p construct_CostMatrix_C(size_t alphSize, int *tcm); + +void destruct_CostMatrix_C(costMatrix_p mytype); + +int call_getSetCost_C(costMatrix_p untyped_self, dcElement_t *left, dcElement_t *right, dcElement_t *retMedian); + +#endif // _COST_MATRIX_WRAPPER_H diff --git a/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/dynamicCharacterOperations.c b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/dynamicCharacterOperations.c new file mode 100644 index 000000000..26b1b418c --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/dynamicCharacterOperations.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include + +#include "dynamicCharacterOperations.h" + +#define __STDC_FORMAT_MACROS + +size_t dynCharSize(size_t alphSize, size_t numElems) { return 1; } + +size_t dcElemSize(size_t alphSize) { return 1; } + +packedChar *allocatePackedChar( size_t alphSize, size_t numElems ) { + packedChar *outChar = (packedChar*) calloc( dynCharSize(alphSize, numElems), sizeof(packedChar) ); + if (outChar == NULL) { + printf("Out of memory.\n"); + fflush(stdout); + exit(1); + } + return outChar; +} + +packedChar *makePackedCharCopy( packedChar *inChar, size_t alphSize, size_t numElems) { + packedChar *outChar = allocatePackedChar(alphSize, numElems); + size_t length = dynCharSize(alphSize, numElems); + for (size_t i = 0; i < length; i++) { + outChar[i] = inChar[i]; + } + return outChar; +} + +void freeDynChar( dynChar_t *p ) { free( p->dynChar ); } + +void freeDCElem( const dcElement_t *p ) { free( p->element ); } diff --git a/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/dynamicCharacterOperations.h b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/dynamicCharacterOperations.h new file mode 100644 index 000000000..4648772c7 --- /dev/null +++ b/cabal-testsuite/PackageTests/Regression/T5309/memoized-tcm/dynamicCharacterOperations.h @@ -0,0 +1,36 @@ +#ifndef DYNAMIC_CHARACTER_OPERATIONS +#define DYNAMIC_CHARACTER_OPERATIONS + +#include +#include +#include +#include + +typedef uint64_t packedChar; +typedef void *costMatrix_p; + +typedef struct dynChar_t { + size_t alphSize; + size_t numElems; // how many dc elements are stored + size_t dynCharLen; // how many uint64_ts are necessary to store the elements + packedChar *dynChar; +} dynChar_t; + +typedef struct dcElement_t { + size_t alphSize; + packedChar *element; +} dcElement_t; + +size_t dynCharSize(size_t alphSize, size_t numElems); + +size_t dcElemSize(size_t alphSize); + +void freeDynChar( dynChar_t *p ); + +void freeDCElem( const dcElement_t *p ); + +packedChar *allocatePackedChar( size_t alphSize, size_t numElems ); + +packedChar *makePackedCharCopy( packedChar *inChar, size_t alphSize, size_t numElems ); + +#endif /* DYNAMIC_CHARACTER_OPERATIONS */ -- 2.11.4.GIT