diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dacb8044..f46f1362 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,6 +59,10 @@ jobs: - "9.0" - "9.2" - "9.4" + - "9.6" + - "9.8" + - "9.10" + - "9.12" steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index 24e70d70..9c126edd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +### 0.17.0 + * Add XXH3-64 source hash to mod files for reliable change detection + * **breaks mod file compatibility** with previous versions + * New C library dependency: xxHash (Haskell: `xxhash-ffi`) + ### 0.16.9 * Added support for legacy features in Fortran 90 free-form style (selected via `--fortranVersion=Fortran90Legacy`) diff --git a/README.md b/README.md index 13705e31..edabff0f 100644 --- a/README.md +++ b/README.md @@ -95,8 +95,10 @@ the file name: * Unknown extensions are parsed like `*.f` files. ## Building -You will need the GMP library plus header files: on many platforms, this will be -via the package `libgmp-dev`. +You will need the following C libraries plus header files: + + * GMP: on many platforms, via the package `libgmp-dev` + * xxHash: on many platforms, via the package `libxxhash-dev` Haskell library dependencies are listed in `package.yaml`. fortran-src supports building with Stack or Cabal. diff --git a/app/Main.hs b/app/Main.hs index 4bc50b93..d92860a2 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -6,6 +6,7 @@ module Main ( main ) where import Prelude hiding (readFile, mod) import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as LB +import Data.Word (Word8) import Text.PrettyPrint (render) @@ -82,8 +83,8 @@ main = do , not (null nxt) = do let fnPaths = [ fn | (_, Just (MOFile fn)) <- nxt ] newMods <- fmap concat . forM fnPaths $ \ fnPath -> do - tsStatus <- checkTimestamps fnPath - case tsStatus of + hashStatus <- checkModFileHash fnPath + case hashStatus of NoSuchFile -> do putStr $ "Does not exist: " ++ fnPath pure [emptyModFile] @@ -159,13 +160,15 @@ main = do case decodeModFile contents' of Left msg -> putStrLn $ "Error: " ++ msg Right mfs -> forM_ mfs $ \ mf -> - putStrLn $ "Filename: " ++ moduleFilename mf ++ - "\n\nStringMap:\n" ++ showStringMap (combinedStringMap [mf]) ++ - "\n\nModuleMap:\n" ++ showModuleMap (combinedModuleMap [mf]) ++ - "\n\nDeclMap:\n" ++ showGenericMap (combinedDeclMap [mf]) ++ - "\n\nTypeEnv:\n" ++ showTypes (combinedTypeEnv [mf]) ++ - "\n\nParamVarMap:\n" ++ showGenericMap (combinedParamVarMap [mf]) ++ - "\n\nOther Data Labels: " ++ show (getLabelsModFileData mf) + let hashHex = concatMap (printf "%02x") (B.unpack (moduleSourceHash mf)) + in putStrLn $ "Filename: " ++ moduleFilename mf ++ + "\nSource Hash (XXH3-64): " ++ hashHex ++ + "\n\nStringMap:\n" ++ showStringMap (combinedStringMap [mf]) ++ + "\n\nModuleMap:\n" ++ showModuleMap (combinedModuleMap [mf]) ++ + "\n\nDeclMap:\n" ++ showGenericMap (combinedDeclMap [mf]) ++ + "\n\nTypeEnv:\n" ++ showTypes (combinedTypeEnv [mf]) ++ + "\n\nParamVarMap:\n" ++ showGenericMap (combinedParamVarMap [mf]) ++ + "\n\nOther Data Labels: " ++ show (getLabelsModFileData mf) ShowFlows isFrom isSuper astBlockId -> do let pf = analyseParameterVars pvm . analyseBBlocks . @@ -221,7 +224,8 @@ compileFileToMod mvers mods path moutfile = do let version = fromMaybe (deduceFortranVersion path) mvers mmap = combinedModuleMap mods tenv = stripExtended $ combinedTypeEnv mods - runCompile = genModFile . fst . analyseTypesWithEnv tenv . analyseRenamesWithModuleMap mmap . initAnalysis + sourceHash = computeSourceHash contents + runCompile = genModFile sourceHash . fst . analyseTypesWithEnv tenv . analyseRenamesWithModuleMap mmap . initAnalysis parsedPF <- case (Parser.byVerWithMods mods version) path contents of Right pf -> return pf diff --git a/fortran-src.cabal b/fortran-src.cabal index 41d429c6..c2d8184b 100644 --- a/fortran-src.cabal +++ b/fortran-src.cabal @@ -1,11 +1,11 @@ cabal-version: 1.12 --- This file has been generated from package.yaml by hpack version 0.39.1. +-- This file has been generated from package.yaml by hpack version 0.38.1. -- -- see: https://github.com/sol/hpack name: fortran-src -version: 0.16.9 +version: 0.17.0 synopsis: Parsers and analyses for Fortran standards 66, 77, 90, 95 and 2003 (partial). description: Provides lexing, parsing, and basic analyses of Fortran code covering standards: FORTRAN 66, FORTRAN 77, Fortran 90, Fortran 95, Fortran 2003 (partial) and some legacy extensions. Includes data flow and basic block analysis, a renamer, and type analysis. For example usage, see the @@ project, which uses fortran-src as its front end. category: Language @@ -205,6 +205,7 @@ library , temporary >=1.2 && <1.4 , text >=1.2 && <2.2 , uniplate >=1.6 && <2 + , xxhash-ffi ==0.3.* default-language: Haskell2010 if os(windows) cpp-options: -DFS_DISABLE_WIN_BROKEN_TESTS @@ -269,6 +270,7 @@ executable fortran-src , temporary >=1.2 && <1.4 , text >=1.2 && <2.2 , uniplate >=1.6 && <2 + , xxhash-ffi ==0.3.* default-language: Haskell2010 if os(windows) cpp-options: -DFS_DISABLE_WIN_BROKEN_TESTS @@ -368,6 +370,7 @@ test-suite spec , temporary >=1.2 && <1.4 , text >=1.2 && <2.2 , uniplate >=1.6 && <2 + , xxhash-ffi ==0.3.* default-language: Haskell2010 if os(windows) cpp-options: -DFS_DISABLE_WIN_BROKEN_TESTS diff --git a/haskell-flake-ghc92.nix b/haskell-flake-ghc92.nix index 67443517..ed60b33c 100644 --- a/haskell-flake-ghc92.nix +++ b/haskell-flake-ghc92.nix @@ -12,8 +12,13 @@ pkgs: { singletons.source = "3.0.1"; # req because singletons-th-3.1 had bad bounds th-desugar.source = "1.13.1"; th-abstraction.source = "0.4.5.0"; + xxhash-ffi.source = "0.3"; }; + otherOverlays = [ + (self: super: { libxxhash = pkgs.xxHash; }) + ]; + # (note this is actually unused/we have to duplicate because it doesn't get # packed into basePackages or any key we can use... but nice to document here) devShell = { diff --git a/haskell-flake-ghc94.nix b/haskell-flake-ghc94.nix index 6ebf68e8..d561e1a7 100644 --- a/haskell-flake-ghc94.nix +++ b/haskell-flake-ghc94.nix @@ -12,8 +12,13 @@ pkgs: { #singletons.source = "3.0.1"; th-desugar.source = "1.14"; th-abstraction.source = "0.4.5.0"; + xxhash-ffi.source = "0.3"; }; + otherOverlays = [ + (self: super: { libxxhash = pkgs.xxHash; }) + ]; + # (note this is actually unused/we have to duplicate because it doesn't get # packed into basePackages or any key we can use... but nice to document here) devShell = { diff --git a/package.yaml b/package.yaml index 0f6a75fa..25a3f78d 100644 --- a/package.yaml +++ b/package.yaml @@ -1,5 +1,5 @@ name: fortran-src -version: '0.16.9' +version: '0.17.0' synopsis: Parsers and analyses for Fortran standards 66, 77, 90, 95 and 2003 (partial). description: >- Provides lexing, parsing, and basic analyses of Fortran code covering @@ -98,6 +98,7 @@ dependencies: - temporary >=1.2 && <1.4 - either ^>=5.0.1.1 - process >= 1.2.0.0 +- xxhash-ffi >= 0.3 && < 0.4 - singletons >= 3.0 && < 3.6 diff --git a/src/Language/Fortran/Util/ModFile.hs b/src/Language/Fortran/Util/ModFile.hs index 9079a180..9667e8cb 100644 --- a/src/Language/Fortran/Util/ModFile.hs +++ b/src/Language/Fortran/Util/ModFile.hs @@ -23,9 +23,16 @@ renamer. The other data is up to you. Note that the encoder and decoder work on lists of ModFile so that one fsmod-file may contain information about multiple Fortran files. +Each ModFile includes a source hash (XXH3-64, 8 bytes) to verify that the +precompiled information matches the current source file. Use +'checkSourceHash' to validate before using a ModFile. + One typical usage might look like: -> let modFile1 = genModFile programFile +> contents <- flexReadFile path +> let sourceHash = computeSourceHash contents +> -- ... parse contents into programFile ... +> let modFile1 = genModFile sourceHash programFile > let modFile2 = alterModFileData (const (Just ...)) "mydata" modFile1 > let bytes = encodeModFile [modFile2] > ... @@ -45,6 +52,9 @@ module Language.Fortran.Util.ModFile ModFile, ModFiles, emptyModFile, emptyModFiles, modFileSuffix , lookupModFileData, getLabelsModFileData, alterModFileData, alterModFileDataF + -- * Source hashing + , SourceHash, computeSourceHash, computeFileHash + -- * Creation , genModFile, regenModFile @@ -52,13 +62,13 @@ module Language.Fortran.Util.ModFile , encodeModFile, decodeModFile, decodeModFiles, decodeModFiles' -- * Operations - , moduleFilename + , moduleFilename, moduleSourceHash , StringMap, extractStringMap, combinedStringMap , DeclContext(..), DeclMap, extractDeclMap, combinedDeclMap , extractModuleMap, combinedModuleMap, localisedModuleMap, combinedTypeEnv , ParamVarMap, extractParamVarMap, combinedParamVarMap , genUniqNameToFilenameMap - , TimestampStatus(..), checkTimestamps + , HashStatus(..), checkModFileHash, checkSourceHash, checkTimestamps ) where import qualified Language.Fortran.AST as F @@ -70,22 +80,34 @@ import qualified Language.Fortran.Analysis.Types as FAT import qualified Language.Fortran.Util.Position as P import Language.Fortran.Util.Files ( getDirContents ) +import Control.Exception (evaluate) import Control.Monad.State import Control.Monad -- required for mtl-2.3 (GHC 9.6) import Data.Binary (Binary, encode, decodeOrFail) +import Data.Bits (shiftR, (.&.)) +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as LB +import qualified Data.ByteString.Unsafe as BU import Data.Data +import qualified Data.Digest.XXHash.FFI.C as XXH import Data.Generics.Uniplate.Operations import qualified Data.Map.Strict as M import Data.Maybe +import Data.Word (Word8, Word64) +import Foreign.C.Types (CSize(..), CULLong(..)) import GHC.Generics (Generic) import System.Directory ( doesFileExist, getModificationTime ) import qualified System.FilePath import System.FilePath ( (-<.>), (), normalise ) import System.IO ( hPutStrLn, stderr ) +import System.IO.Unsafe ( unsafePerformIO ) -------------------------------------------------- +-- | Type alias for source file hash (XXH3-64, 8 bytes) +type SourceHash = B.ByteString + -- | Standard ending of fortran-src-format "mod files" modFileSuffix :: String modFileSuffix = ".fsmod" @@ -117,6 +139,7 @@ type ParamVarMap = FAD.ParameterVarMap -- | The data stored in the "mod files" data ModFile = ModFile { mfFilename :: String + , mfSourceHash :: SourceHash -- ^ XXH3-64 hash of source file (8 bytes) , mfStringMap :: StringMap , mfModuleMap :: FAR.ModuleMap , mfDeclMap :: DeclMap @@ -137,22 +160,53 @@ emptyModFiles = [] -- | Starting point. emptyModFile :: ModFile -emptyModFile = ModFile "" M.empty M.empty M.empty M.empty M.empty M.empty +emptyModFile = ModFile "" B.empty M.empty M.empty M.empty M.empty M.empty M.empty + +-- | Convert Word64 to 8-byte ByteString (little-endian) +word64ToBytes :: Word64 -> B.ByteString +word64ToBytes w = B.pack + [ fromIntegral (w .&. 0xFF) + , fromIntegral ((w `shiftR` 8) .&. 0xFF) + , fromIntegral ((w `shiftR` 16) .&. 0xFF) + , fromIntegral ((w `shiftR` 24) .&. 0xFF) + , fromIntegral ((w `shiftR` 32) .&. 0xFF) + , fromIntegral ((w `shiftR` 40) .&. 0xFF) + , fromIntegral ((w `shiftR` 48) .&. 0xFF) + , fromIntegral ((w `shiftR` 56) .&. 0xFF) + ] + +-- | Compute XXH3-64 hash from file contents (strict ByteString). +-- Use this when you've already read the file. +computeSourceHash :: B.ByteString -> SourceHash +computeSourceHash contents = unsafePerformIO $ do + hash <- BU.unsafeUseAsCStringLen contents $ \(ptr, len) -> + XXH.c_xxh3_64bits_withSeed ptr (fromIntegral len) 0 + return $ word64ToBytes (fromIntegral hash) + +-- | Compute XXH3-64 hash of a file's contents. +-- Convenience function when you have a filepath and haven't read the file yet. +computeFileHash :: FilePath -> IO SourceHash +computeFileHash path = do + contents <- B.readFile path + return $ computeSourceHash contents -- | Extracts the module map, declaration map and type analysis from -- an analysed and renamed ProgramFile, then inserts it into the -- ModFile. -regenModFile :: forall a. (Data a) => F.ProgramFile (FA.Analysis a) -> ModFile -> ModFile -regenModFile pf mf = mf { mfModuleMap = extractModuleMap pf - , mfDeclMap = extractDeclMap pf - , mfTypeEnv = FAT.extractTypeEnvExtended pf - , mfParamVarMap = extractParamVarMap pf - , mfFilename = F.pfGetFilename pf } +regenModFile :: forall a. (Data a) => SourceHash -> F.ProgramFile (FA.Analysis a) -> ModFile -> ModFile +regenModFile srcHash pf mf = mf { mfSourceHash = srcHash + , mfModuleMap = extractModuleMap pf + , mfDeclMap = extractDeclMap pf + , mfTypeEnv = FAT.extractTypeEnvExtended pf + , mfParamVarMap = extractParamVarMap pf + -- Store only the basename for portability; the .fsmod + -- file's location provides the directory context + , mfFilename = System.FilePath.takeFileName (F.pfGetFilename pf) } -- | Generate a fresh ModFile from the module map, declaration map and -- type analysis of a given analysed and renamed ProgramFile. -genModFile :: forall a. (Data a) => F.ProgramFile (FA.Analysis a) -> ModFile -genModFile = flip regenModFile emptyModFile +genModFile :: forall a. (Data a) => SourceHash -> F.ProgramFile (FA.Analysis a) -> ModFile +genModFile srcHash pf = regenModFile srcHash pf emptyModFile -- | Looks up the raw "other data" that may be stored in a ModFile by -- applications that make use of fortran-src. @@ -208,6 +262,12 @@ decodeModFiles = foldM (\ modFiles d -> do return [(modFileName, emptyModFile)] Right mods -> do hPutStrLn stderr $ modFileName ++ ": successfully parsed precompiled file." + -- Check if the source files referenced in mfFilename exist + forM_ mods $ \ mf -> do + let srcFile = d mfFilename mf + exists <- doesFileExist srcFile + unless exists $ + hPutStrLn stderr $ modFileName ++ ": Warning: source file not found: " ++ srcFile return $ map (modFileName,) mods return $ addedModFiles ++ modFiles ) [] -- can't use emptyModFiles @@ -249,6 +309,10 @@ combinedParamVarMap = M.unions . map mfParamVarMap moduleFilename :: ModFile -> String moduleFilename = mfFilename +-- | Get the source hash from the ModFile. +moduleSourceHash :: ModFile -> SourceHash +moduleSourceHash = mfSourceHash + -------------------------------------------------- -- | Create a map that links all unique variable/function names in the @@ -360,22 +424,67 @@ extractParamVarMap pf = M.fromList cvm , (F.Declarator _ _ v F.ScalarDecl _ _) <- universeBi st :: [F.Declarator (FA.Analysis a)] , Just con <- [FA.constExp (F.getAnnotation v)] ] --- | Status of mod-file compared to Fortran file. -data TimestampStatus = NoSuchFile | CompileFile | ModFileExists FilePath +-- | Status of mod-file compared to Fortran file (hash-based). +data HashStatus = NoSuchFile | CompileFile | ModFileExists FilePath + deriving (Eq, Show) + +-- | Status of mod-file compared to Fortran file (timestamp-based, deprecated). +data TimestampStatus = TSNoSuchFile | TSCompileFile | TSModFileExists FilePath + deriving (Eq, Show) + +-- | Check if a ModFile needs recompiling by comparing source file hash. +-- Checks if both source and .fsmod exist, loads the .fsmod, and compares hashes. +-- Returns whether to compile or use the existing ModFile. +checkModFileHash :: FilePath -> IO HashStatus +checkModFileHash path = do + pathExists <- doesFileExist path + let modPath = path -<.> modFileSuffix + modExists <- doesFileExist modPath + case (pathExists, modExists) of + (False, _) -> pure NoSuchFile + (True, False) -> pure CompileFile + (True, True) -> do + -- Load the modfile and check if hash matches + -- Use strict read to avoid lazy I/O file locking issues + contents <- BL.readFile modPath + !strictContents <- evaluate (BL.toStrict contents) + let lazyContents = BL.fromStrict strictContents + case decodeModFile lazyContents of + Left _ -> pure CompileFile -- Corrupted modfile, recompile + Right [] -> pure CompileFile -- Empty modfile, recompile + Right (modFile:_) -> do + -- Check hash of source file + currentHash <- computeFileHash path + if currentHash == mfSourceHash modFile + then pure $ ModFileExists modPath + else pure CompileFile + +-- | Compare the source file hash to the hash stored in the ModFile. +-- This is the preferred method for checking if a ModFile is up-to-date. +checkSourceHash :: FilePath -> ModFile -> IO HashStatus +checkSourceHash path modFile = do + pathExists <- doesFileExist path + if not pathExists + then pure NoSuchFile + else do + currentHash <- computeFileHash path + if currentHash == mfSourceHash modFile + then pure $ ModFileExists (path -<.> modFileSuffix) + else pure CompileFile -- | Compare the source file timestamp to the fsmod file timestamp, if --- it exists. +-- it exists. DEPRECATED: Use checkSourceHash instead for more reliable validation. checkTimestamps :: FilePath -> IO TimestampStatus checkTimestamps path = do pathExists <- doesFileExist path modExists <- doesFileExist $ path -<.> modFileSuffix case (pathExists, modExists) of - (False, _) -> pure NoSuchFile - (True, False) -> pure CompileFile + (False, _) -> pure TSNoSuchFile + (True, False) -> pure TSCompileFile (True, True) -> do let modPath = path -<.> modFileSuffix pathModTime <- getModificationTime path modModTime <- getModificationTime modPath if pathModTime < modModTime - then pure $ ModFileExists modPath - else pure CompileFile + then pure $ TSModFileExists modPath + else pure TSCompileFile diff --git a/test/Language/Fortran/Analysis/ModFileSpec.hs b/test/Language/Fortran/Analysis/ModFileSpec.hs index 9ea4a245..6f2cf5ec 100644 --- a/test/Language/Fortran/Analysis/ModFileSpec.hs +++ b/test/Language/Fortran/Analysis/ModFileSpec.hs @@ -37,10 +37,11 @@ testModuleMaps = do let fixturePath = "test-data" "module" paths <- expandDirs [fixturePath] -- parse all files into mod files - pfs <- mapM (\p -> pParser p) paths - let modFiles = map genModFile pfs - -- get unique name to filemap - let mmap = genUniqNameToFilenameMap "" modFiles + pfs <- mapM pParser paths + hashes <- mapM computeFileHash paths + let modFiles = zipWith genModFile hashes pfs + -- get unique name to filemap (pass the directory as localPath) + let mmap = genUniqNameToFilenameMap fixturePath modFiles -- check that `constant` is declared in leaf.f90 let Just (leaf, _) = M.lookup "leaf_constant_1" mmap leaf `shouldBe` ("test-data" "module" "leaf.f90")