In this post, we shall use read/write requests from a group of files considered as a single entity. We will also look at verifying the data. In our model, the group of files is defined by the files, and pieces. A piece is a chunk of data with a standardized size so that read/write requests are consistent. Of course, the last piece of the files stream could see its size differ from others since if the cumulative size of the files is not a multiple of the piece length.

General helpers

open System
open System.Security.Cryptography
open System.IO
open System.Net.NetworkInformation
open System.Threading

module Application =

  let directory() =
    try Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location)
    with :? System.NotSupportedException -> ""

  let drive (directory:string) =
    let idx = directory.IndexOf(Path.DirectorySeparatorChar)
    if idx = -1 then invalidArg "directory" "drive : invalid spelling"
    directory.[0..idx]

  let diskFreeSpace (drive:string) =
    let driveInfo = new DriveInfo(drive)
    if driveInfo.IsReady then
      driveInfo.TotalFreeSpace
    else
      failwith "diskFreeSpace : drive is not ready" 

// (c) Microsoft Corporation 2005-2009.
// Taken from PowerPack
// Removed OCaml warnings
//changed the dirname function to be consistent with other functions

module Filename =
  let dirName (s:string) =
    if s = "" then "."
    else
      match Path.GetDirectoryName(s) with
      | null -> if Path.IsPathRooted(s) then s else "."
      | res -> if res = "" then "." else res

//Additional functions to the System.IO namespace
module IO =

  module Directory =
    let create dir =
      if not <| Directory.Exists(dir) then
        let dirInfo = Directory.CreateDirectory(dir)
        if not dirInfo.Exists then
          invalidArg "dir" "Directory [%s] could not be created" dir  

  module File =
    let rec create fullPath len overwrite =
      if not <| File.Exists(fullPath) then
        let dir = Filename.dirName fullPath
        Directory.create dir
        let drive = Application.drive fullPath
        let freeSpace = Application.diskFreeSpace drive
        if freeSpace >= len then
          if len > 0L then
            use fs = File.Create fullPath
            fs.Seek(len - 1L, SeekOrigin.Begin) |> ignore
            fs.WriteByte 0uy
        else
          failwith "createFile : not enough space available"
      elif overwrite then
        File.Delete(fullPath)
        if File.Exists(fullPath) then
          failwith "createFile : file could not be deleted to be overwritten"
        create fullPath len false

    let copy fileFrom fileTo overwrite =
      if (overwrite || not <| File.Exists(fileTo)) then
        File.Copy(fileFrom, fileTo, overwrite)

    let length path =
      if not <| File.Exists(path) then
        invalidArg "path" "length : file not found"
      let info = new System.IO.FileInfo(path)
      info.Length

    type LockStore = Map<string, Semaphore>

    let addLock (store:LockStore ref) path =
      if not <| Map.contains path !store then
        store := Map.add path (new Semaphore(1, 1)) !store

    let asyncAcquireLock (store:LockStore ref) path =
      async
        { do addLock store path
          return! (!store).[path].AsyncWaitOne(Timeout.Infinite)
        }

    let releaseLock (store:LockStore) path =
      if Map.contains path store then
        ignore <| store.[path].Release()        

//alias for the sha1 functionality already available
module Digest =
  module SHA1 =
    let hexdigest (bytes : byte[]) =
      let csp = new SHA1CryptoServiceProvider()
      csp.ComputeHash(bytes)

File Manager


open IO

//---------------------------------------------------------------
// FILE MANAGER
//---------------------------------------------------------------

type FileEntry =
  { Path : string
    Length : int64
  } 

type FileManager =
    { Files : FileEntry[]
      SHA1s : byte[][]
      PieceLength : int64
      Length : int64
    }  

type ReadRequest =
  { Piece : int
    Offset : int
    Length : int
  }
  static member create p o l =
    { Piece = p
      Offset  = o
      Length  = l
    }

type WriteRequest =
  { Piece : int
    Offset : int
    Data : byte[]
  }
  static member create p o d =
    { Piece = p
      Offset  = o
      Data  = d
    }

[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
module FileManager =

  type t = FileManager

  //-------------------------------------------------------------
  // Events
  //-------------------------------------------------------------

  let filesCreated = new Event<FileManager>()
  let triggerFilesCreated = filesCreated.Trigger
  let FilesCreated = filesCreated.Publish

  let dataRead = new Event<FileManager * ReadRequest * byte[]>()
  let triggerDataRead = dataRead.Trigger
  let DataRead = dataRead.Publish

  let dataWritten = new Event<FileManager * WriteRequest>()
  let triggerDataWritten = dataWritten.Trigger
  let DataWritten = dataWritten.Publish

  let verifiedPiece = new Event<FileManager * int * bool>()
  let triggerVerifiedPiece = verifiedPiece.Trigger
  let VerifiedPiece = verifiedPiece.Publish

  let piecesVerificationProgressed = new Event<FileManager * int>()
  let triggerPiecesVerificationProgressed = piecesVerificationProgressed.Trigger
  let PiecesVerificationProgressed = piecesVerificationProgressed.Publish

  let piecesVerificationDone = new Event<FileManager * bool[]>()
  let triggerPiecesVerificationDone = piecesVerificationDone.Trigger
  let PiecesVerificationDone = piecesVerificationDone.Publish

  //-------------------------------------------------------------
  // I/O operations
  //-------------------------------------------------------------

  let create files sha1s pieceLength =
    { Files = files
      SHA1s = sha1s
      PieceLength = pieceLength
      Length = files |> Array.fold (fun acc file -> acc + file.Length) 0L
    }

  let lastPieceLength (fileManager:t) =
    fileManager.Length % fileManager.PieceLength

  let pieceLength piece fileManager =
    let numPieces = fileManager.SHA1s.Length
    if piece < 0 || piece >= numPieces then
        invalidArg "n" "n is too large or negative"
    elif piece = numPieces - 1 then
      lastPieceLength fileManager
    else
      fileManager.PieceLength

  let generateFiles fileManager =
    for i in 0 .. fileManager.Files.Length - 1 do
      let path = fileManager.Files.[i].Path
      Filename.dirName path |> Directory.create
      let len = fileManager.Files.[i].Length
      let overwrite = File.length path <> len
      File.create path len overwrite
    triggerFilesCreated fileManager

  let coordinatesToStartIdx piece offset fileManager =
    int64 piece * (pieceLength piece fileManager) + int64 offset

  let locks : ref<File.LockStore> = ref Map.empty

  //
  // Reading
  //

  let testReadRequest (req:ReadRequest) fileManager =
    if req.Piece < 0 then invalidArg "readRequest" "ReadRequest : piece was negative"
    if req.Piece >= fileManager.SHA1s.Length then
      invalidArg "readRequest" "ReadRequest : piece was out of bounds"
    if req.Offset < 0 then invalidArg "readRequest" "ReadRequest : offset was negative"
    if req.Length < 0 then invalidArg "readRequest" "ReadRequest : length was negative"
    if int64 (req.Offset + req.Length) > pieceLength req.Piece fileManager then
      invalidArg "readRequest" "ReadRequest : request was out of bounds" 

  let rec asyncReadRange fileManager (memoryStream:MemoryStream) start toRead currentFile currentPos =
    async
      { if toRead > 0 && currentFile < fileManager.Files.Length then
          let file = fileManager.Files.[currentFile]
          let nextFile = currentFile + 1
          let nextPos = currentPos + file.Length
          if currentPos + file.Length > start then
            let path = fileManager.Files.[currentFile].Path
            let! ok = File.asyncAcquireLock locks path
            use! fs = File.AsyncOpenRead path
            fs.Seek(start - currentPos, SeekOrigin.Begin) |> ignore
            let nToEndOfFile = file.Length - fs.Position
            let n = min toRead (int nToEndOfFile)
            if n > 0 then
              let! dataRead = fs.AsyncRead(n)
              if dataRead.Length <> n then
                failwith "asyncReadRange : could not read enough data from "
              do! memoryStream.AsyncWrite(dataRead, 0, dataRead.Length)
            try File.releaseLock !locks path with _ -> ()
            return! asyncReadRange fileManager memoryStream (start + int64 n) (toRead - n) nextFile nextPos
          else
            return! asyncReadRange fileManager memoryStream start toRead nextFile nextPos
        else
          return memoryStream.ToArray()
      }

  let asyncRead req fileManager =
    async
      { testReadRequest req fileManager
        use memoryStream = new MemoryStream()
        let start = coordinatesToStartIdx req.Piece req.Offset fileManager
        let! readBytes = asyncReadRange fileManager memoryStream start req.Length 0 0L
        triggerDataRead (fileManager, req, readBytes)
        return readBytes
      }

  //
  // Writing
  //

  let testWriteRequest (req:WriteRequest) fileManager =
    if req.Piece < 0 then invalidArg "writeRequest" "WriteRequest : piece was negative"
    if req.Piece >= fileManager.SHA1s.Length then
      invalidArg "writeRequest" "WriteRequest : piece was out of bounds"
    if req.Offset < 0 then invalidArg "writeRequest" "WriteRequest : offset was negative" 

  let rec asyncWriteRange fileManager data start toWrite written currentFile currentPos =
    async
      { if toWrite > 0 && currentFile < fileManager.Files.Length then
          let file = fileManager.Files.[currentFile]
          let nextFile = currentFile + 1
          let nextPos = currentPos + file.Length

          if currentPos + file.Length > start then
            let path = fileManager.Files.[currentFile].Path

            let! ok = File.asyncAcquireLock locks path
            use! fs = File.AsyncOpenWrite path
            fs.Seek(start - currentPos, SeekOrigin.Begin) |> ignore
            let nToEndOfFile = file.Length - fs.Position
            let n = min toWrite (int nToEndOfFile)
            if n > 0 then
              try do! fs.AsyncWrite(data, written, n)
              with _ -> failwith "asyncWriteRange : data could not be written"
            try File.releaseLock !locks path with _ -> ()
            return! asyncWriteRange fileManager data (start + int64 n) (toWrite - n) (written + n) nextFile nextPos
          else
            return! asyncWriteRange fileManager data start toWrite written nextFile nextPos
      }

  let asyncWrite req fileManager =
    async
      { testWriteRequest req fileManager
        let start = coordinatesToStartIdx req.Piece req.Offset fileManager
        do! asyncWriteRange fileManager req.Data start req.Data.Length 0 0 0L
        triggerDataWritten (fileManager, req)
      }

  //
  // Verifying
  //

  let asyncVerifySinglePieceAux piece fileManager =
    async
      { if piece >= fileManager.SHA1s.Length then
            invalidArg "piece" "asyncVerifySinglePiece : piece index is out of bounds"
        let len = pieceLength piece fileManager |> int
        let req : ReadRequest =
          { Piece = piece
            Offset = 0
            Length = len
          }
        let! pieceToCheck = asyncRead req fileManager
        return Digest.SHA1.hexdigest pieceToCheck  = fileManager.SHA1s.[piece]
      }

  let asyncVerifySinglePiece piece fileManager =
    async
      { let! ok = asyncVerifySinglePieceAux piece fileManager
        triggerVerifiedPiece(fileManager, piece, ok)
        return ok
      }

  let asyncVerifyMultiplePiecesAux pieces fileManager =
    //the lock may make the advantage of parallelism (via Async.Parallel) vanish
    //but the goal is to have results returned in an array, and parallelism
    //when using SHA1 may yield false results.
    let nVerifiedPieces = ref 0
    pieces |> Seq.map (fun piece ->
      async
        { let! res = asyncVerifySinglePieceAux piece fileManager
          lock fileManager (fun _ ->
            nVerifiedPieces := !nVerifiedPieces + 1
            triggerPiecesVerificationProgressed (fileManager, !nVerifiedPieces)
          )
          return res
        }
    ) |> Async.Parallel 

  let asyncVerifyMultiplePieces pieces fileManager =
    async
      { let! res = asyncVerifyMultiplePiecesAux pieces fileManager
        triggerPiecesVerificationDone(fileManager, res)
        return res
      }  

  let asyncVerifyAllPieces fileManager =
    asyncVerifyMultiplePieces [0..fileManager.SHA1s.Length - 1] fileManager

Comments are closed.