Last time, we created a library of Search Engine Optimization (SEO) helpers. This time we’ll see how to use them to actually fetch information about your website’s rankings (and your competitors’).

Notes :

  • F# PowerPack needs to be added in the references, and so does System.Windows.Forms ;
  • in the same directory as the application, you need: keywords.txt (a file containing all keywords / keyword expressions, one per line), websites.txt (the websites to analyze, one per line) and a folder named “output” (its location can be changed through the application options; see the bottom of the post).

Helpers

namespace SeoAnalysis

open System
open System.IO
open System.Text
open System.Windows.Forms

[<AutoOpen>]
module internal Helpers = 

  // [shouldKeep url websites] is true when [url] is a member of the set [websites].
  let shouldKeep url websites = Set.contains url websites

  // [host url] returns "scheme://host" for an absolute URL, or the empty
  // string when [url] cannot be parsed as one.
  let host url =
    // Uri.TryCreate reports malformed input through its result, so nothing
    // has to be thrown and swallowed for every unparsable link.
    match Uri.TryCreate((url : string), UriKind.Absolute) with
    | true, uri -> sprintf "%s://%s" uri.Scheme uri.Host
    | _ -> String.Empty

  // [defaultTimeout task ms defaultValue] runs [task] synchronously with a
  // timeout of [ms] milliseconds. Any failure, the timeout included, is
  // deliberately turned into [defaultValue]: callers want a best-effort answer.
  let defaultTimeout task ms defaultValue =
    try
      Async.RunSynchronously(task, ms)
    with
      _ -> defaultValue

  // Directory that contains the running executable.
  let appDir = Path.GetDirectoryName(Application.ExecutablePath)

  // [filename outputDir x] builds the path "<outputDir>/<yyyyMMdd>_<x>.csv",
  // where the prefix is today's date.
  let filename outputDir x =
    // The invariant culture keeps the prefix Gregorian and ASCII even when the
    // machine's default culture uses another calendar.
    let now =
      DateTime.Now.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture)
    let filename = sprintf "%s_%s.csv" now x
    Path.Combine(outputDir, filename)

  // Wraps the stream [fs] in a UTF-8 text writer.
  let streamWriter fs = new StreamWriter(fs, Encoding.UTF8)

PageRank

We use a timeout for the asynchronous fetching of the page rank so that we don’t wait for hours if the Google PageRank is unavailable for whatever reason (server down, we are blacklisted, etc.).

namespace SeoAnalysis

open System.Collections.Generic

module PageRank =

  // Agent that answers page-rank requests one at a time and remembers every
  // answer. A message is a (website, reply channel) pair. The first request
  // for a website runs Seo.Google.asyncPageRank through defaultTimeout
  // (250 ms, fallback value 0); whatever comes back, the fallback included,
  // is stored and served to all later requests for the same website.
  let processor =
    MailboxProcessor<_>.Start(fun inbox ->
      let cache = Dictionary<_, _>()
      let rec serve () =
        async {
          let! (website, channel : AsyncReplyChannel<_>) = inbox.Receive()
          let rank =
            match cache.TryGetValue(website) with
            | true, known -> known
            | _ ->
                let fetched = defaultTimeout (Seo.Google.asyncPageRank website) 250 0
                cache.[website] <- fetched
                fetched
          channel.Reply(rank)
          return! serve ()
        }
      serve ()
    )

  // [get website] asks the agent for the page rank of [website] and returns
  // the asynchronous reply.
  let get website =
    processor.PostAndAsyncReply(fun channel -> website, channel)

IndexedLinks

namespace SeoAnalysis

open System
open System.Collections.Generic
open System.IO

open SeoAnalysis

module IndexedLinks =

  // Base name of the CSV report written by [run]; `filename` turns it into
  // "<date>_<name>.csv" inside the output directory.
  let INDEXED_LINKS = "IndexedLinks"

  //
  // do not modify below
  //

  // [run engine websites maxResults outputDir] writes the indexed-links CSV
  // report: for every website, one line per indexed link returned by
  // Seo.SearchEngine.indexedLinks, together with the engine name, the
  // website's page rank and the count returned alongside the links.
  // Nothing is written when [maxResults] is 0.
  let run engine websites maxResults outputDir =
    async {
      if maxResults > 0u then
        let path = filename outputDir INDEXED_LINKS
        use fs = File.Create(path)
        use sw = streamWriter fs
        sw.WriteLine("MOTEUR, SITE, PR, NOMBRE DE PAGES INDEXEES, PAGE")

        for website in websites do
          let! pageRank = PageRank.get website

          let! nIndexedLinks, indexedLinks = Seo.SearchEngine.indexedLinks engine website maxResults

          // Only the URL of each link is reported; the second tuple component is unused here.
          for link,_ in indexedLinks do
            sprintf "%s, %s, %d, %d, %s"
              engine.Name
              website
              pageRank
              nIndexedLinks
              link.Url
            |> sw.WriteLine

        printfn "%s %s" INDEXED_LINKS System.Environment.NewLine
    }

KeywordMatches

namespace SeoAnalysis

open System
open System.Collections.Generic
open System.IO
open System.Text

open Seo
open SeoAnalysis

module KeywordMatches =

  // Base names of the CSV reports written by this module. Each one is handed
  // to `filename` together with the output directory by the function that
  // writes the report, and is also echoed in the progress messages.
  let KEYWORD_MATCHES = "KeywordMatches"
  let NUM_MATCHES_PER_KEYWORD = "NumMatchesPerKeyword"
  let BEST_RANK_PER_KEYWORD = "BestRankPerKeyword"
  let NUM_TOP_RANKS = "NumTopRanks"

  //
  // do not modify below
  //

  //[allKeywordMatches engine keywords websites maxResults] returns:
  //Keyword =>
  //  n,
  //  [host =>
  //    { (link1, rank1);
  //      (link_n, rank_n)
  //    }
  //  ]
  //where n is the first value returned by SearchEngine.keywordMatches for the
  //keyword (presumably the total number of results - to be confirmed against
  //the Seo library) and host is the "scheme://host" prefix of the link's URL.
  //
  //websites = Some ws keeps only the links whose host belongs to ws;
  //websites = None keeps every link whose host could be determined.
  //Links whose URL cannot be parsed are always dropped, since they cannot be
  //attached to any host.
  let allKeywordMatches engine keywords websites maxResults =
    async {
      let all = Dictionary<_, _>()

      // link => host of the link; links with an unparsable URL never get an entry
      let hosts = Dictionary<_, _>()

      for keyword in keywords do
        printfn "fetching matches for %s" keyword
        let! nAllKeywordMatches, allKeywordMatches =
          SearchEngine.keywordMatches engine keyword maxResults

        for (link, _) in allKeywordMatches do
          let hostUrl = host link.Url
          if hostUrl.Length > 0 then hosts.[link] <- hostUrl

        // A link is kept only when its host is known; otherwise the grouping
        // below would fail on hosts.[link]. The website filter is a single set
        // lookup, so there is no need to iterate over the websites.
        let keep (link, _) =
          match hosts.TryGetValue(link) with
          | true, hostUrl ->
              (match websites with
               | None -> true
               | Some ws -> shouldKeep hostUrl ws)
          | _ -> false

        let filtered = allKeywordMatches |> List.filter keep

        let grouppedFiltered = filtered |> Seq.groupBy (fun (link, _) -> hosts.[link])
        all.[keyword] <- (nAllKeywordMatches, grouppedFiltered )

      return all
    }

  // [keywordMatches engine all outputDir] writes the KEYWORD_MATCHES CSV
  // report from the dictionary built by allKeywordMatches: one line per
  // (keyword, website, link) with the engine name, the count stored with the
  // keyword, the link's rank, the website's page rank, and the link's URL and
  // title.
  let keywordMatches (engine:SearchEngine.t) all outputDir =
    async {
      printfn "beginning %s" KEYWORD_MATCHES
      let path = filename outputDir KEYWORD_MATCHES
      use fs = File.Create(path)
      use sw = streamWriter fs
      sw.WriteLine("MOTEUR, MOT CLE, NB. RESULTATS TOTAL, POSITION, SITE, PAGE RANK, PAGE, TITRE")

      for KeyValue(keyword, (nAllKeywordMatches, grouppedFiltered)) in all do
        for (website, links) in grouppedFiltered do
          // The page rank depends only on the website, so it is requested once
          // per group instead of once per link.
          let! pr = PageRank.get website
          for (link:Link), rank in links do
            let line =
              sprintf "%s, %s, %d, %d, %s, %d, %s, %s"
                engine.Name
                keyword
                nAllKeywordMatches
                rank
                website
                pr
                link.Url
                // commas in the title would be read as extra CSV columns
                (link.Title.Replace(",", "-"))
            sw.WriteLine line

      printfn "%s done" KEYWORD_MATCHES
    }

  // [numMatchesPerKeyword websites all outputDir] writes the
  // NUM_MATCHES_PER_KEYWORD CSV report: a header row listing the websites,
  // then one row per keyword giving, for each website, how many links were
  // grouped under it (0 when there are none).
  let numMatchesPerKeyword websites all outputDir =
    printfn "beginning %s" NUM_MATCHES_PER_KEYWORD
    use fs = File.Create(filename outputDir NUM_MATCHES_PER_KEYWORD)
    use sw = streamWriter fs

    // Reused for every row; reset by setting its length to 0.
    let row = StringBuilder()

    // Header: empty first cell, then one column per website.
    for website in websites do
      Printf.bprintf row ",%s" website
    sw.WriteLine(row.ToString())

    for KeyValue(keyword, (_, groups)) in all do
      // website => number of matching links for this keyword
      let counts = Dictionary<_, _>()
      for website in websites do
        counts.[website] <- 0
      for (website, links) in groups do
        counts.[website] <- Seq.length links

      row.Length <- 0
      Printf.bprintf row "%s" keyword
      for website in websites do
        Printf.bprintf row ",%d" counts.[website]
      sw.WriteLine(row.ToString())

    printfn "%s done" NUM_MATCHES_PER_KEYWORD

  // [bestRankPerKeyword websites all outputDir] writes the
  // BEST_RANK_PER_KEYWORD CSV report: a header row listing the websites, then
  // one row per keyword giving the smallest rank found for each website.
  // A website without any match (stored as 0) is printed as UInt32.MaxValue.
  let bestRankPerKeyword websites all outputDir =
    printfn "beginning %s" BEST_RANK_PER_KEYWORD
    use fs = File.Create(filename outputDir BEST_RANK_PER_KEYWORD)
    use sw = streamWriter fs

    // Reused for every row; reset by setting its length to 0.
    let row = StringBuilder()

    // Header: empty first cell, then one column per website.
    for website in websites do
      Printf.bprintf row ",%s" website
    sw.WriteLine(row.ToString())

    for KeyValue(keyword, (_, groups)) in all do
      // website => best (lowest) rank for this keyword, 0u meaning "no match"
      let best = Dictionary<_, _>()
      for website in websites do
        best.[website] <- 0u
      for (website, links) in groups do
        best.[website] <- links |> Seq.map snd |> Seq.min

      row.Length <- 0
      Printf.bprintf row "%s" keyword
      for website in websites do
        let shown =
          match best.[website] with
          | 0u -> UInt32.MaxValue
          | rank -> rank
        Printf.bprintf row ",%d" shown
      sw.WriteLine(row.ToString())

    printfn "%s done" BEST_RANK_PER_KEYWORD

  // [numTopRanks websites all outputDir] writes the NUM_TOP_RANKS CSV report:
  // one row per website counting, over all keywords, the links ranked first,
  // in the top 3, in the top 10, in the top 20, and the total number of links.
  let numTopRanks websites all outputDir =
    printfn "beginning %s" NUM_TOP_RANKS
    use fs = File.Create(filename outputDir NUM_TOP_RANKS)
    use sw = streamWriter fs

    sw.WriteLine("URL(s),1ère Pos.,TOP 3,TOP 10,TOP 20,TOTAL")

    // website => [| rank 1; top 3; top 10; top 20; all matches |]
    let tally = Dictionary<_, _>()
    for website in websites do
      tally.[website] <- Array.zeroCreate<int> 5

    let bump (slots : int[]) i = slots.[i] <- slots.[i] + 1

    for KeyValue(_, (_, groups)) in all do
      for (website, links) in groups do
        for (_, rank) in links do
          let slots = tally.[website]
          bump slots 4
          // The buckets are cumulative: a rank-1 link also counts as top 3, 10 and 20.
          if rank <= 20u then bump slots 3
          if rank <= 10u then bump slots 2
          if rank <= 3u then bump slots 1
          if rank = 1u then bump slots 0

    for website in websites do
      let slots = tally.[website]
      sw.WriteLine(
        sprintf "%s,%d,%d,%d,%d,%d" website slots.[0] slots.[1] slots.[2] slots.[3] slots.[4])

    printfn "%s done" NUM_TOP_RANKS

  // [run engine keywords websites maxResults outputDir] fetches the keyword
  // matches and writes the KEYWORD_MATCHES report; the three per-website
  // reports are written only when a set of websites is given.
  // Nothing happens when [maxResults] is 0.
  let run engine keywords websites maxResults outputDir =
    async {
      if maxResults > 0u then
        printfn "running keyword matches functions..."
        let! all = allKeywordMatches engine keywords websites maxResults
        do! keywordMatches engine all outputDir
        match websites with
        | Some ws ->
            numMatchesPerKeyword ws all outputDir
            bestRankPerKeyword ws all outputDir
            numTopRanks ws all outputDir
        | None -> ()
        printfn "%s" System.Environment.NewLine
    }

BackLinks

namespace SeoAnalysis

open System
open System.Collections.Generic
open System.IO
open System.Text

open Seo
open SeoAnalysis

module BackLinks =

  // Base names of the CSV reports written by this module. Each one is handed
  // to `filename` together with the output directory by the function that
  // writes the report, and is also echoed in the progress messages.
  let BACK_LINKS = "BackLinksFull"
  let BACK_LINKS_PER_WEBSITE = "BackLinksPerWebsite"

  //
  // do not modify below
  //

  //[allBackLinks engine websites maxResults] returns:
  //back link =>
  //  page rank of the back link's URL,
  //  { (website_1, websitePageRank_1, n_1, rank_1);
  //    (website_k, websitePageRank_k, n_k, rank_k)
  //  }
  //with one tuple per website the back link was returned for: the website,
  //its page rank, the first value returned by SearchEngine.backLinks for that
  //website (presumably its total number of back links - to be confirmed
  //against the Seo library) and the rank of the back link in that result.
  let allBackLinks engine websites maxResults =
    async {
      let all = Dictionary<_, _>()

      for website in websites do
        printfn "fetching page rank for %s" website
        let! websitePageRank = PageRank.get website

        printfn "fetching back links for %s" website
        let! nBackLinks, backLinks = SearchEngine.backLinks engine website false false maxResults

        for link, rank in backLinks do
          let entry = (website, websitePageRank, nBackLinks, rank)
          match all.TryGetValue(link) with
          | true, (knownPageRank, referenced) ->
              // The link's page rank was fetched when it was first recorded;
              // only the set of referenced websites has to grow.
              all.[link] <- (knownPageRank, Set.add entry referenced)
          | _ ->
              let! backLinkPageRank = PageRank.get link.Url
              all.[link] <- (backLinkPageRank, Set.singleton entry)

      return all
    }

  // [backLinks engine all outputDir] writes the BACK_LINKS CSV report from the
  // dictionary built by allBackLinks: one line per (back link, website) pair
  // with both page ranks, the count stored for the website, the back link's
  // host, URL and title, and the back link's rank.
  let backLinks (engine:SearchEngine.t) all outputDir =
    printfn "beginning %s" BACK_LINKS
    use fs = File.Create(filename outputDir BACK_LINKS)
    use sw = streamWriter fs
    sw.WriteLine("ENGINE, URL SITE, PAGE RANK SITE, NB TOTAL REFERENTS, SITE REFERENT, PAGE RANK SITE REFERENT, PAGE REFERENTE, TITRE PAGE, RANG REFERENT")

    for KeyValue(referrer:Link, (referrerPageRank, targets)) in all do
      for (target, targetPageRank, nReferrers, referrerRank) in targets do
        sw.WriteLine(
          sprintf "%s, %s, %d, %d, %s, %d, %s, %s, %d"
            engine.Name
            target
            targetPageRank
            nReferrers
            (host referrer.Url)
            referrerPageRank
            referrer.Url
            // commas in the title would be read as extra CSV columns
            (referrer.Title.Replace(",", "-"))
            referrerRank)

    printfn "%s done" BACK_LINKS

  // [backLinksPerWebsite engine inWebsites all outputDir] writes the
  // BACK_LINKS_PER_WEBSITE CSV report: a header row listing the websites, then
  // one row per back link telling, for each website, whether the back link was
  // recorded for it. [engine] is not used by this function.
  let backLinksPerWebsite (engine:SearchEngine.t) inWebsites all outputDir =
    printfn "beginning %s" BACK_LINKS_PER_WEBSITE
    use fs = File.Create(filename outputDir BACK_LINKS_PER_WEBSITE)
    use sw = streamWriter fs

    // Reused for every row; reset by setting its length to 0.
    let row = StringBuilder()

    // Header: empty first cell, then one column per website.
    for website in inWebsites do
      Printf.bprintf row ",%s" website
    sw.WriteLine(row.ToString())

    for KeyValue(backLink:Link, (_, referenced)) in all do
      // website => does this back link point to it?
      let flags = Dictionary<_, _>()
      for website in inWebsites do
        flags.[website] <- false
      for (website, _, _, _) in referenced do
        // The flag is read before it is set, so a website that is not part of
        // inWebsites fails on the lookup instead of being added silently.
        if not flags.[website] then
          flags.[website] <- true

      row.Length <- 0
      Printf.bprintf row "%s" backLink.Url
      for website in inWebsites do
        Printf.bprintf row ",%A" flags.[website]
      sw.WriteLine(row.ToString())

    printfn "%s done" BACK_LINKS_PER_WEBSITE

  // [run engine websites maxResults outputDir] collects the back links of
  // every website and writes both back-link reports.
  // Nothing happens when [maxResults] is 0.
  let run engine websites maxResults outputDir =
    async {
      if maxResults > 0u then
        printfn "running back links functions..."
        let! collected = allBackLinks engine websites maxResults
        backLinks engine collected outputDir
        backLinksPerWebsite engine websites collected outputDir
        printfn "%s" System.Environment.NewLine
    }

Application

namespace SeoAnalysis

open System
open System.Collections.Generic
open System.IO
open System.Text
open System.Windows.Forms

open Seo
open SeoAnalysis

module App = 

  // Default caps on the number of results fetched for each report; each can be
  // overridden from the command line (see [specs]). A cap of 0 makes the
  // corresponding `run` function do nothing.
  let maxKeywordResults = ref 100u
  let maxIndexedLinks = ref 100u
  let maxBackLinks = ref 100u

  // Directory the CSV reports are written to; defaults to "output" next to the executable.
  let outputDir = ref <| Path.Combine(appDir, "output")

  // Reads a UTF-8 text file located next to the executable and returns its
  // entries as a set: one entry per line, trimmed and lower-cased with the
  // invariant culture. Blank lines are skipped so that they never turn into an
  // empty website or keyword.
  let private readEntries fileName =
    File.ReadAllLines(Path.Combine(appDir, fileName), Text.Encoding.UTF8)
    |> Array.map (fun s -> s.Trim().ToLowerInvariant())
    |> Array.filter (fun s -> s.Length > 0)
    |> Set.ofArray

  let websites = readEntries "websites.txt"

  let keywords = readEntries "keywords.txt"

  let searchEngine = ref Google.SEARCH_ENGINE

  // Maps an engine name given on the command line to a search engine;
  // anything other than "bing" or "yahoo" (case-insensitive) selects Google.
  let _searchEngine (s:string) =
    // Invariant lower-casing: the names must match whatever the user's culture is.
    match s.ToLowerInvariant() with
    | "bing" -> Bing.SEARCH_ENGINE
    | "yahoo" -> Yahoo.SEARCH_ENGINE
    | _ -> Google.SEARCH_ENGINE

  // NOTE(review): the three values below are not referenced anywhere in the
  // visible code; they are kept in case other code reads them.
  let outputName = ref "a.out"
  let verbose = ref false
  let warningLevel = ref 0

  // Converts a command-line integer into a result cap. Negative values are
  // clamped to 0 (which disables the report) instead of wrapping around to a
  // huge unsigned number.
  let private toLimit (i : int) = uint32 (max 0 i)

  let specs =
    [ "--output-directory", ArgType.String (fun s -> outputDir := s), "Path of the output directory"
      "--engine", ArgType.String (fun s -> searchEngine := _searchEngine s), "Search engine (google, yahoo or bing)"
      "--keywords", ArgType.Int (fun i -> maxKeywordResults := toLimit i), "Maximum number of results fetched when searching keyword-related info"
      "--indexed-links", ArgType.Int (fun i -> maxIndexedLinks := toLimit i), "Maximum number of results fetched when searching indexed links"
      "--back-links", ArgType.Int (fun i -> maxBackLinks := toLimit i), "Maximum number of results fetched when searching back links"
    ] |> List.map (fun (sh, ty, desc) -> ArgInfo(sh, ty, desc))

  // Parses the command line, then runs the three analyses one after the other.
  [<EntryPoint>]
  do
    ArgParser.Parse(specs)
    async {
      do! IndexedLinks.run !searchEngine websites !maxIndexedLinks !outputDir
      do! KeywordMatches.run !searchEngine keywords (Some websites) !maxKeywordResults !outputDir
      do! BackLinks.run !searchEngine websites !maxBackLinks !outputDir
    } |> Async.RunSynchronously

Comments are closed.