Last time, we created a library of Search Engine Optimization (SEO) helpers. This time we’ll see how to use them to actually fetch information about your website’s rankings (and your competitors’).
Notes :
- F# PowerPack needs to be added in the references, and so does System.Windows.Forms ;
- in the same directory as the application, you need: keywords.txt (a file which contains all keywords / keyword expressions, one per line), websites.txt (the websites to analyze, one per line) and a folder named “output” (although this is customizable in the application options, see the bottom of the post).
Helpers
namespace SeoAnalysis

open System
open System.IO
open System.Text
open System.Windows.Forms

/// Small utilities shared by the analysis modules (auto-opened inside the
/// SeoAnalysis namespace).
[<AutoOpen>]
module internal Helpers =

    /// True when [url] is a member of the [websites] set.
    let shouldKeep url websites =
        websites |> Set.contains url

    /// Reduces [url] to "scheme://host". Any failure while parsing [url]
    /// yields the empty string instead of an exception.
    let host url =
        try
            let parsed = Uri(url)
            sprintf "%s://%s" parsed.Scheme parsed.Host
        with _ ->
            String.Empty

    /// Runs [task] synchronously with a timeout of [ms] milliseconds and
    /// returns its result; if the run raises for any reason (timeout
    /// included), [defaultValue] is returned instead.
    let defaultTimeout task ms defaultValue =
        try
            Async.RunSynchronously(task, timeout = ms)
        with _ ->
            defaultValue

    /// Directory that contains the running executable.
    let appDir =
        Application.ExecutablePath |> Path.GetDirectoryName

    /// Builds the path "<outputDir>/<yyyyMMdd>_<x>.csv" using today's date.
    let filename outputDir x =
        let stamp = DateTime.Now.ToString("yyyyMMdd")
        Path.Combine(outputDir, sprintf "%s_%s.csv" stamp x)

    /// Wraps the stream [fs] in a UTF-8 StreamWriter.
    let streamWriter fs =
        new StreamWriter(fs, Encoding.UTF8)
PageRank
We use a timeout for the asynchronous fetching of the page rank so that we don’t wait for hours if the Google PageRank is unavailable for whatever reason (server down, we are blacklisted etc.)
namespace SeoAnalysis

open System.Collections.Generic

/// PageRank lookups, memoized per website and serialized through a single
/// mailbox agent.
module PageRank =

    /// Agent that owns the cache. For each (website, reply channel) message
    /// it answers with the cached rank, fetching it first when the website
    /// has not been seen yet. The fetch goes through defaultTimeout with a
    /// 250 ms limit and a fallback of 0; whatever value comes back (the
    /// fallback included) is stored and reused for later requests.
    let processor =
        MailboxProcessor<_>.Start(fun inbox ->
            let rec serve (cache : Dictionary<_, _>) =
                async {
                    let! (website, channel : AsyncReplyChannel<_>) = inbox.Receive()
                    let rank =
                        match cache.TryGetValue(website) with
                        | true, known -> known
                        | _ ->
                            let fetched = defaultTimeout (Seo.Google.asyncPageRank website) 250 0
                            cache.[website] <- fetched
                            fetched
                    channel.Reply(rank)
                    return! serve cache
                }
            serve (Dictionary<_, _>())
        )

    /// Asynchronously asks the agent for the rank of [website].
    let get website =
        processor.PostAndAsyncReply(fun reply -> (website, reply))
IndexedLinks
namespace SeoAnalysis

open System
open System.Collections.Generic
open System.IO
open SeoAnalysis

/// Report listing the pages a search engine has indexed for each website.
module IndexedLinks =
    /// Base name of the generated CSV file.
    let INDEXED_LINKS = "IndexedLinks"

    //
    // do not modify below
    //

    /// [run engine websites maxResults outputDir] writes the indexed-links
    /// CSV into [outputDir] (file name built by the shared filename helper).
    /// For every website it fetches the page rank and the indexed links
    /// reported by [engine], then writes one row per link: engine name,
    /// website, page rank, total number of indexed pages, page URL.
    /// A website without any indexed link contributes no row.
    /// Nothing is done (no file is created) when [maxResults] is 0.
    let run engine websites maxResults outputDir =
        async {
            if maxResults > 0u then
                let path = filename outputDir INDEXED_LINKS
                use fs = File.Create(path)
                use sw = streamWriter fs
                sw.WriteLine("MOTEUR, SITE, PR, NOMBRE DE PAGES INDEXEES, PAGE")
                // Rows are streamed straight to the file; the fetched link
                // lists are not retained once written.
                for website in websites do
                    let! pageRank = PageRank.get website
                    let! nIndexedLinks, indexedLinks =
                        Seo.SearchEngine.indexedLinks engine website maxResults
                    for link, _ in indexedLinks do
                        sprintf "%s, %s, %d, %d, %s" engine.Name website pageRank nIndexedLinks link.Url
                        |> sw.WriteLine
                printfn "%s %s" INDEXED_LINKS System.Environment.NewLine
        }
KeywordMatches
namespace SeoAnalysis

open System
open System.Collections.Generic
open System.IO
open System.Text
open Seo
open SeoAnalysis

/// Keyword ranking reports: raw matches, number of matches per keyword,
/// best rank per keyword and number of top ranks per website.
module KeywordMatches =
    /// Base names of the generated CSV files.
    let KEYWORD_MATCHES = "KeywordMatches"
    let NUM_MATCHES_PER_KEYWORD = "NumMatchesPerKeyword"
    let BEST_RANK_PER_KEYWORD = "BestRankPerKeyword"
    let NUM_TOP_RANKS = "NumTopRanks"

    //
    // do not modify below
    //

    /// [allKeywordMatches engine keywords websites maxResults] returns:
    /// Keyword =>
    ///   n,
    ///   [host =>
    ///       { (link1, rank1);
    ///         (link_n, rank_n)
    ///       }
    ///   ]
    /// where n is the total number of matches reported by the engine.
    ///
    /// [websites] is an option: with [Some ws] only links whose host belongs
    /// to [ws] are kept, with [None] every link is kept. In both cases links
    /// whose host could not be extracted (the host helper returned "") are
    /// dropped, because they cannot be grouped by host.
    let allKeywordMatches engine keywords websites maxResults =
        async {
            let all = Dictionary<_, _>()
            let hosts = Dictionary<_, _>()
            for keyword in keywords do
                printfn "fetching matches for %s" keyword
                let! nAllKeywordMatches, allKeywordMatches =
                    SearchEngine.keywordMatches engine keyword maxResults
                for (link, _) in allKeywordMatches do
                    let hostUrl = host link.Url
                    if hostUrl.Length > 0 then
                        hosts.[link] <- hostUrl
                // make sure there is no flaky link that we couldn't add:
                // hosts.[link] below would raise for such a link.
                let resolvable =
                    allKeywordMatches
                    |> List.filter (fun (link, _) -> hosts.ContainsKey(link))
                let filtered =
                    match websites with
                    | None -> resolvable
                    | Some ws ->
                        resolvable
                        |> List.filter (fun (link, _) -> shouldKeep hosts.[link] ws)
                // Several report writers enumerate this sequence; caching
                // avoids regrouping the links on every enumeration.
                let grouppedFiltered =
                    filtered
                    |> Seq.groupBy (fun (link, _) -> hosts.[link])
                    |> Seq.cache
                all.[keyword] <- (nAllKeywordMatches, grouppedFiltered)
            return all
        }

    /// Writes the detailed CSV: one row per (keyword, website, link) with the
    /// engine name, keyword, total number of results, rank, website, page
    /// rank of the website, page URL and page title (commas in the title are
    /// replaced by '-').
    let keywordMatches (engine:SearchEngine.t) all outputDir =
        async {
            printfn "beginning %s" KEYWORD_MATCHES
            let path = filename outputDir KEYWORD_MATCHES
            use fs = File.Create(path)
            use sw = streamWriter fs
            sw.WriteLine("MOTEUR, MOT CLE, NB. RESULTATS TOTAL, POSITION, SITE, PAGE RANK, PAGE, TITRE")
            for KeyValue(keyword, (nAllKeywordMatches, grouppedFiltered)) in all do
                for (website, links) in grouppedFiltered do
                    // The page rank depends on the website only, so it is
                    // requested once per group rather than once per link.
                    let! pr = PageRank.get website
                    for (link:Link), rank in links do
                        let line =
                            sprintf "%s, %s, %d, %d, %s, %d, %s, %s"
                                engine.Name keyword nAllKeywordMatches rank website pr
                                link.Url (link.Title.Replace(",", "-"))
                        sw.WriteLine(line)
            printfn "%s done" KEYWORD_MATCHES
        }

    /// Writes a keyword x website matrix whose cells hold the number of
    /// matching links (0 when the website has no match for the keyword).
    /// The first row lists the websites, the first column the keywords.
    let numMatchesPerKeyword websites all outputDir =
        printfn "beginning %s" NUM_MATCHES_PER_KEYWORD
        let path = filename outputDir NUM_MATCHES_PER_KEYWORD
        use fs = File.Create(path)
        use sw = streamWriter fs
        // Every (keyword, website) cell starts at 0.
        let counter =
            let xs = Dictionary<_, _>()
            for KeyValue(keyword, (_, _)) in all do
                xs.[keyword] <- Dictionary<_, _>()
                for website in websites do
                    xs.[keyword].[website] <- 0
            xs
        let buf = StringBuilder()
        for website in websites do
            Printf.bprintf buf ",%s" website
        sw.WriteLine(buf.ToString())
        for KeyValue(keyword, (_, grouppedFiltered)) in all do
            for (website, links) in grouppedFiltered do
                counter.[keyword].[website] <- Seq.length links
            buf.Length <- 0
            Printf.bprintf buf "%s" keyword
            for website in websites do
                Printf.bprintf buf ",%d" counter.[keyword].[website]
            sw.WriteLine(buf.ToString())
        printfn "%s done" NUM_MATCHES_PER_KEYWORD

    /// Writes a keyword x website matrix whose cells hold the best (lowest)
    /// rank of the website for the keyword. A website without any match is
    /// reported as UInt32.MaxValue.
    let bestRankPerKeyword websites all outputDir =
        printfn "beginning %s" BEST_RANK_PER_KEYWORD
        let path = filename outputDir BEST_RANK_PER_KEYWORD
        use fs = File.Create(path)
        use sw = streamWriter fs
        let buf = StringBuilder()
        // 0u is the "no match recorded" marker, translated when printing.
        let counter =
            let xs = Dictionary<_, _>()
            for KeyValue(keyword, (_, _)) in all do
                xs.[keyword] <- Dictionary<_, _>()
                for website in websites do
                    xs.[keyword].[website] <- 0u
            xs
        for website in websites do
            Printf.bprintf buf ",%s" website
        sw.WriteLine(buf.ToString())
        for KeyValue(keyword, (_, grouppedFiltered)) in all do
            for (website, links) in grouppedFiltered do
                let _, bestRank = links |> Seq.minBy (fun (_, rank) -> rank)
                counter.[keyword].[website] <- bestRank
            buf.Length <- 0
            Printf.bprintf buf "%s" keyword
            for website in websites do
                let best = counter.[keyword].[website]
                Printf.bprintf buf ",%d" (if best = 0u then UInt32.MaxValue else best)
            sw.WriteLine(buf.ToString())
        printfn "%s done" BEST_RANK_PER_KEYWORD

    /// Writes, for every website, how many of its matches (all keywords
    /// together) are ranked first, in the top 3, top 10 and top 20, plus the
    /// total number of matches.
    let numTopRanks websites all outputDir =
        printfn "beginning %s" NUM_TOP_RANKS
        let path = filename outputDir NUM_TOP_RANKS
        use fs = File.Create(path)
        use sw = streamWriter fs
        sw.WriteLine("URL(s),1ère Pos.,TOP 3,TOP 10,TOP 20,TOTAL")
        // Slots per website: [0] first position, [1] top 3, [2] top 10,
        // [3] top 20, [4] total.
        let topMatches =
            let xs = Dictionary<_, _>()
            for website in websites do
                xs.[website] <- Array.create 5 0
            xs
        for KeyValue(_, (_, grouppedFiltered)) in all do
            for (website, links) in grouppedFiltered do
                for (_, rank) in links do
                    topMatches.[website].[4] <- topMatches.[website].[4] + 1
                    if rank <= 20u then
                        topMatches.[website].[3] <- topMatches.[website].[3] + 1
                    if rank <= 10u then
                        topMatches.[website].[2] <- topMatches.[website].[2] + 1
                    if rank <= 3u then
                        topMatches.[website].[1] <- topMatches.[website].[1] + 1
                    if rank = 1u then
                        topMatches.[website].[0] <- topMatches.[website].[0] + 1
        for website in websites do
            let line =
                sprintf "%s,%d,%d,%d,%d,%d"
                    website
                    topMatches.[website].[0]
                    topMatches.[website].[1]
                    topMatches.[website].[2]
                    topMatches.[website].[3]
                    topMatches.[website].[4]
            sw.WriteLine(line)
        printfn "%s done" NUM_TOP_RANKS

    /// Fetches the matches once, then writes the detailed report and, when a
    /// website set is given ([Some websites]), the three per-website
    /// summaries. Does nothing when [maxResults] is 0.
    let run engine keywords websites maxResults outputDir =
        async {
            if maxResults > 0u then
                printfn "running keyword matches functions..."
                let! all = allKeywordMatches engine keywords websites maxResults
                do! keywordMatches engine all outputDir
                websites |> Option.iter (fun websites ->
                    numMatchesPerKeyword websites all outputDir
                    bestRankPerKeyword websites all outputDir
                    numTopRanks websites all outputDir
                )
                printfn "%s" System.Environment.NewLine
        }
BackLinks
namespace SeoAnalysis

open System
open System.Collections.Generic
open System.IO
open System.Text
open Seo
open SeoAnalysis

/// Back-link reports: the full list of referring pages and a
/// referring page x website presence matrix.
module BackLinks =
    /// Base names of the generated CSV files.
    let BACK_LINKS = "BackLinksFull"
    let BACK_LINKS_PER_WEBSITE = "BackLinksPerWebsite"

    //
    // do not modify below
    //

    /// [allBackLinks engine websites maxResults] returns:
    /// BackLink =>
    ///   page rank fetched for the back link URL,
    ///   { (website1, websitePageRank1, nBackLinks1, rank1);
    ///     (website_n, websitePageRank_n, nBackLinks_n, rank_n)
    ///   }
    /// i.e. for every referring page, the set of analyzed websites it was
    /// reported for.
    let allBackLinks engine websites maxResults =
        async {
            let collected = Dictionary<_, _>()
            for website in websites do
                printfn "fetching page rank for %s" website
                let! websitePageRank = PageRank.get website
                printfn "fetching back links for %s" website
                let! nBackLinks, backLinks =
                    SearchEngine.backLinks engine website false false maxResults
                for link, rank in backLinks do
                    // Websites already recorded for this referring page.
                    let targets =
                        match collected.TryGetValue(link) with
                        | true, (_, known) -> known
                        | _ -> Set.empty
                    let! backLinkPageRank = PageRank.get link.Url
                    let entry = (website, websitePageRank, nBackLinks, rank)
                    collected.[link] <- (backLinkPageRank, Set.add entry targets)
            return collected
        }

    /// Writes the full report: one row per (referring page, website) pair.
    /// Commas in the page title are replaced by '-'.
    let backLinks (engine:SearchEngine.t) all outputDir =
        printfn "beginning %s" BACK_LINKS
        use fs = File.Create(filename outputDir BACK_LINKS)
        use sw = streamWriter fs
        sw.WriteLine("ENGINE, URL SITE, PAGE RANK SITE, NB TOTAL REFERENTS, SITE REFERENT, PAGE RANK SITE REFERENT, PAGE REFERENTE, TITRE PAGE, RANG REFERENT")
        for KeyValue(backLink:Link, (backLinkPageRank, websites)) in all do
            for (website, websitePageRank, nBackLinks, backLinkRank) in websites do
                sprintf "%s, %s, %d, %d, %s, %d, %s, %s, %d"
                    engine.Name website websitePageRank nBackLinks
                    (host backLink.Url) backLinkPageRank backLink.Url
                    (backLink.Title.Replace(",", "-")) backLinkRank
                |> sw.WriteLine
        printfn "%s done" BACK_LINKS

    /// Writes a matrix with one row per referring page and one column per
    /// website of [inWebsites]; a cell is printed with %A from a bool that
    /// is true when the page refers to that website.
    /// [engine] is not used by this function.
    let backLinksPerWebsite (engine:SearchEngine.t) inWebsites all outputDir =
        printfn "beginning %s" BACK_LINKS_PER_WEBSITE
        use fs = File.Create(filename outputDir BACK_LINKS_PER_WEBSITE)
        use sw = streamWriter fs
        // Every (referring page, website) cell starts at false.
        let checker =
            let table = Dictionary<_, _>()
            for KeyValue(backLink:Link, (_, _)) in all do
                table.[backLink] <- Dictionary<_, _>()
                for website in inWebsites do
                    table.[backLink].[website] <- false
            table
        let header = StringBuilder()
        for website in inWebsites do
            Printf.bprintf header ",%s" website
        sw.WriteLine(header.ToString())
        for KeyValue(backLink:Link, (_, websites)) in all do
            let flags = checker.[backLink]
            for (website, _, _, _) in websites do
                if not flags.[website] then
                    flags.[website] <- true
            // A fresh buffer per row instead of resetting a shared one.
            let row = StringBuilder()
            Printf.bprintf row "%s" backLink.Url
            for website in inWebsites do
                Printf.bprintf row ",%A" flags.[website]
            sw.WriteLine(row.ToString())
        printfn "%s done" BACK_LINKS_PER_WEBSITE

    /// Fetches the back links once and writes both reports.
    /// Does nothing when [maxResults] is 0.
    let run engine websites maxResults outputDir =
        async {
            if maxResults > 0u then
                printfn "running back links functions..."
                let! all = allBackLinks engine websites maxResults
                backLinks engine all outputDir
                backLinksPerWebsite engine websites all outputDir
                printfn "%s" System.Environment.NewLine
        }
Application
namespace SeoAnalysis

open System
open System.Collections.Generic
open System.IO
open System.Text
open System.Windows.Forms
open Seo
open SeoAnalysis

/// Command-line entry point: reads the input files, parses the options and
/// runs the three analyses one after the other.
module App =
    /// Maximum number of results fetched by each analysis (0 disables it).
    let maxKeywordResults = ref 100u
    let maxIndexedLinks = ref 100u
    let maxBackLinks = ref 100u

    /// Output directory; defaults to "output" next to the executable.
    let outputDir = ref <| Path.Combine(appDir, "output")

    /// Reads a UTF-8 file located next to the executable as a set of
    /// entries, one per line. Entries are trimmed and lower-cased with the
    /// invariant culture (so the result does not depend on the machine's
    /// locale), and blank lines are skipped so that a trailing empty line
    /// does not become an empty website or keyword.
    let private readEntries (fileName:string) =
        File.ReadAllLines(Path.Combine(appDir, fileName), Text.Encoding.UTF8)
        |> Array.map (fun s -> s.Trim().ToLowerInvariant())
        |> Array.filter (fun s -> s.Length > 0)
        |> Set.ofArray

    /// Websites to analyze, from websites.txt.
    let websites = readEntries "websites.txt"

    /// Keywords / keyword expressions to search, from keywords.txt.
    let keywords = readEntries "keywords.txt"

    /// Selected search engine; Google unless overridden with --engine.
    let searchEngine = ref Google.SEARCH_ENGINE

    /// Maps an engine name (case-insensitive) to its search engine value;
    /// any name other than "bing" or "yahoo" selects Google.
    let _searchEngine (s:string) =
        match s.ToLowerInvariant() with
        | "bing" -> Bing.SEARCH_ENGINE
        | "yahoo" -> Yahoo.SEARCH_ENGINE
        | _ -> Google.SEARCH_ENGINE

    // Not referenced anywhere in this module; kept because they are public
    // values of the module.
    let outputName = ref "a.out"
    let verbose = ref false
    let warningLevel = ref 0

    /// Converts a command-line integer into a result limit. Negative values
    /// are clamped to 0 (which disables the analysis) instead of wrapping
    /// around to a huge unsigned number.
    let private toLimit (i:int) = uint32 (max 0 i)

    /// Command-line options.
    let specs =
        [
            "--output-directory", ArgType.String (fun s -> outputDir := s), "Path of the output directory"
            "--engine", ArgType.String (fun s -> searchEngine := _searchEngine s), "Search engine (google, yahoo or bing)"
            "--keywords", ArgType.Int (fun i -> maxKeywordResults := toLimit i), "Maximum number of results fetched when searching keyword-related info"
            "--indexed-links", ArgType.Int (fun i -> maxIndexedLinks := toLimit i), "Maximum number of results fetched when searching indexed links"
            "--back-links", ArgType.Int (fun i -> maxBackLinks := toLimit i), "Maximum number of results fetched when searching back links"
        ]
        |> List.map (fun (sh, ty, desc) -> ArgInfo(sh, ty, desc))

    // NOTE(review): the attribute is attached to a `do` binding rather than
    // to a `main` function, as in the original - confirm this is accepted by
    // the compiler version in use.
    [<EntryPoint>]
    do
        ArgParser.Parse(specs)
        async {
            do! IndexedLinks.run !searchEngine websites !maxIndexedLinks !outputDir
            do! KeywordMatches.run !searchEngine keywords (Some websites) !maxKeywordResults !outputDir
            do! BackLinks.run !searchEngine websites !maxBackLinks !outputDir
        }
        |> Async.RunSynchronously