// Minimal TF-IDF demo: build a per-document term index, then rank the documents
// for a single query term and print the ranking.
var documents = new[]
{
    "i like apples",
    "i like pears",
    "i like fruit like oranges",
    "i hate bananas"
};

// build index
var index = BuildIndex(documents);

// search index
var results = Search(index, "like");

results.Select((r, i) => $"{1 + i}. {r.document,-25} {r.score:F5}")
    .ToList()
    .ForEach(Console.WriteLine);

// Builds the tf*idf index for a corpus.
// Returns one entry per input document, in input order, pairing the document text with
// a (term, score) tuple for every distinct term it contains. Matching is case-sensitive.
(string document, (string term, double score)[] scores)[] BuildIndex(string[] corpus)
{
    // Tokenize every document exactly once. Regex.Split emits empty strings when the text
    // starts or ends with a non-word character; those are not terms and would otherwise
    // inflate the term-frequency denominator.
    var tokenized = corpus
        .Select(text => System.Text.RegularExpressions.Regex.Split(text, @"\W+")
            .Where(token => token.Length > 0)
            .ToArray())
        .ToArray();

    // document frequency - number of documents that contain the term as a whole token.
    // (A substring test on the raw text would also count "cat" inside "category".)
    var documentCounts = tokenized
        .SelectMany(tokens => tokens.Distinct())
        .GroupBy(token => token)
        .ToDictionary(g => g.Key, g => g.Count());

    return corpus
        .Select((document, position) =>
        {
            var tokens = tokenized[position];
            var scores = tokens
                .GroupBy(token => token)
                .Select(g =>
                {
                    // term frequency - share of this document's tokens that are the term
                    var frequency = (double)g.Count() / tokens.Length;

                    // inverse document frequency - log of the ratio of the *number of documents*
                    // to the *number of documents containing the term*
                    var inverseDocumentFrequency = Math.Log((double)corpus.Length / documentCounts[g.Key]);

                    // calculate tf*idf
                    return (term: g.Key, score: frequency * inverseDocumentFrequency);
                })
                .ToArray();

            // document scores
            return (document, scores);
        })
        .ToArray();
}

// Returns the documents that contain `term`, paired with that term's tf*idf score,
// ordered by score descending. The sort is stable, so ties keep index order.
(string document, double score)[] Search(
    (string document, (string term, double score)[] scores)[] entries,
    string term)
{
    return entries
        .SelectMany(entry => entry.scores
            .Where(s => s.term == term)
            .Select(s => (entry.document, s.score)))
        .OrderByDescending(hit => hit.score)
        .ToArray();
}