Navigation

7/23/2015

automated Word index

The vb script below will create an index of every word from a document,  (directions for setting up macros)




Sub ConcordanceBuilder()
Application.ScreenUpdating = False
Dim StrIn As String, StrOut As String, StrTmp As String, StrExcl As String
Dim i As Long, j As Long, k As Long, l As Long, Rng As Range
'Define the exlusions list
StrExcl = "a,am,an,and,are,as,at,b,be,but,by,c,can,cm,d,did," & _
          "do,does,e,eg,en,eq,etc,f,for,g,get,go,got,h,has,have," & _
          "he,her,him,how,i,ie,if,in,into,is,it,its,j,k,l,m,me," & _
          "mi,mm,my,n,na,nb,no,not,o,of,off,ok,on,one,or,our,out," & _
          "p,q,r,re,s,she,so,t,the,their,them,they,this,t,to,u,v," & _
          "via,vs,w,was,we,were,who,will,with,would,x,y,yd,you,your,z"
With ActiveDocument
  'Get the document's text
  StrIn = .Content.Text
  'Strip out unwanted characters. Amongst others, hyphens and formatted single quotes are retained at this stage
  For i = 1 To 255
    Select Case i
      Case 1 To 35, 37 to 38, 40 To 43, 45, 47, 58 To 64, 91 To 96, 123 To 127, 129 To 144, 147 To 149, 152 To 162, 164, 166 To 171, 174 To 191, 247
      StrIn = Replace(StrIn, Chr(i), " ")
    End Select
  Next
  'Delete any periods or commas at the end of a word. Formatted numbers are thus retained.
  StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(44) & Chr(32), " "), Chr(44) & vbCr, " "), Chr(46) & Chr(32), " "), Chr(46) & vbCr, " ")
  'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
  StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(145), "'"), Chr(146), "'"), "' ", " "), " '", " ")
  'Convert to lowercase
  StrIn = " " & LCase(Trim(StrIn)) & " "
  'Process the exclusions list
  For i = 0 To UBound(Split(StrExcl, ","))
    While InStr(StrIn, " " & Split(StrExcl, ",")(i) & " ") > 0
      StrIn = Replace(StrIn, " " & Split(StrExcl, ",")(i) & " ", " ")
    Wend
  Next
  'Clean up any duplicate spaces
  While InStr(StrIn, "  ") > 0
    StrIn = Replace(StrIn, "  ", " ")
  Wend
  StrIn = " " & Trim(StrIn) & " "
  j = UBound(Split(StrIn, " "))
  l = j
  For i = 1 To j
    'Find how many occurences of each word there are in the document
    StrTmp = Split(StrIn, " ")(1)
    While InStr(StrIn, " " & StrTmp & " ") > 0
      StrIn = Replace(StrIn, " " & StrTmp & " ", " ")
    Wend
    'Calculate the number of words replaced
    k = l - UBound(Split(StrIn, " "))
    'Update the output string
    StrOut = StrOut & StrTmp & vbTab & k & vbCr
    l = UBound(Split(StrIn, " "))
    If l = 1 Then Exit For
    DoEvents
  Next
  StrIn = StrOut
  StrOut = ""
  For i = 0 To UBound(Split(StrIn, vbCr)) - 1
    StrTmp = ""
    With .Range
      With .Find
        .ClearFormatting
        .Text = Split(Split(StrIn, vbCr)(i), vbTab)(0)
        .Replacement.Text = ""
        .Forward = True
        .Wrap = wdFindStop
        .Format = False
        .MatchCase = False
        .MatchWholeWord = True
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
        .Execute
      End With
      Do While .Find.Found
        StrTmp = StrTmp & " " & .Information(wdActiveEndPageNumber)
        .Collapse (wdCollapseEnd)
        .Find.Execute
      Loop
    End With
    StrTmp = Replace(Trim(StrTmp), " ", ",")
    StrOut = StrOut & Split(StrIn, vbCr)(i) & vbTab & StrTmp & vbCr
  Next
  'Create the concordance table on a new last page
  Set Rng = .Range.Characters.Last
  With Rng
    .InsertAfter vbCr & Chr(12) & StrOut
    .Start = .Start + 2
    .ConvertToTable Separator:=vbTab, Numcolumns:=3
    .Tables(1).Sort Excludeheader:=False, FieldNumber:=1, _
      SortFieldType:=wdSortFieldAlphanumeric, _
      SortOrder:=wdSortOrderAscending, CaseSensitive:=False
  End With
End With
Application.ScreenUpdating = True
End Sub






From   microsoft.com Answers


No comments:

Post a Comment