The vb script below will create an index of every word from a document, (directions for setting up macros)
Sub ConcordanceBuilder()
Application.ScreenUpdating = False
Dim StrIn As String, StrOut As String, StrTmp As String, StrExcl As String
Dim i As Long, j As Long, k As Long, l As Long, Rng As Range
'Define the exlusions list
StrExcl = "a,am,an,and,are,as,at,b,be,but,by,c,can,cm,d,did," & _
"do,does,e,eg,en,eq,etc,f,for,g,get,go,got,h,has,have," & _
"he,her,him,how,i,ie,if,in,into,is,it,its,j,k,l,m,me," & _
"mi,mm,my,n,na,nb,no,not,o,of,off,ok,on,one,or,our,out," & _
"p,q,r,re,s,she,so,t,the,their,them,they,this,t,to,u,v," & _
"via,vs,w,was,we,were,who,will,with,would,x,y,yd,you,your,z"
With ActiveDocument
'Get the document's text
StrIn = .Content.Text
'Strip out unwanted characters. Amongst others, hyphens and formatted single quotes are retained at this stage
For i = 1 To 255
Select Case i
Case 1 To 35, 37 to 38, 40 To 43, 45, 47, 58 To 64, 91 To 96,
123 To 127, 129 To 144, 147 To 149, 152 To 162, 164, 166 To 171, 174 To
191, 247
StrIn = Replace(StrIn, Chr(i), " ")
End Select
Next
'Delete any periods or commas at the end of a word. Formatted numbers are thus retained.
StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(44) & Chr(32), "
"), Chr(44) & vbCr, " "), Chr(46) & Chr(32), " "), Chr(46)
& vbCr, " ")
'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
StrIn = Replace(Replace(Replace(Replace(StrIn, Chr(145), "'"), Chr(146), "'"), "' ", " "), " '", " ")
'Convert to lowercase
StrIn = " " & LCase(Trim(StrIn)) & " "
'Process the exclusions list
For i = 0 To UBound(Split(StrExcl, ","))
While InStr(StrIn, " " & Split(StrExcl, ",")(i) & " ") > 0
StrIn = Replace(StrIn, " " & Split(StrExcl, ",")(i) & " ", " ")
Wend
Next
'Clean up any duplicate spaces
While InStr(StrIn, " ") > 0
StrIn = Replace(StrIn, " ", " ")
Wend
StrIn = " " & Trim(StrIn) & " "
j = UBound(Split(StrIn, " "))
l = j
For i = 1 To j
'Find how many occurences of each word there are in the document
StrTmp = Split(StrIn, " ")(1)
While InStr(StrIn, " " & StrTmp & " ") > 0
StrIn = Replace(StrIn, " " & StrTmp & " ", " ")
Wend
'Calculate the number of words replaced
k = l - UBound(Split(StrIn, " "))
'Update the output string
StrOut = StrOut & StrTmp & vbTab & k & vbCr
l = UBound(Split(StrIn, " "))
If l = 1 Then Exit For
DoEvents
Next
StrIn = StrOut
StrOut = ""
For i = 0 To UBound(Split(StrIn, vbCr)) - 1
StrTmp = ""
With .Range
With .Find
.ClearFormatting
.Text = Split(Split(StrIn, vbCr)(i), vbTab)(0)
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWholeWord = True
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.Execute
End With
Do While .Find.Found
StrTmp = StrTmp & " " & .Information(wdActiveEndPageNumber)
.Collapse (wdCollapseEnd)
.Find.Execute
Loop
End With
StrTmp = Replace(Trim(StrTmp), " ", ",")
StrOut = StrOut & Split(StrIn, vbCr)(i) & vbTab & StrTmp & vbCr
Next
'Create the concordance table on a new last page
Set Rng = .Range.Characters.Last
With Rng
.InsertAfter vbCr & Chr(12) & StrOut
.Start = .Start + 2
.ConvertToTable Separator:=vbTab, Numcolumns:=3
.Tables(1).Sort Excludeheader:=False, FieldNumber:=1, _
SortFieldType:=wdSortFieldAlphanumeric, _
SortOrder:=wdSortOrderAscending, CaseSensitive:=False
End With
End With
Application.ScreenUpdating = True
End Sub
From microsoft.com Answers
No comments:
Post a Comment