Document (#25051)

Froissart, C.
Lallich-Boidin, G.
Towards structuring of indexing vocabulary for large technical documents
Structures and relations in knowledge organization: Proceedings of the 5th International ISKO-Conference, Lille, 25.-29.8.1998. Ed.: W. Mustafa el Hadi et al.
Würzburg : Ergon
Advances in knowledge organization; vol.6
This paper deals with the indexing of large textual and structured documents. We limit our area to technical documents such as maintenance and user manuals. This implies, first, that each document describes a closed world and, second, that the documents are used by experts in this area. We suggest a methodology to extract the indexing vocabulary from the text with linguistic and numeric tools and then to structure the vocabulary, as a thesaurus might. We aim to assist the user so that he quickly retrieves only the text passages he needs.

Similar documents (content)

  1. Theory of subject analysis : A sourcebook (1985) 0.16
    0.1623415 = sum of:
      0.1623415 = product of:
        0.4509486 = sum of:
          0.055682387 = weight(abstract_txt:structuring in 3622) [ClassicSimilarity], result of:
            0.055682387 = score(doc=3622,freq=2.0), product of:
              0.18213235 = queryWeight, product of:
                1.1587467 = boost
                6.9177637 = idf(docFreq=118, maxDocs=44218)
                0.022721283 = queryNorm
              0.30572486 = fieldWeight in 3622, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.9177637 = idf(docFreq=118, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.017760184 = weight(abstract_txt:that in 3622) [ClassicSimilarity], result of:
            0.017760184 = score(doc=3622,freq=14.0), product of:
              0.064103425 = queryWeight, product of:
                1.1906831 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.022721283 = queryNorm
              0.27705514 = fieldWeight in 3622, product of:
                3.7416575 = tf(freq=14.0), with freq of:
                  14.0 = termFreq=14.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.012279593 = weight(abstract_txt:this in 3622) [ClassicSimilarity], result of:
            0.012279593 = score(doc=3622,freq=6.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.18470818 = fieldWeight in 3622, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.06789896 = weight(abstract_txt:passages in 3622) [ClassicSimilarity], result of:
            0.06789896 = score(doc=3622,freq=1.0), product of:
              0.26191515 = queryWeight, product of:
                1.3895535 = boost
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.022721283 = queryNorm
              0.2592403 = fieldWeight in 3622, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.042139452 = weight(abstract_txt:technical in 3622) [ClassicSimilarity], result of:
            0.042139452 = score(doc=3622,freq=2.0), product of:
              0.19056559 = queryWeight, product of:
                1.6762246 = boost
                5.0035634 = idf(docFreq=806, maxDocs=44218)
                0.022721283 = queryNorm
              0.22112834 = fieldWeight in 3622, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.0035634 = idf(docFreq=806, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.030224781 = weight(abstract_txt:area in 3622) [ClassicSimilarity], result of:
            0.030224781 = score(doc=3622,freq=1.0), product of:
              0.19238475 = queryWeight, product of:
                1.6842064 = boost
                5.027389 = idf(docFreq=787, maxDocs=44218)
                0.022721283 = queryNorm
              0.15710591 = fieldWeight in 3622, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.027389 = idf(docFreq=787, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.04326045 = weight(abstract_txt:documents in 3622) [ClassicSimilarity], result of:
            0.04326045 = score(doc=3622,freq=3.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.22307204 = fieldWeight in 3622, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.071919516 = weight(abstract_txt:indexing in 3622) [ClassicSimilarity], result of:
            0.071919516 = score(doc=3622,freq=6.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.33294594 = fieldWeight in 3622, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.1097833 = weight(abstract_txt:vocabulary in 3622) [ClassicSimilarity], result of:
            0.1097833 = score(doc=3622,freq=4.0), product of:
              0.32781574 = queryWeight, product of:
                2.6925917 = boost
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.022721283 = queryNorm
              0.33489332 = fieldWeight in 3622, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
        0.36 = coord(9/25)
  2. Dumais, S.T.: Latent semantic analysis (2003) 0.16
    0.16185302 = sum of:
      0.16185302 = product of:
        0.4495917 = sum of:
          0.014239824 = weight(abstract_txt:that in 2462) [ClassicSimilarity], result of:
            0.014239824 = score(doc=2462,freq=9.0), product of:
              0.064103425 = queryWeight, product of:
                1.1906831 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.022721283 = queryNorm
              0.22213829 = fieldWeight in 2462, product of:
                3.0 = tf(freq=9.0), with freq of:
                  9.0 = termFreq=9.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.044544633 = weight(abstract_txt:limit in 2462) [ClassicSimilarity], result of:
            0.044544633 = score(doc=2462,freq=1.0), product of:
              0.19774953 = queryWeight, product of:
                1.2074043 = boost
                7.208251 = idf(docFreq=88, maxDocs=44218)
                0.022721283 = queryNorm
              0.22525784 = fieldWeight in 2462, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.208251 = idf(docFreq=88, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.007089626 = weight(abstract_txt:this in 2462) [ClassicSimilarity], result of:
            0.007089626 = score(doc=2462,freq=2.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.106641315 = fieldWeight in 2462, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.03853024 = weight(abstract_txt:text in 2462) [ClassicSimilarity], result of:
            0.03853024 = score(doc=2462,freq=6.0), product of:
              0.12447418 = queryWeight, product of:
                1.3547202 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.022721283 = queryNorm
              0.30954406 = fieldWeight in 2462, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.06789896 = weight(abstract_txt:passages in 2462) [ClassicSimilarity], result of:
            0.06789896 = score(doc=2462,freq=1.0), product of:
              0.26191515 = queryWeight, product of:
                1.3895535 = boost
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.022721283 = queryNorm
              0.2592403 = fieldWeight in 2462, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.04699995 = weight(abstract_txt:large in 2462) [ClassicSimilarity], result of:
            0.04699995 = score(doc=2462,freq=5.0), product of:
              0.15100922 = queryWeight, product of:
                1.4921473 = boost
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.022721283 = queryNorm
              0.31123894 = fieldWeight in 2462, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.07898243 = weight(abstract_txt:documents in 2462) [ClassicSimilarity], result of:
            0.07898243 = score(doc=2462,freq=10.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.40727198 = fieldWeight in 2462, product of:
                3.1622777 = tf(freq=10.0), with freq of:
                  10.0 = termFreq=10.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.041522752 = weight(abstract_txt:indexing in 2462) [ClassicSimilarity], result of:
            0.041522752 = score(doc=2462,freq=2.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.19222642 = fieldWeight in 2462, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.1097833 = weight(abstract_txt:vocabulary in 2462) [ClassicSimilarity], result of:
            0.1097833 = score(doc=2462,freq=4.0), product of:
              0.32781574 = queryWeight, product of:
                2.6925917 = boost
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.022721283 = queryNorm
              0.33489332 = fieldWeight in 2462, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
        0.36 = coord(9/25)
  3. Mounier, E.; Paganelli, C.: Text structure and information retrieval in large documents (1998) 0.15
    0.14855348 = sum of:
      0.14855348 = product of:
        0.61897284 = sum of:
          0.10122721 = weight(abstract_txt:textual in 66) [ClassicSimilarity], result of:
            0.10122721 = score(doc=66,freq=1.0), product of:
              0.13564695 = queryWeight, product of:
                5.9700394 = idf(docFreq=306, maxDocs=44218)
                0.022721283 = queryNorm
              0.7462549 = fieldWeight in 66, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.9700394 = idf(docFreq=306, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.13931432 = weight(abstract_txt:deals in 66) [ClassicSimilarity], result of:
            0.13931432 = score(doc=66,freq=1.0), product of:
              0.16783237 = queryWeight, product of:
                1.1123279 = boost
                6.640641 = idf(docFreq=156, maxDocs=44218)
                0.022721283 = queryNorm
              0.83008015 = fieldWeight in 66, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.640641 = idf(docFreq=156, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.028358504 = weight(abstract_txt:this in 66) [ClassicSimilarity], result of:
            0.028358504 = score(doc=66,freq=2.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.42656526 = fieldWeight in 66, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.08407606 = weight(abstract_txt:large in 66) [ClassicSimilarity], result of:
            0.08407606 = score(doc=66,freq=1.0), product of:
              0.15100922 = queryWeight, product of:
                1.4921473 = boost
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.022721283 = queryNorm
              0.55676115 = fieldWeight in 66, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.099905744 = weight(abstract_txt:documents in 66) [ClassicSimilarity], result of:
            0.099905744 = score(doc=66,freq=1.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.5151628 = fieldWeight in 66, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.16609101 = weight(abstract_txt:indexing in 66) [ClassicSimilarity], result of:
            0.16609101 = score(doc=66,freq=2.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.7689057 = fieldWeight in 66, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
        0.24 = coord(6/25)
  4. Anderson, J.D.: Guidelines for indexes and related information retrieval devices (1997) 0.15
    0.14747187 = sum of:
      0.14747187 = product of:
        0.61446613 = sum of:
          0.08707145 = weight(abstract_txt:deals in 3807) [ClassicSimilarity], result of:
            0.08707145 = score(doc=3807,freq=1.0), product of:
              0.16783237 = queryWeight, product of:
                1.1123279 = boost
                6.640641 = idf(docFreq=156, maxDocs=44218)
                0.022721283 = queryNorm
              0.5188001 = fieldWeight in 3807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.640641 = idf(docFreq=156, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.012532807 = weight(abstract_txt:this in 3807) [ClassicSimilarity], result of:
            0.012532807 = score(doc=3807,freq=1.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.18851699 = fieldWeight in 3807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.10534863 = weight(abstract_txt:technical in 3807) [ClassicSimilarity], result of:
            0.10534863 = score(doc=3807,freq=2.0), product of:
              0.19056559 = queryWeight, product of:
                1.6762246 = boost
                5.0035634 = idf(docFreq=806, maxDocs=44218)
                0.022721283 = queryNorm
              0.55282086 = fieldWeight in 3807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.0035634 = idf(docFreq=806, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.088305034 = weight(abstract_txt:documents in 3807) [ClassicSimilarity], result of:
            0.088305034 = score(doc=3807,freq=2.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.4553439 = fieldWeight in 3807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.12713693 = weight(abstract_txt:indexing in 3807) [ClassicSimilarity], result of:
            0.12713693 = score(doc=3807,freq=3.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.5885708 = fieldWeight in 3807, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.1940713 = weight(abstract_txt:vocabulary in 3807) [ClassicSimilarity], result of:
            0.1940713 = score(doc=3807,freq=2.0), product of:
              0.32781574 = queryWeight, product of:
                2.6925917 = boost
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.022721283 = queryNorm
              0.59201336 = fieldWeight in 3807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
        0.24 = coord(6/25)
  5. Wolfram, D.; Olson, H.A.; Bloom, R.: Measuring consistency for multiple taggers using vector space modeling (2009) 0.15
    0.14627464 = sum of:
      0.14627464 = product of:
        0.52240944 = sum of:
          0.020553418 = weight(abstract_txt:that in 3113) [ClassicSimilarity], result of:
            0.020553418 = score(doc=3113,freq=3.0), product of:
              0.064103425 = queryWeight, product of:
                1.1906831 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.022721283 = queryNorm
              0.320629 = fieldWeight in 3113, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.012532807 = weight(abstract_txt:this in 3113) [ClassicSimilarity], result of:
            0.012532807 = score(doc=3113,freq=1.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.18851699 = fieldWeight in 3113, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.052547544 = weight(abstract_txt:large in 3113) [ClassicSimilarity], result of:
            0.052547544 = score(doc=3113,freq=1.0), product of:
              0.15100922 = queryWeight, product of:
                1.4921473 = boost
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.022721283 = queryNorm
              0.34797573 = fieldWeight in 3113, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.106860735 = weight(abstract_txt:area in 3113) [ClassicSimilarity], result of:
            0.106860735 = score(doc=3113,freq=2.0), product of:
              0.19238475 = queryWeight, product of:
                1.6842064 = boost
                5.027389 = idf(docFreq=787, maxDocs=44218)
                0.022721283 = queryNorm
              0.55545324 = fieldWeight in 3113, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.027389 = idf(docFreq=787, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.062441092 = weight(abstract_txt:documents in 3113) [ClassicSimilarity], result of:
            0.062441092 = score(doc=3113,freq=1.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.32197678 = fieldWeight in 3113, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.07340255 = weight(abstract_txt:indexing in 3113) [ClassicSimilarity], result of:
            0.07340255 = score(doc=3113,freq=1.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.3398115 = fieldWeight in 3113, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.1940713 = weight(abstract_txt:vocabulary in 3113) [ClassicSimilarity], result of:
            0.1940713 = score(doc=3113,freq=2.0), product of:
              0.32781574 = queryWeight, product of:
                2.6925917 = boost
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.022721283 = queryNorm
              0.59201336 = fieldWeight in 3113, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
        0.28 = coord(7/25)