Document (#27399)

Author
Brückner, T.
Dambeck, H.
Title
Sortierautomaten : Grundlagen der Textklassifizierung
Source
c't. 2003, H.19, S.192-197
Year
2003
Abstract
Rechnung, Kündigung oder Adressänderung? Eingehende Briefe und E-Mails werden immer häufiger von Software statt aufwändig von Menschenhand sortiert. Die Textklassifizierer arbeiten erstaunlich genau. Sie fahnden auch nach ähnlichen Texten und sorgen so für einen schnellen Überblick. Ihre Werkzeuge sind Linguistik, Statistik und Logik.
Content
Mehrere grafische Darstellungen
Theme
Automatisches Klassifizieren
Data Mining

Similar documents (content)

  1. Patzig, G.: Sprache und Logik (1981) 0.09
    0.08932459 = sum of:
      0.08932459 = product of:
        0.3190164 = sum of:
          0.010719019 = weight(abstract_txt:einen in 5344) [ClassicSimilarity], result of:
            0.010719019 = score(doc=5344,freq=1.0), product of:
              0.06433039 = queryWeight, product of:
                4.2655873 = idf(docFreq=1687, maxDocs=44218)
                0.01508125 = queryNorm
              0.1666245 = fieldWeight in 5344, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.2655873 = idf(docFreq=1687, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5344)
          0.011369878 = weight(abstract_txt:nach in 5344) [ClassicSimilarity], result of:
            0.011369878 = score(doc=5344,freq=1.0), product of:
              0.06690882 = queryWeight, product of:
                1.0198437 = boost
                4.350232 = idf(docFreq=1550, maxDocs=44218)
                0.01508125 = queryNorm
              0.16993093 = fieldWeight in 5344, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.350232 = idf(docFreq=1550, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5344)
          0.015591629 = weight(abstract_txt:ihre in 5344) [ClassicSimilarity], result of:
            0.015591629 = score(doc=5344,freq=1.0), product of:
              0.08258613 = queryWeight, product of:
                1.1330405 = boost
                4.8330836 = idf(docFreq=956, maxDocs=44218)
                0.01508125 = queryNorm
              0.18879233 = fieldWeight in 5344, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.8330836 = idf(docFreq=956, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5344)
          0.017377954 = weight(abstract_txt:immer in 5344) [ClassicSimilarity], result of:
            0.017377954 = score(doc=5344,freq=1.0), product of:
              0.08877934 = queryWeight, product of:
                1.1747565 = boost
                5.0110264 = idf(docFreq=800, maxDocs=44218)
                0.01508125 = queryNorm
              0.19574322 = fieldWeight in 5344, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0110264 = idf(docFreq=800, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5344)
          0.0333086 = weight(abstract_txt:grundlagen in 5344) [ClassicSimilarity], result of:
            0.0333086 = score(doc=5344,freq=1.0), product of:
              0.13698837 = queryWeight, product of:
                1.4592637 = boost
                6.2246165 = idf(docFreq=237, maxDocs=44218)
                0.01508125 = queryNorm
              0.24314909 = fieldWeight in 5344, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.2246165 = idf(docFreq=237, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5344)
          0.16162975 = weight(abstract_txt:logik in 5344) [ClassicSimilarity], result of:
            0.16162975 = score(doc=5344,freq=7.0), product of:
              0.20525527 = queryWeight, product of:
                1.786237 = boost
                7.61935 = idf(docFreq=58, maxDocs=44218)
                0.01508125 = queryNorm
              0.7874572 = fieldWeight in 5344, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                7.61935 = idf(docFreq=58, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5344)
          0.069019556 = weight(abstract_txt:sorgen in 5344) [ClassicSimilarity], result of:
            0.069019556 = score(doc=5344,freq=1.0), product of:
              0.2226525 = queryWeight, product of:
                1.8603973 = boost
                7.935687 = idf(docFreq=42, maxDocs=44218)
                0.01508125 = queryNorm
              0.30998778 = fieldWeight in 5344, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.935687 = idf(docFreq=42, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5344)
        0.28 = coord(7/25)
    
  2. Heyer, G.; Quasthoff, U.; Wittig, T.: Text Mining : Wissensrohstoff Text. Konzepte, Algorithmen, Ergebnisse (2006) 0.08
    0.08370124 = sum of:
      0.08370124 = product of:
        0.34875515 = sum of:
          0.015158982 = weight(abstract_txt:einen in 5218) [ClassicSimilarity], result of:
            0.015158982 = score(doc=5218,freq=2.0), product of:
              0.06433039 = queryWeight, product of:
                4.2655873 = idf(docFreq=1687, maxDocs=44218)
                0.01508125 = queryNorm
              0.23564263 = fieldWeight in 5218, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.2655873 = idf(docFreq=1687, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5218)
          0.030821657 = weight(abstract_txt:arbeiten in 5218) [ClassicSimilarity], result of:
            0.030821657 = score(doc=5218,freq=1.0), product of:
              0.13008188 = queryWeight, product of:
                1.4220023 = boost
                6.0656753 = idf(docFreq=278, maxDocs=44218)
                0.01508125 = queryNorm
              0.23694044 = fieldWeight in 5218, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.0656753 = idf(docFreq=278, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5218)
          0.0666172 = weight(abstract_txt:grundlagen in 5218) [ClassicSimilarity], result of:
            0.0666172 = score(doc=5218,freq=4.0), product of:
              0.13698837 = queryWeight, product of:
                1.4592637 = boost
                6.2246165 = idf(docFreq=237, maxDocs=44218)
                0.01508125 = queryNorm
              0.48629817 = fieldWeight in 5218, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                6.2246165 = idf(docFreq=237, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5218)
          0.07281186 = weight(abstract_txt:texten in 5218) [ClassicSimilarity], result of:
            0.07281186 = score(doc=5218,freq=2.0), product of:
              0.18313481 = queryWeight, product of:
                1.6872419 = boost
                7.1970778 = idf(docFreq=89, maxDocs=44218)
                0.01508125 = queryNorm
              0.3975861 = fieldWeight in 5218, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                7.1970778 = idf(docFreq=89, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5218)
          0.069019556 = weight(abstract_txt:linguistik in 5218) [ClassicSimilarity], result of:
            0.069019556 = score(doc=5218,freq=1.0), product of:
              0.2226525 = queryWeight, product of:
                1.8603973 = boost
                7.935687 = idf(docFreq=42, maxDocs=44218)
                0.01508125 = queryNorm
              0.30998778 = fieldWeight in 5218, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.935687 = idf(docFreq=42, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5218)
          0.09432589 = weight(abstract_txt:statistik in 5218) [ClassicSimilarity], result of:
            0.09432589 = score(doc=5218,freq=1.0), product of:
              0.27419958 = queryWeight, product of:
                2.0645494 = boost
                8.806516 = idf(docFreq=17, maxDocs=44218)
                0.01508125 = queryNorm
              0.3440045 = fieldWeight in 5218, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.806516 = idf(docFreq=17, maxDocs=44218)
                0.0390625 = fieldNorm(doc=5218)
        0.24 = coord(6/25)
    
  3. Internet Adressen : die 'Gelben Seiten' für das Internet (1996) 0.08
    0.08150601 = sum of:
      0.08150601 = product of:
        0.67921674 = sum of:
          0.062366515 = weight(abstract_txt:ihre in 4469) [ClassicSimilarity], result of:
            0.062366515 = score(doc=4469,freq=1.0), product of:
              0.08258613 = queryWeight, product of:
                1.1330405 = boost
                4.8330836 = idf(docFreq=956, maxDocs=44218)
                0.01508125 = queryNorm
              0.75516933 = fieldWeight in 4469, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.8330836 = idf(docFreq=956, maxDocs=44218)
                0.15625 = fieldNorm(doc=4469)
          0.26475596 = weight(abstract_txt:schnellen in 4469) [ClassicSimilarity], result of:
            0.26475596 = score(doc=4469,freq=1.0), product of:
              0.21652265 = queryWeight, product of:
                1.8346093 = boost
                7.825686 = idf(docFreq=47, maxDocs=44218)
                0.01508125 = queryNorm
              1.2227634 = fieldWeight in 4469, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.825686 = idf(docFreq=47, maxDocs=44218)
                0.15625 = fieldNorm(doc=4469)
          0.35209426 = weight(abstract_txt:sortiert in 4469) [ClassicSimilarity], result of:
            0.35209426 = score(doc=4469,freq=1.0), product of:
              0.2618457 = queryWeight, product of:
                2.0175052 = boost
                8.6058445 = idf(docFreq=21, maxDocs=44218)
                0.01508125 = queryNorm
              1.3446631 = fieldWeight in 4469, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.6058445 = idf(docFreq=21, maxDocs=44218)
                0.15625 = fieldNorm(doc=4469)
        0.12 = coord(3/25)
    
  4. Lanvent, A.: Licht im Daten Chaos (2004) 0.08
    0.08056804 = sum of:
      0.08056804 = product of:
        0.6714003 = sum of:
          0.05457542 = weight(abstract_txt:nach in 2806) [ClassicSimilarity], result of:
            0.05457542 = score(doc=2806,freq=1.0), product of:
              0.06690882 = queryWeight, product of:
                1.0198437 = boost
                4.350232 = idf(docFreq=1550, maxDocs=44218)
                0.01508125 = queryNorm
              0.8156685 = fieldWeight in 2806, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.350232 = idf(docFreq=1550, maxDocs=44218)
                0.1875 = fieldNorm(doc=2806)
          0.24713163 = weight(abstract_txt:texten in 2806) [ClassicSimilarity], result of:
            0.24713163 = score(doc=2806,freq=1.0), product of:
              0.18313481 = queryWeight, product of:
                1.6872419 = boost
                7.1970778 = idf(docFreq=89, maxDocs=44218)
                0.01508125 = queryNorm
              1.349452 = fieldWeight in 2806, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.1970778 = idf(docFreq=89, maxDocs=44218)
                0.1875 = fieldNorm(doc=2806)
          0.36969328 = weight(abstract_txt:mails in 2806) [ClassicSimilarity], result of:
            0.36969328 = score(doc=2806,freq=1.0), product of:
              0.2395409 = queryWeight, product of:
                1.9296643 = boost
                8.231152 = idf(docFreq=31, maxDocs=44218)
                0.01508125 = queryNorm
              1.5433409 = fieldWeight in 2806, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.231152 = idf(docFreq=31, maxDocs=44218)
                0.1875 = fieldNorm(doc=2806)
        0.12 = coord(3/25)
    
  5. Krüger, J.: Wie der Mensch die Kontrolle über den Algorithmus behalten kann 0.08
    0.076745614 = sum of:
      0.076745614 = product of:
        0.47966012 = sum of:
          0.045662276 = weight(abstract_txt:software in 4061) [ClassicSimilarity], result of:
            0.045662276 = score(doc=4061,freq=1.0), product of:
              0.06708795 = queryWeight, product of:
                1.0212079 = boost
                4.3560514 = idf(docFreq=1541, maxDocs=44218)
                0.01508125 = queryNorm
              0.68063307 = fieldWeight in 4061, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.3560514 = idf(docFreq=1541, maxDocs=44218)
                0.15625 = fieldNorm(doc=4061)
          0.069511816 = weight(abstract_txt:immer in 4061) [ClassicSimilarity], result of:
            0.069511816 = score(doc=4061,freq=1.0), product of:
              0.08877934 = queryWeight, product of:
                1.1747565 = boost
                5.0110264 = idf(docFreq=800, maxDocs=44218)
                0.01508125 = queryNorm
              0.7829729 = fieldWeight in 4061, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0110264 = idf(docFreq=800, maxDocs=44218)
                0.15625 = fieldNorm(doc=4061)
          0.10777116 = weight(abstract_txt:überblick in 4061) [ClassicSimilarity], result of:
            0.10777116 = score(doc=4061,freq=1.0), product of:
              0.11892538 = queryWeight, product of:
                1.3596565 = boost
                5.799733 = idf(docFreq=363, maxDocs=44218)
                0.01508125 = queryNorm
              0.9062083 = fieldWeight in 4061, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.799733 = idf(docFreq=363, maxDocs=44218)
                0.15625 = fieldNorm(doc=4061)
          0.25671488 = weight(abstract_txt:häufiger in 4061) [ClassicSimilarity], result of:
            0.25671488 = score(doc=4061,freq=1.0), product of:
              0.21211603 = queryWeight, product of:
                1.8158445 = boost
                7.7456436 = idf(docFreq=51, maxDocs=44218)
                0.01508125 = queryNorm
              1.2102568 = fieldWeight in 4061, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.7456436 = idf(docFreq=51, maxDocs=44218)
                0.15625 = fieldNorm(doc=4061)
        0.16 = coord(4/25)