GO語言利用K近鄰演算法實現小說鑒黃_Golang

來源:互聯網
上載者:User

Usage:
  go run kNN.go --file="data.txt"

關鍵是向量點的選擇和閾值的判定
樣本資料來自國家新聞出版總署發布通知公布的《40部淫穢色情網路小說名單》

// Command kNN scores UTF-8 text files by how often a fixed set of label
// characters occurs in them and prints a verdict for each file.
//
// With --file it scores that single file; without it, it walks data_dir and
// scores every regular file whose extension is ".txt".
package main

import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log"
	"math"
	"os"
	"path/filepath"
)

var (
	// debug enables per-label diagnostic output in classify.
	debug bool = false
	// data_dir is the directory scanned for .txt files when --file is not given.
	data_dir string = "./moyan"
	// limen is the decision threshold: a distance at or above it is reported
	// as flagged by check.
	limen float64 = 0.1159203888322267
)

// MIN_HANZI and MAX_HANZI bound (inclusively) the code points that load
// counts; every other rune in the input is ignored.
const (
	MIN_HANZI rune = 0x3400
	MAX_HANZI rune = 0x9fbb
)

// labels lists the feature characters. Each one contributes one component to
// the vector built by classify.
var labels []rune = []rune{
	0x817f, 0x80f8, 0x4e73, 0x81c0,
	0x5c41, 0x80a1, 0x88f8, 0x6deb,
}

// errHandle terminates the program via log.Fatal when err is non-nil.
func errHandle(err error) {
	if err != nil {
		log.Fatal(err)
	}
}

// load reads the file called name rune by rune and returns how many times each
// rune in [MIN_HANZI, MAX_HANZI] occurs. It returns a nil map and a non-nil
// error when the file cannot be opened or read.
func load(name string) (m map[rune]int, err error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	buf := bufio.NewReader(f)
	m = make(map[rune]int)
	for {
		r, _, readErr := buf.ReadRune()
		if readErr == io.EOF {
			return m, nil
		}
		if readErr != nil {
			return nil, fmt.Errorf("reading %q: %w", name, readErr)
		}
		if r >= MIN_HANZI && r <= MAX_HANZI {
			m[r]++
		}
	}
}

// classify turns the rune counts in m into a feature vector and its Euclidean
// length.
//
// idv holds one entry per element of labels: that label's count divided by
// the number of distinct runes in m. dis is the square root of the sum of the
// squared entries. For an empty (or nil) m every entry, and dis, is 0.
func classify(m map[rune]int) (idv []float64, dis float64) {
	idv = make([]float64, 0, len(labels))
	distinct := float64(len(m))
	for i, label := range labels {
		var freq float64
		// Guard the division: with no counted runes 0/0 would yield NaN, and
		// check could then never reach its dis == 0 diagnostic.
		if distinct > 0 {
			freq = float64(m[label]) / distinct
		}
		if debug {
			fmt.Println(i, m[label], string(label), freq)
		}
		idv = append(idv, freq)
		dis += math.Pow(freq, 2)
	}
	return idv, math.Sqrt(dis)
}

// check prints the verdict for the file fp given its distance dis.
//
// The exact-value cases are tested before the threshold comparison; placed
// after it, the dis == 1.0 case could never be selected because 1.0 is above
// limen.
func check(fp string, dis float64) {
	switch {
	case dis == 0:
		fmt.Println(fp, dis, "檢查一下檔案字元編碼是不是utf8格式吧")
	case dis == 1.0:
		fmt.Println(fp, dis, "你在作弊嗎")
	case dis >= limen:
		fmt.Println(fp, dis, "涉黃")
	default:
		fmt.Println(fp, dis, "正常")
	}
}

// walkFunc is the filepath.Walk callback: it scores every non-directory entry
// whose extension is ".txt". An error reported by Walk for fp is returned
// unchanged; a failure to load a file terminates the program via errHandle.
func walkFunc(fp string, info os.FileInfo, err error) error {
	if err != nil {
		// info may be nil when Walk reports an error, so bail out first.
		return err
	}
	if info.IsDir() || filepath.Ext(fp) != ".txt" {
		return nil
	}
	m, err := load(fp)
	errHandle(err)
	_, dis := classify(m)
	check(fp, dis)
	return nil
}

// file is the path given with --file; empty means "scan data_dir".
var file string

// init creates data_dir when os.Stat on it fails and registers the --file flag.
func init() {
	_, err := os.Stat(data_dir)
	if err != nil {
		err = os.Mkdir(data_dir, os.ModePerm)
		errHandle(err)
	}
	flag.StringVar(&file, "file", "", "file read in,if you don't give the file read in,"+
		"it will create a data dictionary,just pust your files in it")
}

// main scores the file named by --file, or every .txt file under data_dir
// when the flag is empty.
func main() {
	flag.Parse()
	if file == "" {
		// Surface walk failures instead of silently stopping the scan.
		errHandle(filepath.Walk(data_dir, walkFunc))
		return
	}
	m, err := load(file)
	errHandle(err)
	_, dis := classify(m)
	check(file, dis)
}

以上所述就是本文的全部內容了,希望大家能夠喜歡。

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.