summaryrefslogtreecommitdiff
path: root/posts/2019-12-22-Fake-News-Detector/index.html
blob: 6eb49ff6f973ae13992bb87a8847593420d5b0ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"/><meta name="og:site_name" content="Navan Chauhan"/><link rel="canonical" href="https://navanchauhan.github.io/posts/2019-12-22-Fake-News-Detector"/><meta name="twitter:url" content="https://navanchauhan.github.io/posts/2019-12-22-Fake-News-Detector"/><meta name="og:url" content="https://navanchauhan.github.io/posts/2019-12-22-Fake-News-Detector"/><title>Building a Fake News Detector with Turicreate | Navan Chauhan</title><meta name="twitter:title" content="Building a Fake News Detector with Turicreate | Navan Chauhan"/><meta name="og:title" content="Building a Fake News Detector with Turicreate | Navan Chauhan"/><meta name="description" content="In this tutorial we will build a fake news detecting app from scratch, using Turicreate for the machine learning model and SwiftUI for building the app"/><meta name="twitter:description" content="In this tutorial we will build a fake news detecting app from scratch, using Turicreate for the machine learning model and SwiftUI for building the app"/><meta name="og:description" content="In this tutorial we will build a fake news detecting app from scratch, using Turicreate for the machine learning model and SwiftUI for building the app"/><meta name="twitter:card" content="summary"/><link rel="stylesheet" href="/styles.css" type="text/css"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><link rel="shortcut icon" href="/images/favicon.png" type="image/png"/><link rel="alternate" href="/feed.rss" type="application/rss+xml" title="Subscribe to Navan Chauhan"/><meta name="twitter:image" content="https://navanchauhan.github.io/images/logo.png"/><meta name="og:image" content="https://navanchauhan.github.io/images/logo.png"/></head><head><script src="https://www.googletagmanager.com/gtag/js?id=UA-108635191-1v"></script><script>window.dataLayer = window.dataLayer || [];function gtag(){dataLayer.push(arguments);}gtag('js', new Date());gtag('config', 
'UA-108635191-1');</script></head><body class="item-page"><header><div class="wrapper"><a class="site-name" href="/">Navan Chauhan</a><nav><ul><li><a class="selected" href="/posts">Posts</a></li><li><a href="/publications">Publications</a></li><li><a href="/about">About Me</a></li><li><a href="https://navanchauhan.github.io/repo">Repo</a></li></ul></nav></div></header><div class="wrapper"><article><div class="content"><span class="reading-time">🕑 6 minute read.</span><h1>Building a Fake News Detector with Turicreate</h1><p><strong>In this tutorial we will build a fake news detecting app from scratch, using Turicreate for the machine learning model and SwiftUI for building the app</strong></p><p>Note: These commands are written as if you are running a jupyter notebook.</p><h2>Building the Machine Learning Model</h2><h3>Data Gathering</h3><p>To build a classifier, you need a lot of data. George McIntire (GH: @joolsa) has created a wonderful dataset containing the headline, body and whether it is fake or real. Whenever you are looking for a dataset, always try searching on Kaggle and GitHub before you start building your own.</p><h3>Dependencies</h3><p>I used a Google Colab instance for training my model. If you also plan on using Google Colab then I recommend choosing a GPU Instance (It is Free). This allows you to train the model on the GPU. Turicreate is built on top of Apache's MXNet Framework; for us to use the GPU we need to install a CUDA compatible MXNet package.</p><pre><code>!pip install turicreate
!pip uninstall -y mxnet
!pip install mxnet-cu100==1.4.0.post0
</code></pre><p>If you do not wish to train on GPU or are running it on your computer, you can ignore the last two lines</p><h3>Downloading the Dataset</h3><pre><code>!wget -q "https://github.com/joolsa/fake_real_news_dataset/raw/master/fake_or_real_news.csv.zip"
!unzip fake_or_real_news.csv.zip
</code></pre><h3>Model Creation</h3><pre><code>import turicreate as tc
tc.config.set_num_gpus(-1) # If you do not wish to use GPUs, set it to 0
</code></pre><pre><code>dataSFrame = tc.SFrame('fake_or_real_news.csv')
</code></pre><p>The dataset contains a column named "X1", which is of no use to us. Therefore, we simply drop it</p><pre><code>dataSFrame.remove_column('X1')
</code></pre><h4>Splitting Dataset</h4><pre><code>train, test = dataSFrame.random_split(.9)
</code></pre><h4>Training</h4><pre><code>model = tc.text_classifier.create(
    dataset=train,
    target='label',
    features=['title','text']
)
</code></pre><pre><code>+-----------+----------+-----------+--------------+-------------------+---------------------+
| Iteration | Passes   | Step size | Elapsed Time | Training Accuracy | Validation Accuracy |
+-----------+----------+-----------+--------------+-------------------+---------------------+
| 0         | 2        | 1.000000  | 1.156349     | 0.889680          | 0.790036            |
| 1         | 4        | 1.000000  | 1.359196     | 0.985952          | 0.918149            |
| 2         | 6        | 0.820091  | 1.557205     | 0.990260          | 0.914591            |
| 3         | 7        | 1.000000  | 1.684872     | 0.998689          | 0.925267            |
| 4         | 8        | 1.000000  | 1.814194     | 0.999063          | 0.925267            |
| 9         | 14       | 1.000000  | 2.507072     | 1.000000          | 0.911032            |
+-----------+----------+-----------+--------------+-------------------+---------------------+
</code></pre><h3>Testing the Model</h3><pre><code>test_predictions = model.predict(test)
accuracy = tc.evaluation.accuracy(test['label'], test_predictions)
print(f'Topic classifier model has a testing accuracy of {accuracy*100}% ', flush=True)
</code></pre><pre><code>Topic classifier model has a testing accuracy of 92.3076923076923%
</code></pre><p>We have just created our own Fake News Detection Model which has an accuracy of 92%!</p><pre><code>example_text = {"title": ["Middling ‘Rise Of Skywalker’ Review Leaves Fan On Fence About Whether To Threaten To Kill Critic"], "text": ["Expressing ambivalence toward the relatively balanced appraisal of the film, Star Wars fan Miles Ariely admitted Thursday that an online publication’s middling review of The Rise Of Skywalker had left him on the fence about whether he would still threaten to kill the critic who wrote it. “I’m really of two minds about this, because on the one hand, he said the new movie fails to live up to the original trilogy, which makes me at least want to throw a brick through his window with a note telling him to watch his back,” said Ariely, confirming he had already drafted an eight-page-long death threat to Stan Corimer of the website Screen-On Time, but had not yet decided whether to post it to the reviewer’s Facebook page. “On the other hand, though, he commended J.J. Abrams’ skillful pacing and faithfulness to George Lucas’ vision, which makes me wonder if I should just call the whole thing off. Now, I really don’t feel like camping outside his house for hours. Maybe I could go with a response that’s somewhere in between, like, threatening to kill his dog but not everyone in his whole family? I don’t know. This is a tough one.” At press time, sources reported that Ariely had resolved to wear his Ewok costume while he murdered the critic in his sleep."]}
example_prediction = model.classify(tc.SFrame(example_text))
print(example_prediction, flush=True)
</code></pre><pre><code>+-------+--------------------+
| class |    probability     |
+-------+--------------------+
|  FAKE | 0.9245648658345308 |
+-------+--------------------+
[1 rows x 2 columns]
</code></pre><h3>Exporting the Model</h3><pre><code>model_name = 'FakeNews'
coreml_model_name = model_name + '.mlmodel'
exportedModel = model.export_coreml(coreml_model_name)
</code></pre><p><strong>Note: To download files from Google Colab, simply click on the files section in the sidebar, right click on filename and then click on download</strong></p><p><a href="https://colab.research.google.com/drive/1onMXGkhA__X2aOFdsoVL-6HQBsWQhOP4">Link to Colab Notebook</a></p><h2>Building the App using SwiftUI</h2><h3>Initial Setup</h3><p>First we create a single view app (make sure you check the use SwiftUI button)</p><p>Then we copy our .mlmodel file to our project (Just drag and drop the file in the Xcode Files Sidebar)</p><p>Our ML Model does not take a string directly as an input, rather it takes a bag of words as an input. The bag-of-words model is a simplifying representation used in NLP, in which text is represented as a bag of words, without any regard for grammar or order, but noting multiplicity.</p><p>We define our bag of words function</p><pre><code>func bow(text: String) -&gt; [String: Double] {
        var bagOfWords = [String: Double]()
        
        let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0)
        let range = NSRange(location: 0, length: text.utf16.count)
        let options: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace]
        tagger.string = text
        
        tagger.enumerateTags(in: range, unit: .word, scheme: .tokenType, options: options) { _, tokenRange, _ in
            let word = (text as NSString).substring(with: tokenRange)
            if bagOfWords[word] != nil {
                bagOfWords[word]! += 1
            } else {
                bagOfWords[word] = 1
            }
        }
        
        return bagOfWords
    }
</code></pre><p>We also declare our variables</p><pre><code>@State private var title: String = ""
@State private var headline: String = ""
@State private var alertTitle = ""
@State private var alertText = ""
@State private var showingAlert = false
</code></pre><p>Finally, we implement a simple function which reads the two text fields, creates their bag of words representation and displays an alert with the appropriate result</p><p><strong>Complete Code</strong></p><pre><code>import SwiftUI

/// Single-screen UI for the Fake News Checker.
///
/// The user types a headline and an article body; tapping "Check" converts
/// both to bag-of-words dictionaries, runs them through the `FakeNews` model
/// and shows the predicted label in an alert.
struct ContentView: View {
    /// Text bound to the "Headline" field.
    @State private var title: String = ""
    /// Text bound to the "Body" field (despite its name, it holds the article body).
    @State private var headline: String = ""

    /// Title and message of the result alert, written by `classifyFakeNews()`.
    @State private var alertTitle = ""
    @State private var alertText = ""
    /// Drives presentation of the result alert.
    @State private var showingAlert = false

    var body: some View {
        NavigationView {
            VStack(alignment: .leading) {
                Text("Headline").font(.headline)
                TextField("Please Enter Headline", text: $title)
                    .lineLimit(nil)
                Text("Body").font(.headline)
                TextField("Please Enter the content", text: $headline)
                    .lineLimit(nil)
            }
            .navigationBarTitle("Fake News Checker")
            .navigationBarItems(trailing:
                Button(action: classifyFakeNews) {
                    Text("Check")
                })
            .padding()
            .alert(isPresented: $showingAlert) {
                Alert(title: Text(alertTitle), message: Text(alertText), dismissButton: .default(Text("OK")))
            }
        }
    }

    /// Classifies the current input and presents the outcome in an alert.
    ///
    /// Both text fields are converted with `bow(text:)` and passed to the
    /// model. On success the alert shows the predicted label; on failure it
    /// shows a generic error message. The alert is presented in either case.
    func classifyFakeNews() {
        let model = FakeNews()
        let myTitle = bow(text: title)
        let myText = bow(text: headline)
        do {
            let prediction = try model.prediction(title: myTitle, text: myText)
            alertTitle = prediction.label
            alertText = "It is likely that this piece of news is \(prediction.label.lowercased())."
            print(alertText)
        } catch {
            // Keep the underlying reason visible in the console instead of
            // discarding it; the user still gets the generic message below.
            print("Prediction failed: \(error)")
            alertTitle = "Error"
            alertText = "Sorry, could not classify if the input news was fake or not."
        }

        showingAlert = true
    }

    /// Builds a bag-of-words representation of `text`.
    ///
    /// - Parameter text: The string to tokenize.
    /// - Returns: A dictionary mapping each word token to the number of times
    ///   it occurs in `text`. Punctuation and whitespace tokens are omitted.
    ///   An empty string yields an empty dictionary.
    func bow(text: String) -&gt; [String: Double] {
        var bagOfWords = [String: Double]()

        let tagger = NSLinguisticTagger(tagSchemes: [.tokenType], options: 0)
        // NSRange is expressed in UTF-16 code units, hence `utf16.count`.
        let range = NSRange(location: 0, length: text.utf16.count)
        let options: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace]
        tagger.string = text

        tagger.enumerateTags(in: range, unit: .word, scheme: .tokenType, options: options) { _, tokenRange, _ in
            let word = (text as NSString).substring(with: tokenRange)
            // The defaulting subscript inserts 0 for unseen words, so no
            // nil-check or force-unwrap is needed.
            bagOfWords[word, default: 0] += 1
        }

        return bagOfWords
    }
}

/// Supplies a `ContentView` instance as the preview content.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View {
        return ContentView()
    }
}

</code></pre></div><span>Tagged with: </span><ul class="tag-list"><li><a href="/tags/tutorial">tutorial</a></li><li><a href="/tags/colab">colab</a></li><li><a href="/tags/swiftui">swiftUI</a></li><li><a href="/tags/turicreate">turicreate</a></li></ul></article></div><footer><p>Made with ❤️ using <a href="https://github.com/johnsundell/publish">Publish</a></p><p><a href="/feed.rss">RSS feed</a></p></footer></body></html>