Using Node and Mongo to Collect Data from Reddit

I am taking a course on MongoDB development with Node.js from Mongo University. In the second week we covered something that I thought was very interesting: how to grab the JSON data behind a Reddit page. Reddit offers its data as JSON if you append .json to the page's path. Here is a CoffeeScript version of the code provided in the course.

MongoClient = require('mongodb').MongoClient
request = require('request')

# Download the front page of r/technology as JSON and insert each story's
# `data` object as a document into the `reddit` collection.
MongoClient.connect 'mongodb://localhost:27017/reddit', ( err, db ) ->
    throw err if err
    request 'http://www.reddit.com/r/technology/.json', ( err, response, body ) ->
        # Close the connection on every failure path; otherwise it is never
        # released when the request fails or returns a non-200 status.
        if err or response.statusCode isnt 200
            console.error 'Reddit request failed:', err or "HTTP #{response.statusCode}"
            return db.close()
        try
            # Keep only the `data` payload of each listing child.
            stories = ( story.data for story in JSON.parse( body ).data.children )
        catch parseError
            # Covers both malformed JSON and a body without `data.children`.
            console.error 'Unexpected response body from Reddit:', parseError
            return db.close()
        db.collection( 'reddit' ).insert stories, ( err, data ) ->
            # Release the connection before reporting the outcome so an
            # insert error cannot leave it open.
            db.close()
            throw err if err
            console.dir data

In order to run this, you will need to have a MongoDB server running in another terminal. Simply run:

mongod --dbpath /data/db

This will grab the data from the front page of the technology subreddit, and then store it in a collection named reddit.

Leave a Reply

Your email address will not be published. Required fields are marked *